linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
To: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Cc: akpm@linux-foundation.org, mgorman@suse.de,
	matthew.garrett@nebula.com, dave@sr71.net, rientjes@google.com,
	riel@redhat.com, arjan@linux.intel.com,
	maxime.coquelin@stericsson.com, loic.pallardy@stericsson.com,
	kamezawa.hiroyu@jp.fujitsu.com, lenb@kernel.org, rjw@sisk.pl,
	gargankita@gmail.com, paulmck@linux.vnet.ibm.com,
	amit.kachhap@linaro.org, svaidy@linux.vnet.ibm.com,
	andi@firstfloor.org, wujianguo@huawei.com, kmpark@infradead.org,
	thomas.abraham@linaro.org, santosh.shilimkar@ti.com,
	linux-pm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH v2 00/15][Sorted-buddy] mm: Memory Power Management
Date: Thu, 18 Apr 2013 08:13:54 -0700	[thread overview]
Message-ID: <51700DB2.5090506@linux.intel.com> (raw)
In-Reply-To: <516FC2D1.9020809@linux.vnet.ibm.com>

On 04/18/2013 02:54 AM, Srivatsa S. Bhat wrote:
> On 04/17/2013 10:23 PM, Srinivas Pandruvada wrote:
>> On 04/09/2013 02:45 PM, Srivatsa S. Bhat wrote:
>>> [I know, this cover letter is a little too long, but I wanted to clearly
>>> explain the overall goals and the high-level design of this patchset in
>>> detail. I hope this helps more than it annoys, and makes it easier for
>>> reviewers to relate to the background and the goals of this patchset.]
>>>
>>>
>>> Overview of Memory Power Management and its implications to the Linux MM
>>> ========================================================================
>>>
> [...]
>> One thing you need to prevent is boot time allocation. You have to make
>> sure that frequently accessed per node data stored at the end of memory
>> will keep all ranks of memory active.
>>
When I was experimenting I did something like this.
/////////////////////////////////


+/*
+ * Experimental MPST implemenentation
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License 
along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/acpi.h>
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/acpi.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/page-isolation.h>
+#include <linux/vmalloc.h>
+#include <linux/compaction.h>
+#include "internal.h"
+
+#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
+#define pfn_to_phys(p) ((p) << PAGE_SHIFT)
+#define MAX_MPST_ZONES 16
+/* Atleast 4G of non MPST memory. */
+#define MINIMAL_NON_MPST_MEMORY_PFN (0x100000000 >> PAGE_SHIFT)
+
+struct mpst_mem_zone {
+       phys_addr_t start_addr;
+       phys_addr_t end_addr;
+};
+
+static struct mpst_mem_zone mpst_zones[MAX_MPST_ZONES];
+static int mpst_zone_cnt;
+static unsigned long mpst_start_pfn;
+static unsigned long mpst_end_pfn;
+static bool mpst_enabled;
+
+/* Minimal parsing for just getting node ranges */
+static int __init acpi_parse_mpst_table(struct acpi_table_header *table)
+{
+       struct acpi_table_mpst *mpst;
+       struct acpi_mpst_power_node *node;
+       u16 node_count;
+       int i;
+
+       mpst = (struct acpi_table_mpst *)table;
+       if (!mpst) {
+               pr_warn("Unable to map MPST\n");
+               return -ENODEV;
+       }
+       node_count = mpst->power_node_count;
+       node = (struct acpi_mpst_power_node *)((u8 *)mpst + sizeof(*mpst));
+
+       for (i = mpst_zone_cnt; (i < node_count) && (i < MAX_MPST_ZONES);
+ ++i) {
+               if ((node->flags & ACPI_MPST_ENABLED) &&
+                       (node->flags & ACPI_MPST_POWER_MANAGED)) {
+                       mpst_zones[mpst_zone_cnt].start_addr =
+                               node->range_address;
+                       mpst_zones[mpst_zone_cnt].end_addr =
+                               node->range_address + node->range_length;
+                       ++mpst_zone_cnt;
+               }
+               ++node;
+       }
+
+       return 0;
+}
+
+static unsigned long local_ahex_to_long(const char *name)
+{
+       unsigned long val = 0;
+
+       for (;; name++) {
+               switch (*name) {
+               case '0' ... '9':
+                       val = 16*val+(*name-'0');
+                       break;
+               case 'A' ... 'F':
+                       val = 16*val+(*name-'A'+10);
+                       break;
+               case 'a' ... 'f':
+                       val = 16*val+(*name-'a'+10);
+                       break;
+               default:
+                       return val;
+               }
+       }
+
+       return val;
+}
+
+/* Specify MPST range by command line for test till ACPI - MPST is 
available */
+static int __init parse_mpst_opt(char *str)
+{
+       char *ptr;
+       phys_addr_t start_at = 0, end_at = 0;
+       u64  mem_size = 0;
+
+       if (!str)
+               return -EINVAL;
+       ptr = str;
+       while (1) {
+               if (*str == '-') {
+                       *str = '\0';
+                       start_at = local_ahex_to_long(ptr);
+                       ++str;
+                       ptr = str;
+               }
+               if (start_at && (*str == '\0' || *str == ',' || *str == 
' ')) {
+                       *str = '\0';
+                       end_at = local_ahex_to_long(ptr);
+                       mem_size = end_at-start_at;
+                       ++str;
+                       ptr = str;
+                       pr_info("-mpst[%#018Lx-%#018Lx size: %#018Lx]\n",
+                                               start_at, end_at, mem_size);
+                       if (IS_ALIGNED(phys_to_pfn(start_at),
+                                       pageblock_nr_pages) &&
+ IS_ALIGNED(phys_to_pfn(end_at),
+                                       pageblock_nr_pages)) {
+                               mpst_zones[mpst_zone_cnt].start_addr =
+ start_at;
+                               mpst_zones[mpst_zone_cnt].end_addr =
+ end_at;
+                       } else {
+                               pr_err("mpst invalid range\n");
+                               return -EINVAL;
+                       }
+                       mpst_zone_cnt++;
+                       start_at = mem_size = end_at = 0;
+               }
+               if (*str == '\0')
+                       break;
+               else
+                       ++str;
+       }
+
+       return 0;
+}
+early_param("mpst_range", parse_mpst_opt);
+
+/* Specify MPST range by command line for test till ACPI - MPST is 
available */
+static int __init parse_mpst_enable_opt(char *str)
+{
+       long value;
+       if (kstrtol(str, 10, &value))
+               return -EINVAL;
+       mpst_enabled = value ? true : false;
+
+       return 0;
+}
+early_param("mpst_enable", parse_mpst_enable_opt);
+
+/* Set the minimum and maximum PFN */
+static void mpst_set_min_max_pfn(void)
+{
+       int i;
+
+       if (!mpst_zone_cnt)
+               return;
+
+       mpst_start_pfn = phys_to_pfn(mpst_zones[0].start_addr);
+       mpst_end_pfn = phys_to_pfn(mpst_zones[0].end_addr);
+
+       for (i = 1; i < mpst_zone_cnt; ++i) {
+               if (mpst_start_pfn > phys_to_pfn(mpst_zones[i].start_addr))
+                       mpst_start_pfn = 
phys_to_pfn(mpst_zones[i].start_addr);
+               if (mpst_end_pfn < phys_to_pfn(mpst_zones[i].end_addr))
+                       mpst_end_pfn = phys_to_pfn(mpst_zones[i].end_addr);
+       }
+}
+
+/* Change migrate type for the MPST ranges */
+int mpst_set_migrate_type(void)
+{
+       int i;
+       struct page *page;
+       unsigned long start_pfn, end_pfn;
+
+       if (!mpst_start_pfn || !mpst_end_pfn)
+               return -EINVAL;
+       if (!IS_ALIGNED(mpst_start_pfn, pageblock_nr_pages))
+               return -EINVAL;
+       if (!IS_ALIGNED(mpst_end_pfn, pageblock_nr_pages))
+               return -EINVAL;
+       memblock_free(pfn_to_phys(mpst_start_pfn),
+               pfn_to_phys(mpst_end_pfn) - pfn_to_phys(mpst_start_pfn));
+       for (i = 0; i < mpst_zone_cnt; ++i) {
+               start_pfn = phys_to_pfn(mpst_zones[i].start_addr);
+               end_pfn = phys_to_pfn(mpst_zones[i].end_addr);
+               for (; start_pfn < end_pfn; ++start_pfn) {
+                       page = pfn_to_page(start_pfn);
+                       if (page)
+                               set_pageblock_migratetype(page,
+                                               MIGRATE_LP_MEMORY);
+               }
+       }
+
+       return 0;
+}
+
+/* Parse ACPI table and find start and end of MPST zone.
+Assuming zones are contiguous */
+int mpst_init(void)
+{
+       if (!mpst_enabled) {
+               pr_info("mpst not enabled in command line\n");
+               return 0;
+       }
+
+       acpi_table_parse(ACPI_SIG_MPST, acpi_parse_mpst_table);
+       mpst_set_min_max_pfn();
+       if (mpst_zone_cnt) {
+
+               if (mpst_start_pfn < MINIMAL_NON_MPST_MEMORY_PFN) {
+                       pr_err("Not enough memory: Ignore MPST\n");
+                       mpst_start_pfn = mpst_end_pfn = 0;
+                       return -EINVAL;
+               }
+               memblock_reserve(pfn_to_phys(mpst_start_pfn),
+                                       pfn_to_phys(mpst_end_pfn) -
+ pfn_to_phys(mpst_start_pfn));
+               pr_info("mpst_init memblock limit set to pfn %lu 
0x%#018lx\n",
+                       mpst_start_pfn, pfn_to_phys(mpst_start_pfn));
+       }
+
+       return 0;
+}





/////////////////////////////
> I think you meant to say "... stored at the end of memory will NOT keep all
> ranks of memory active".
>
> Yep, that's a good point! I'll think about how to achieve that. Thanks!
>
> Regards,
> Srivatsa S. Bhat
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-04-18 15:08 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-09 21:45 [RFC PATCH v2 00/15][Sorted-buddy] mm: Memory Power Management Srivatsa S. Bhat
2013-04-09 21:45 ` [RFC PATCH v2 01/15] mm: Introduce memory regions data-structure to capture region boundaries within nodes Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 02/15] mm: Initialize node memory regions during boot Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 03/15] mm: Introduce and initialize zone memory regions Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 04/15] mm: Add helpers to retrieve node region and zone region for a given page Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 05/15] mm: Add data-structures to describe memory regions within the zones' freelists Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 06/15] mm: Demarcate and maintain pageblocks in region-order in " Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 07/15] mm: Add an optimized version of del_from_freelist to keep page allocation fast Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 08/15] bitops: Document the difference in indexing between fls() and __fls() Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 09/15] mm: A new optimized O(log n) sorting algo to speed up buddy-sorting Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 10/15] mm: Add support to accurately track per-memory-region allocation Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 11/15] mm: Restructure the compaction part of CMA for wider use Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 12/15] mm: Add infrastructure to evacuate memory regions using compaction Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 13/15] mm: Implement the worker function for memory region compaction Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 14/15] mm: Add alloc-free handshake to trigger " Srivatsa S. Bhat
2013-04-10 23:26   ` Cody P Schafer
2013-04-16 13:49     ` Srivatsa S. Bhat
2013-04-09 21:49 ` [RFC PATCH v2 15/15] mm: Print memory region statistics to understand the buddy allocator behavior Srivatsa S. Bhat
2013-04-17 16:53 ` [RFC PATCH v2 00/15][Sorted-buddy] mm: Memory Power Management Srinivas Pandruvada
2013-04-18  9:54   ` Srivatsa S. Bhat
2013-04-18 15:13     ` Srinivas Pandruvada [this message]
2013-04-19  8:11       ` Srivatsa S. Bhat
2013-04-18 17:10 ` Dave Hansen
2013-04-19  6:50   ` Srivatsa S. Bhat
2013-04-25 17:57   ` Srivatsa S. Bhat
2013-04-19  5:34 ` Simon Jeons
2013-04-19  7:12   ` Srivatsa S. Bhat
2013-04-19 15:26     ` Srinivas Pandruvada
2013-05-28 20:08     ` Phillip Susi
2013-05-29  5:36       ` Srivatsa S. Bhat

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51700DB2.5090506@linux.intel.com \
    --to=srinivas.pandruvada@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=amit.kachhap@linaro.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=dave@sr71.net \
    --cc=gargankita@gmail.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kmpark@infradead.org \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=loic.pallardy@stericsson.com \
    --cc=matthew.garrett@nebula.com \
    --cc=maxime.coquelin@stericsson.com \
    --cc=mgorman@suse.de \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=rientjes@google.com \
    --cc=rjw@sisk.pl \
    --cc=santosh.shilimkar@ti.com \
    --cc=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=thomas.abraham@linaro.org \
    --cc=wujianguo@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).