From: Nathan Fontenot <nfont@austin.ibm.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linuxppc-dev@ozlabs.org
Cc: Greg KH <greg@kroah.com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
Dave Hansen <dave@linux.vnet.ibm.com>
Subject: [PATCH 5/9] v4 Allow memory_block to span multiple memory sections
Date: Tue, 03 Aug 2010 08:40:49 -0500 [thread overview]
Message-ID: <4C581C61.1050408@austin.ibm.com> (raw)
In-Reply-To: <4C581A6D.9030908@austin.ibm.com>
Update the memory sysfs code that each sysfs memory directory is now
considered a memory block that can contain multiple memory sections per
memory block. The default size of each memory block is SECTION_SIZE_BITS
to maintain the current behavior of having a single memory section per
memory block (i.e. one sysfs directory per memory section).
For architectures that want to have memory blocks span multiple
memory sections they need only define their own memory_block_size_bytes()
routine.
Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
---
drivers/base/memory.c | 148 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 103 insertions(+), 45 deletions(-)
Index: linux-2.6/drivers/base/memory.c
===================================================================
--- linux-2.6.orig/drivers/base/memory.c 2010-08-02 13:45:34.000000000 -0500
+++ linux-2.6/drivers/base/memory.c 2010-08-02 14:01:04.000000000 -0500
@@ -30,6 +30,14 @@
static struct mutex mem_sysfs_mutex;
#define MEMORY_CLASS_NAME "memory"
+#define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS)
+
+static int sections_per_block;
+
+static inline int base_memory_block_id(int section_nr)
+{
+ return (section_nr / sections_per_block) * sections_per_block;
+}
static struct sysdev_class memory_sysdev_class = {
.name = MEMORY_CLASS_NAME,
@@ -84,22 +92,21 @@ EXPORT_SYMBOL(unregister_memory_isolate_
* register_memory - Setup a sysfs device for a memory block
*/
static
-int register_memory(struct memory_block *memory, struct mem_section *section)
+int register_memory(struct memory_block *memory)
{
int error;
memory->sysdev.cls = &memory_sysdev_class;
- memory->sysdev.id = __section_nr(section);
+ memory->sysdev.id = memory->start_phys_index;
error = sysdev_register(&memory->sysdev);
return error;
}
static void
-unregister_memory(struct memory_block *memory, struct mem_section *section)
+unregister_memory(struct memory_block *memory)
{
BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
- BUG_ON(memory->sysdev.id != __section_nr(section));
/* drop the ref. we got in remove_memory_block() */
kobject_put(&memory->sysdev.kobj);
@@ -133,13 +140,16 @@ static ssize_t show_mem_end_phys_index(s
static ssize_t show_mem_removable(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
- unsigned long start_pfn;
- int ret;
+ unsigned long i, pfn;
+ int ret = 1;
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
- start_pfn = section_nr_to_pfn(mem->start_phys_index);
- ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+ for (i = mem->start_phys_index; i <= mem->end_phys_index; i++) {
+ pfn = section_nr_to_pfn(i);
+ ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ }
+
return sprintf(buf, "%d\n", ret);
}
@@ -192,17 +202,14 @@ int memory_isolate_notify(unsigned long
* OK to have direct references to sparsemem variables in here.
*/
static int
-memory_block_action(struct memory_block *mem, unsigned long action)
+memory_section_action(unsigned long phys_index, unsigned long action)
{
int i;
- unsigned long psection;
unsigned long start_pfn, start_paddr;
struct page *first_page;
int ret;
- int old_state = mem->state;
- psection = mem->start_phys_index;
- first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
+ first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
/*
* The probe routines leave the pages reserved, just
@@ -215,8 +222,8 @@ memory_block_action(struct memory_block
continue;
printk(KERN_WARNING "section number %ld page number %d "
- "not reserved, was it already online? \n",
- psection, i);
+ "not reserved, was it already online?\n",
+ phys_index, i);
return -EBUSY;
}
}
@@ -227,18 +234,13 @@ memory_block_action(struct memory_block
ret = online_pages(start_pfn, PAGES_PER_SECTION);
break;
case MEM_OFFLINE:
- mem->state = MEM_GOING_OFFLINE;
start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
ret = remove_memory(start_paddr,
PAGES_PER_SECTION << PAGE_SHIFT);
- if (ret) {
- mem->state = old_state;
- break;
- }
break;
default:
- WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
- __func__, mem, action, action);
+ WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
+ "%ld\n", __func__, phys_index, action, action);
ret = -EINVAL;
}
@@ -248,7 +250,7 @@ memory_block_action(struct memory_block
static int memory_block_change_state(struct memory_block *mem,
unsigned long to_state, unsigned long from_state_req)
{
- int ret = 0;
+ int i, ret = 0;
mutex_lock(&mem->state_mutex);
if (mem->state != from_state_req) {
@@ -256,8 +258,21 @@ static int memory_block_change_state(str
goto out;
}
- ret = memory_block_action(mem, to_state);
- if (!ret)
+ if (to_state == MEM_OFFLINE)
+ mem->state = MEM_GOING_OFFLINE;
+
+ for (i = mem->start_phys_index; i <= mem->end_phys_index; i++) {
+ ret = memory_section_action(i, to_state);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ for (i = mem->start_phys_index; i <= mem->end_phys_index; i++)
+ memory_section_action(i, from_state_req);
+
+ mem->state = from_state_req;
+ } else
mem->state = to_state;
out:
@@ -270,20 +285,15 @@ store_mem_state(struct sys_device *dev,
struct sysdev_attribute *attr, const char *buf, size_t count)
{
struct memory_block *mem;
- unsigned int phys_section_nr;
int ret = -EINVAL;
mem = container_of(dev, struct memory_block, sysdev);
- phys_section_nr = mem->start_phys_index;
-
- if (!present_section_nr(phys_section_nr))
- goto out;
if (!strncmp(buf, "online", min((int)count, 6)))
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
else if(!strncmp(buf, "offline", min((int)count, 7)))
ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
-out:
+
if (ret)
return ret;
return count;
@@ -460,12 +470,13 @@ struct memory_block *find_memory_block(s
struct sys_device *sysdev;
struct memory_block *mem;
char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+ int block_id = base_memory_block_id(__section_nr(section));
/*
* This only works because we know that section == sysdev->id
* slightly redundant with sysdev_register()
*/
- sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
+ sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
kobj = kset_find_obj(&memory_sysdev_class.kset, name);
if (!kobj)
@@ -477,26 +488,26 @@ struct memory_block *find_memory_block(s
return mem;
}
-static int add_memory_block(int nid, struct mem_section *section,
- unsigned long state, enum mem_add_context context)
+static int init_memory_block(struct memory_block **memory,
+ struct mem_section *section, unsigned long state)
{
- struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ struct memory_block *mem;
unsigned long start_pfn;
int ret = 0;
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem)
return -ENOMEM;
- mutex_lock(&mem_sysfs_mutex);
-
- mem->start_phys_index = __section_nr(section);
+ mem->start_phys_index = base_memory_block_id(__section_nr(section));
+ mem->end_phys_index = mem->start_phys_index + sections_per_block - 1;
mem->state = state;
atomic_inc(&mem->section_count);
mutex_init(&mem->state_mutex);
start_pfn = section_nr_to_pfn(mem->start_phys_index);
mem->phys_device = arch_get_memory_phys_device(start_pfn);
- ret = register_memory(mem, section);
+ ret = register_memory(mem);
if (!ret)
ret = mem_create_simple_file(mem, phys_index);
if (!ret)
@@ -507,8 +518,29 @@ static int add_memory_block(int nid, str
ret = mem_create_simple_file(mem, phys_device);
if (!ret)
ret = mem_create_simple_file(mem, removable);
+
+ *memory = mem;
+ return ret;
+}
+
+static int add_memory_section(int nid, struct mem_section *section,
+ unsigned long state, enum mem_add_context context)
+{
+ struct memory_block *mem;
+ int ret = 0;
+
+ mutex_lock(&mem_sysfs_mutex);
+
+ mem = find_memory_block(section);
+ if (mem) {
+ atomic_inc(&mem->section_count);
+ kobject_put(&mem->sysdev.kobj);
+ } else
+ ret = init_memory_block(&mem, section, state);
+
if (!ret) {
- if (context == HOTPLUG)
+ if (context == HOTPLUG &&
+ atomic_read(&mem->section_count) == sections_per_block)
ret = register_mem_sect_under_node(mem, nid);
}
@@ -531,8 +563,10 @@ int remove_memory_block(unsigned long no
mem_remove_simple_file(mem, state);
mem_remove_simple_file(mem, phys_device);
mem_remove_simple_file(mem, removable);
- unregister_memory(mem, section);
- }
+ unregister_memory(mem);
+ kfree(mem);
+ } else
+ kobject_put(&mem->sysdev.kobj);
mutex_unlock(&mem_sysfs_mutex);
return 0;
@@ -544,7 +578,7 @@ int remove_memory_block(unsigned long no
*/
int register_new_memory(int nid, struct mem_section *section)
{
- return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
+ return add_memory_section(nid, section, MEM_OFFLINE, HOTPLUG);
}
int unregister_memory_section(struct mem_section *section)
@@ -555,6 +589,26 @@ int unregister_memory_section(struct mem
return remove_memory_block(0, section, 0);
}
+u32 __weak memory_block_size_bytes(void)
+{
+ return MIN_MEMORY_BLOCK_SIZE;
+}
+
+static u32 get_memory_block_size(void)
+{
+ u32 block_sz;
+
+ block_sz = memory_block_size_bytes();
+
+ /* Validate blk_sz is a power of 2 and not less than section size */
+ if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
+ WARN_ON(1);
+ block_sz = MIN_MEMORY_BLOCK_SIZE;
+ }
+
+ return block_sz;
+}
+
/*
* Initialize the sysfs support for memory devices...
*/
@@ -563,6 +617,7 @@ int __init memory_dev_init(void)
unsigned int i;
int ret;
int err;
+ int block_sz;
memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
ret = sysdev_class_register(&memory_sysdev_class);
@@ -571,6 +626,9 @@ int __init memory_dev_init(void)
mutex_init(&mem_sysfs_mutex);
+ block_sz = get_memory_block_size();
+ sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
/*
* Create entries for memory sections that were found
* during boot and have been initialized
@@ -578,8 +636,8 @@ int __init memory_dev_init(void)
for (i = 0; i < NR_MEM_SECTIONS; i++) {
if (!present_section_nr(i))
continue;
- err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
- BOOT);
+ err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE,
+ BOOT);
if (!ret)
ret = err;
}
next prev parent reply other threads:[~2010-08-03 13:40 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-03 13:32 [PATCH 0/9] v4 De-couple sysfs memory directories from memory sections Nathan Fontenot
2010-08-03 13:36 ` [PATCH 1/9] v4 Move the find_memory_block() routine up Nathan Fontenot
2010-08-05 4:45 ` KAMEZAWA Hiroyuki
2010-08-03 13:37 ` [PATCH 2/9] v4 Add new phys_index properties Nathan Fontenot
2010-08-05 4:49 ` KAMEZAWA Hiroyuki
2010-08-03 13:38 ` [PATCH 3/9] v4 Add section count to memory_block Nathan Fontenot
2010-08-05 4:50 ` KAMEZAWA Hiroyuki
2010-08-03 13:39 ` [PATCH 4/9] v4 Add mutex for add/remove of memory blocks Nathan Fontenot
2010-08-05 4:53 ` KAMEZAWA Hiroyuki
2010-08-09 13:55 ` Nathan Fontenot
2010-08-03 13:40 ` Nathan Fontenot [this message]
2010-08-05 4:55 ` [PATCH 5/9] v4 Allow memory_block to span multiple memory sections KAMEZAWA Hiroyuki
2010-08-03 13:41 ` [PATCH 6/9] v4 Update the find_memory_block declaration Nathan Fontenot
2010-08-05 4:59 ` KAMEZAWA Hiroyuki
2010-08-09 13:56 ` Nathan Fontenot
2010-08-03 13:42 ` [PATCH 7/9] v4 Update the node sysfs code Nathan Fontenot
2010-08-05 5:01 ` KAMEZAWA Hiroyuki
2010-08-03 13:43 ` [PATCH 8/9] v4 Define memory_block_size_bytes() for ppc/pseries Nathan Fontenot
2010-08-03 13:44 ` [PATCH 9/9] v4 Update memory-hotplug documentation Nathan Fontenot
2010-08-05 5:04 ` KAMEZAWA Hiroyuki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C581C61.1050408@austin.ibm.com \
--to=nfont@austin.ibm.com \
--cc=dave@linux.vnet.ibm.com \
--cc=greg@kroah.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).