LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 4/5] v2 Update sysfs node routines for new sysfs memory directories
From: Nathan Fontenot @ 2010-07-16 15:40 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <20100716091239.69f40e47.kamezawa.hiroyu@jp.fujitsu.com>

On 07/15/2010 07:12 PM, KAMEZAWA Hiroyuki wrote:
> On Thu, 15 Jul 2010 13:40:40 -0500
> Nathan Fontenot <nfont@austin.ibm.com> wrote:
> 
>> Update the node sysfs directory routines that create
>> links to the memory sysfs directories under each node.
>> This update makes the node code aware that a memory sysfs
>> directory can cover multiple memory sections.
>>
>> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
> 
> Shouldn't "static int link_mem_sections(int nid)" be update ?
> It does
>  for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>         register..
> 

No, although the name 'link_mem_sections' does imply that it should.  The
range of start_pfn..end_pfn examined in this routine is the range of pfn's
covered by the entire node, not a memory_block.

-Nathan

> Thanks,
> -Kame
> 
> 
>> ---
>>  drivers/base/node.c |   12 ++++++++----
>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> Index: linux-2.6/drivers/base/node.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/base/node.c	2010-07-15 09:54:06.000000000 -0500
>> +++ linux-2.6/drivers/base/node.c	2010-07-15 09:56:16.000000000 -0500
>> @@ -346,8 +346,10 @@
>>  		return -EFAULT;
>>  	if (!node_online(nid))
>>  		return 0;
>> -	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
>> -	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
>> +
>> +	sect_start_pfn = section_nr_to_pfn(mem_blk->start_phys_index);
>> +	sect_end_pfn = section_nr_to_pfn(mem_blk->end_phys_index);
>> +	sect_end_pfn += PAGES_PER_SECTION - 1;
>>  	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
>>  		int page_nid;
>>  
>> @@ -383,8 +385,10 @@
>>  	if (!unlinked_nodes)
>>  		return -ENOMEM;
>>  	nodes_clear(*unlinked_nodes);
>> -	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
>> -	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
>> +
>> +	sect_start_pfn = section_nr_to_pfn(mem_blk->start_phys_index);
>> +	sect_end_pfn = section_nr_to_pfn(mem_blk->end_phys_index);
>> +	sect_end_pfn += PAGES_PER_SECTION - 1;
>>  	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
>>  		int nid;
>>  
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
> 

^ permalink raw reply

* Re: [PATCH 2/5] v2 Create new 'end_phys_index' file
From: Nathan Fontenot @ 2010-07-16 15:36 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <20100716090857.5e5c91a3.kamezawa.hiroyu@jp.fujitsu.com>

On 07/15/2010 07:08 PM, KAMEZAWA Hiroyuki wrote:
> On Thu, 15 Jul 2010 13:38:52 -0500
> Nathan Fontenot <nfont@austin.ibm.com> wrote:
> 
>> Add a new 'end_phys_index' file to each memory sysfs directory to
>> report the physical index of the last memory section
>> covered by the sysfs directory.
>>
>> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
> 
> Does memory_block have to be contiguous between [phys_index, end_phys_index] ?
> Should we provide "# of sections" or "amount of memory under a block" ?

Good point.  There is nothing that guarantees that a memory block contains
the contiguous memory sections [phys_index, end_phys_index].  Should there be
a 'memory_sections' file that lists the memory sections present in a memory block?
Something along the lines of:

#> cat memory0/memory_sections
0,1,2,3

This could be done instead of the end_phys_index file.

-Nathan
 
> 
> No objections to end_phys_index...buf plz fix diff style.
> 
> Thanks,
> -Kame
> 
> 
>> ---
>>  drivers/base/memory.c  |   14 +++++++++++++-
>>  include/linux/memory.h |    3 +++
>>  2 files changed, 16 insertions(+), 1 deletion(-)
>>
>> Index: linux-2.6/drivers/base/memory.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/base/memory.c	2010-07-15 09:55:54.000000000 -0500
>> +++ linux-2.6/drivers/base/memory.c	2010-07-15 09:56:05.000000000 -0500
>> @@ -121,7 +121,15 @@
>>  {
>>  	struct memory_block *mem =
>>  		container_of(dev, struct memory_block, sysdev);
>> -	return sprintf(buf, "%08lx\n", mem->phys_index);
>> +	return sprintf(buf, "%08lx\n", mem->start_phys_index);
>> +}
>> +
>> +static ssize_t show_mem_end_phys_index(struct sys_device *dev,
>> +			struct sysdev_attribute *attr, char *buf)
>> +{
>> +	struct memory_block *mem =
>> +		container_of(dev, struct memory_block, sysdev);
>> +	return sprintf(buf, "%08lx\n", mem->end_phys_index);
>>  }
>>  
>>  /*
>> @@ -321,6 +329,7 @@
>>  }
>>  
>>  static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
>> +static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
>>  static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
>>  static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
>>  static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
>> @@ -533,6 +542,8 @@
>>  		if (!ret)
>>  			ret = mem_create_simple_file(mem, phys_index);
>>  		if (!ret)
>> +			ret = mem_create_simple_file(mem, end_phys_index);
>> +		if (!ret)
>>  			ret = mem_create_simple_file(mem, state);
>>  		if (!ret)
>>  			ret = mem_create_simple_file(mem, phys_device);
>> @@ -577,6 +588,7 @@
>>  	if (list_empty(&mem->sections)) {
>>  		unregister_mem_sect_under_nodes(mem);
>>  		mem_remove_simple_file(mem, phys_index);
>> +		mem_remove_simple_file(mem, end_phys_index);
>>  		mem_remove_simple_file(mem, state);
>>  		mem_remove_simple_file(mem, phys_device);
>>  		mem_remove_simple_file(mem, removable);
>> Index: linux-2.6/include/linux/memory.h
>> ===================================================================
>> --- linux-2.6.orig/include/linux/memory.h	2010-07-15 09:54:06.000000000 -0500
>> +++ linux-2.6/include/linux/memory.h	2010-07-15 09:56:05.000000000 -0500
>> @@ -29,6 +29,9 @@
>>  
>>  struct memory_block {
>>  	unsigned long state;
>> +	unsigned long start_phys_index;
>> +	unsigned long end_phys_index;
>> +
>>  	/*
>>  	 * This serializes all state change requests.  It isn't
>>  	 * held during creation because the control files are
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
> 

^ permalink raw reply

* Re: [PATCH 1/5] v2 Split the memory_block structure
From: Nathan Fontenot @ 2010-07-16 15:29 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <20100716090637.3654f91d.kamezawa.hiroyu@jp.fujitsu.com>

Thanks for taking a look at this, Kame; answers below...

-Nathan

On 07/15/2010 07:06 PM, KAMEZAWA Hiroyuki wrote:
> On Thu, 15 Jul 2010 13:37:51 -0500
> Nathan Fontenot <nfont@austin.ibm.com> wrote:
> 
>> Split the memory_block struct into a memory_block
>> struct to cover each sysfs directory and a new memory_block_section
>> struct for each memory section covered by the sysfs directory.
>> This change allows for creation of memory sysfs directories that
>> can span multiple memory sections.
>>
>> This can be beneficial in that it can reduce the number of memory
>> sysfs directories created at boot.  This also allows different
>> architectures to define how many memory sections are covered by
>> a sysfs directory.
>>
>> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
>> ---
>>  drivers/base/memory.c  |  222 ++++++++++++++++++++++++++++++++++---------------
>>  include/linux/memory.h |   11 +-
>>  2 files changed, 167 insertions(+), 66 deletions(-)
>>
>> Index: linux-2.6/drivers/base/memory.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/base/memory.c	2010-07-15 08:48:41.000000000 -0500
>> +++ linux-2.6/drivers/base/memory.c	2010-07-15 09:55:54.000000000 -0500
>> @@ -28,6 +28,14 @@
>>  #include <asm/uaccess.h>
>>  
>>  #define MEMORY_CLASS_NAME	"memory"
>> +#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
>> +
>> +static int sections_per_block;
>> +
>> +static inline int base_memory_block_id(int section_nr)
>> +{
>> +	return (section_nr / sections_per_block) * sections_per_block;
>> +}
>>  
>>  static struct sysdev_class memory_sysdev_class = {
>>  	.name = MEMORY_CLASS_NAME,
>> @@ -94,10 +102,9 @@
>>  }
>>  
>>  static void
>> -unregister_memory(struct memory_block *memory, struct mem_section *section)
>> +unregister_memory(struct memory_block *memory)
>>  {
>>  	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
>> -	BUG_ON(memory->sysdev.id != __section_nr(section));
>>  
>>  	/* drop the ref. we got in remove_memory_block() */
>>  	kobject_put(&memory->sysdev.kobj);
>> @@ -123,13 +130,20 @@
>>  static ssize_t show_mem_removable(struct sys_device *dev,
>>  			struct sysdev_attribute *attr, char *buf)
>>  {
>> +	struct memory_block *mem;
>> +	struct memory_block_section *mbs;
>>  	unsigned long start_pfn;
>> -	int ret;
>> -	struct memory_block *mem =
>> -		container_of(dev, struct memory_block, sysdev);
>> +	int ret = 1;
>> +
>> +	mem = container_of(dev, struct memory_block, sysdev);
>> +	mutex_lock(&mem->state_mutex);
>>  
>> -	start_pfn = section_nr_to_pfn(mem->phys_index);
>> -	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
>> +	list_for_each_entry(mbs, &mem->sections, next) {
>> +		start_pfn = section_nr_to_pfn(mbs->phys_index);
>> +		ret &= is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
>> +	}
>> +
>> +	mutex_unlock(&mem->state_mutex);
> 
> Hmm, this means memory cab be offlined the while memory block section. Right ?
> Please write this fact in patch description...
> And Documentaion/memory_hotplug.txt as "From user's perspective, memory section
> is not a unit of memory hotplug anymore".
> And descirbe about a new rule.

You are correct.  A memory block is removable only if all of the memory
sections contained within the memory block are removable.

I will include a documentation patch with v3 of the patches to explain this
and to explain that memory add/remove operations are done on a per memory
block basis.

> 
> 
>>  	return sprintf(buf, "%d\n", ret);
>>  }
>>  
>> @@ -182,16 +196,16 @@
>>   * OK to have direct references to sparsemem variables in here.
>>   */
>>  static int
>> -memory_block_action(struct memory_block *mem, unsigned long action)
>> +memory_block_action(struct memory_block_section *mbs, unsigned long action)
>>  {
>>  	int i;
>>  	unsigned long psection;
>>  	unsigned long start_pfn, start_paddr;
>>  	struct page *first_page;
>>  	int ret;
>> -	int old_state = mem->state;
>> +	int old_state = mbs->state;
>>  
>> -	psection = mem->phys_index;
>> +	psection = mbs->phys_index;
>>  	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
>>  
>>  	/*
>> @@ -217,18 +231,18 @@
>>  			ret = online_pages(start_pfn, PAGES_PER_SECTION);
>>  			break;
>>  		case MEM_OFFLINE:
>> -			mem->state = MEM_GOING_OFFLINE;
>> +			mbs->state = MEM_GOING_OFFLINE;
>>  			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
>>  			ret = remove_memory(start_paddr,
>>  					    PAGES_PER_SECTION << PAGE_SHIFT);
>>  			if (ret) {
>> -				mem->state = old_state;
>> +				mbs->state = old_state;
>>  				break;
>>  			}
>>  			break;
>>  		default:
>>  			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
>> -					__func__, mem, action, action);
>> +					__func__, mbs, action, action);
>>  			ret = -EINVAL;
>>  	}
>>  
>> @@ -238,19 +252,34 @@
> 
> And please check quilt's diff option.
> Usual patche in ML shows a function name in any changes, as
> @@ -241,6 +293,8 @@ static int memory_block_change_state(str
> 
> Maybe "-p" option is lacked..

Sorry about that.  I'm just using the default options with quilt.  I'll
play around with it to see why this is happening.

> 
> 
>>  static int memory_block_change_state(struct memory_block *mem,
>>  		unsigned long to_state, unsigned long from_state_req)
>>  {
>> +	struct memory_block_section *mbs;
>>  	int ret = 0;
>> +
>>  	mutex_lock(&mem->state_mutex);
>>  
>> -	if (mem->state != from_state_req) {
>> -		ret = -EINVAL;
>> -		goto out;
>> +	list_for_each_entry(mbs, &mem->sections, next) {
>> +		if (mbs->state != from_state_req)
>> +			continue;
>> +
>> +		ret = memory_block_action(mbs, to_state);
>> +		if (ret)
>> +			break;
>> +	}
>> +
>> +	if (ret) {
>> +		list_for_each_entry(mbs, &mem->sections, next) {
>> +			if (mbs->state == from_state_req)
>> +				continue;
>> +
>> +			if (memory_block_action(mbs, to_state))
>> +				printk(KERN_ERR "Could not re-enable memory "
>> +				       "section %lx\n", mbs->phys_index);
> 
> Why re-enable only ? online->fail->offline never happens ?
> If so, please add comment at least.

This should handle both conditions.  If we fail to move all of the memory
sections to the 'to_state', it puts all of the memory sections back to the
'from_state_req'.

> BTW, is it guaranteed that all sections under a block has same state after
> boot ?

Yes, during boot all memory sections are marked online.

> 
>> +		}
>>  	}
>>  
>> -	ret = memory_block_action(mem, to_state);
>>  	if (!ret)
>>  		mem->state = to_state;
>>  
>> -out:
>>  	mutex_unlock(&mem->state_mutex);
>>  	return ret;
>>  }
>> @@ -260,20 +289,15 @@
>>  		struct sysdev_attribute *attr, const char *buf, size_t count)
>>  {
>>  	struct memory_block *mem;
>> -	unsigned int phys_section_nr;
>>  	int ret = -EINVAL;
>>  
>>  	mem = container_of(dev, struct memory_block, sysdev);
>> -	phys_section_nr = mem->phys_index;
>> -
>> -	if (!present_section_nr(phys_section_nr))
>> -		goto out;
>>
> I'm sorry but I couldn't remember why this check was necessary...

Not sure either, it appears that it is there to ensure that the memory
section we are trying to act on is actually present.

> 
> 
>  
>>  	if (!strncmp(buf, "online", min((int)count, 6)))
>>  		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
>>  	else if(!strncmp(buf, "offline", min((int)count, 7)))
>>  		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
>> -out:
>> +
>>  	if (ret)
>>  		return ret;
>>  	return count;
>> @@ -435,39 +459,6 @@
>>  	return 0;
>>  }
>>  
>> -static int add_memory_block(int nid, struct mem_section *section,
>> -			unsigned long state, enum mem_add_context context)
>> -{
>> -	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>> -	unsigned long start_pfn;
>> -	int ret = 0;
>> -
>> -	if (!mem)
>> -		return -ENOMEM;
>> -
>> -	mem->phys_index = __section_nr(section);
>> -	mem->state = state;
>> -	mutex_init(&mem->state_mutex);
>> -	start_pfn = section_nr_to_pfn(mem->phys_index);
>> -	mem->phys_device = arch_get_memory_phys_device(start_pfn);
>> -
>> -	ret = register_memory(mem, section);
>> -	if (!ret)
>> -		ret = mem_create_simple_file(mem, phys_index);
>> -	if (!ret)
>> -		ret = mem_create_simple_file(mem, state);
>> -	if (!ret)
>> -		ret = mem_create_simple_file(mem, phys_device);
>> -	if (!ret)
>> -		ret = mem_create_simple_file(mem, removable);
>> -	if (!ret) {
>> -		if (context == HOTPLUG)
>> -			ret = register_mem_sect_under_node(mem, nid);
>> -	}
>> -
>> -	return ret;
>> -}
>> -
> 
> I don't say strongly but this kind of move-code should be done in another patch.

OK, I will move the code-move piece to a different patch.

> 
> 
>>  /*
>>   * For now, we have a linear search to go find the appropriate
>>   * memory_block corresponding to a particular phys_index. If
>> @@ -482,12 +473,13 @@
>>  	struct sys_device *sysdev;
>>  	struct memory_block *mem;
>>  	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
>> +	int block_id = base_memory_block_id(__section_nr(section));
>>  
>>  	/*
>>  	 * This only works because we know that section == sysdev->id
>>  	 * slightly redundant with sysdev_register()
>>  	 */
>> -	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
>> +	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
>>  
>>  	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
>>  	if (!kobj)
>> @@ -499,18 +491,98 @@
>>  	return mem;
>>  }
>>  
>> +static int add_mem_block_section(struct memory_block *mem,
>> +				 int section_nr, unsigned long state)
>> +{
>> +	struct memory_block_section *mbs;
>> +
>> +	mbs = kzalloc(sizeof(*mbs), GFP_KERNEL);
>> +	if (!mbs)
>> +		return -ENOMEM;
>> +
>> +	mbs->phys_index = section_nr;
>> +	mbs->state = state;
>> +
>> +	list_add(&mbs->next, &mem->sections);
>> +	return 0;
>> +}
> 
> Doesn't this "sections" need to be sorted ? Hmm.

We could, but I cannot think of anything we gain by sorting it.

> 
> 
>> +
>> +static int add_memory_block(int nid, struct mem_section *section,
>> +			unsigned long state, enum mem_add_context context)
>> +{
>> +	struct memory_block *mem;
>> +	int ret = 0;
>> +
>> +	mem = find_memory_block(section);
>> +	if (!mem) {
>> +		unsigned long start_pfn;
>> +
>> +		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>> +		if (!mem)
>> +			return -ENOMEM;
>> +
>> +		mem->state = state;
>> +		mutex_init(&mem->state_mutex);
>> +		start_pfn = section_nr_to_pfn(__section_nr(section));
>> +		mem->phys_device = arch_get_memory_phys_device(start_pfn);
>> +		INIT_LIST_HEAD(&mem->sections);
>> +
>> +		mutex_lock(&mem->state_mutex);
>> +
>> +		ret = register_memory(mem, section);
>> +		if (!ret)
>> +			ret = mem_create_simple_file(mem, phys_index);
>> +		if (!ret)
>> +			ret = mem_create_simple_file(mem, state);
>> +		if (!ret)
>> +			ret = mem_create_simple_file(mem, phys_device);
>> +		if (!ret)
>> +			ret = mem_create_simple_file(mem, removable);
>> +		if (!ret) {
>> +			if (context == HOTPLUG)
>> +				ret = register_mem_sect_under_node(mem, nid);
>> +		}
>> +	} else {
>> +		kobject_put(&mem->sysdev.kobj);
>> +		mutex_lock(&mem->state_mutex);
>> +	}
>> +
>> +	if (!ret)
>> +		ret = add_mem_block_section(mem, __section_nr(section), state);
>> +
>> +	mutex_unlock(&mem->state_mutex);
>> +	return ret;
>> +}
>> +
>>  int remove_memory_block(unsigned long node_id, struct mem_section *section,
>>  		int phys_device)
>>  {
>>  	struct memory_block *mem;
>> +	struct memory_block_section *mbs, *tmp;
>> +	int section_nr = __section_nr(section);
>>  
>>  	mem = find_memory_block(section);
>> -	unregister_mem_sect_under_nodes(mem);
>> -	mem_remove_simple_file(mem, phys_index);
>> -	mem_remove_simple_file(mem, state);
>> -	mem_remove_simple_file(mem, phys_device);
>> -	mem_remove_simple_file(mem, removable);
>> -	unregister_memory(mem, section);
>> +	mutex_lock(&mem->state_mutex);
>> +
>> +	/* remove the specified section */
>> +	list_for_each_entry_safe(mbs, tmp, &mem->sections, next) {
>> +		if (mbs->phys_index == section_nr) {
>> +			list_del(&mbs->next);
>> +			kfree(mbs);
>> +		}
>> +	}
>> +
>> +	mutex_unlock(&mem->state_mutex);
>> +
>> +	if (list_empty(&mem->sections)) {
>> +		unregister_mem_sect_under_nodes(mem);
>> +		mem_remove_simple_file(mem, phys_index);
>> +		mem_remove_simple_file(mem, state);
>> +		mem_remove_simple_file(mem, phys_device);
>> +		mem_remove_simple_file(mem, removable);
>> +		unregister_memory(mem);
>> +		kfree(mem);
>> +	}
>>  
>>  	return 0;
>>  }
>> @@ -532,6 +604,24 @@
>>  	return remove_memory_block(0, section, 0);
>>  }
>>  
>> +u32 __weak memory_block_size(void)
>> +{
>> +	return MIN_MEMORY_BLOCK_SIZE;
>> +}
>> +
>> +static u32 get_memory_block_size(void)
>> +{
>> +	u32 blk_sz;
>> +
>> +	blk_sz = memory_block_size();
>> +
>> +	/* Validate blk_sz is a power of 2 and not less than section size */
>> +	if ((blk_sz & (blk_sz - 1)) || (blk_sz < MIN_MEMORY_BLOCK_SIZE))
>> +		blk_sz = MIN_MEMORY_BLOCK_SIZE;
>> +
>> +	return blk_sz;
>> +}
>> +
>>  /*
>>   * Initialize the sysfs support for memory devices...
>>   */
>> @@ -540,12 +630,16 @@
>>  	unsigned int i;
>>  	int ret;
>>  	int err;
>> +	int block_sz;
>>  
>>  	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
>>  	ret = sysdev_class_register(&memory_sysdev_class);
>>  	if (ret)
>>  		goto out;
>>  
>> +	block_sz = get_memory_block_size();
>> +	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
>> +
>>  	/*
>>  	 * Create entries for memory sections that were found
>>  	 * during boot and have been initialized
>> Index: linux-2.6/include/linux/memory.h
>> ===================================================================
>> --- linux-2.6.orig/include/linux/memory.h	2010-07-15 08:48:41.000000000 -0500
>> +++ linux-2.6/include/linux/memory.h	2010-07-15 09:54:06.000000000 -0500
>> @@ -19,9 +19,15 @@
>>  #include <linux/node.h>
>>  #include <linux/compiler.h>
>>  #include <linux/mutex.h>
>> +#include <linux/list.h>
>>  
>> -struct memory_block {
>> +struct memory_block_section {
>> +	unsigned long state;
>>  	unsigned long phys_index;
>> +	struct list_head next;
>> +};
>> +
>> +struct memory_block {
>>  	unsigned long state;
>>  	/*
>>  	 * This serializes all state change requests.  It isn't
>> @@ -34,6 +40,7 @@
>>  	void *hw;			/* optional pointer to fw/hw data */
>>  	int (*phys_callback)(struct memory_block *);
>>  	struct sys_device sysdev;
>> +	struct list_head sections;
>>  };
>>  
>>  int arch_get_memory_phys_device(unsigned long start_pfn);
>> @@ -113,7 +120,7 @@
>>  extern int remove_memory_block(unsigned long, struct mem_section *, int);
>>  extern int memory_notify(unsigned long val, void *v);
>>  extern int memory_isolate_notify(unsigned long val, void *v);
>> -extern struct memory_block *find_memory_block(unsigned long);
>> +extern struct memory_block *find_memory_block(struct mem_section *);
>>  extern int memory_is_hidden(struct mem_section *);
>>  #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
>>  enum mem_add_context { BOOT, HOTPLUG };
>>
> 
> Okay, please go ahead. But my 1st impression is that IBM should increase ppc's
> SECTION_SIZE ;)
> 
> Thanks,
> -Kame
> 
> 
>  
> 

^ permalink raw reply

* Re: [PATCH 1/2] Remove REDWOOD_[456] config options and conditional code
From: Josh Boyer @ 2010-07-16 14:20 UTC (permalink / raw)
  To: Christian Dietrich
  Cc: Randy Dunlap, linuxppc-dev, Alexander Kurz, Paul Mackerras,
	John Linn, David Brown, Ladislav Michl, Solomon Peachy, vamos-dev,
	Mike Frysinger, Florian Fainelli, Artem Bityutskiy, Nicolas Pitre,
	netdev, linux-kernel, Milton Miller, Jiri Kosina, Joe Perches,
	linux-mtd, David Woodhouse, David S. Miller
In-Reply-To: <ca1bb25d203618c3548748f5efb6f125a96c89e0.1279282865.git.qy03fugy@stud.informatik.uni-erlangen.de>

On Fri, Jul 16, 2010 at 02:29:02PM +0200, Christian Dietrich wrote:
>The config options for REDWOOD_[456] were commented out in the powerpc
>Kconfig. The ifdefs referencing this options therefore are dead and all
>references to this can be removed (Also dependencies in other KConfig
>files).
>
>Signed-off-by: Christian Dietrich <qy03fugy@stud.informatik.uni-erlangen.de>
>Signed-off-by: Christoph Egger <siccegge@cs.fau.de>

This seems fine with me.

The only question is which tree it comes through.  I'm happy to take it
in via mine if the netdev and MTD people are fine with that.  Otherwise,
my ack is below.

Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>

josh

>---
> arch/powerpc/platforms/40x/Kconfig |   16 -------------
> drivers/mtd/maps/Kconfig           |    2 +-
> drivers/mtd/maps/redwood.c         |   43 ------------------------------------
> drivers/net/Kconfig                |    2 +-
> drivers/net/smc91x.h               |   37 -------------------------------
> 5 files changed, 2 insertions(+), 98 deletions(-)
>
>diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
>index ec64264..b721764 100644
>--- a/arch/powerpc/platforms/40x/Kconfig
>+++ b/arch/powerpc/platforms/40x/Kconfig
>@@ -71,22 +71,6 @@ config MAKALU
> 	help
> 	  This option enables support for the AMCC PPC405EX board.
>
>-#config REDWOOD_5
>-#	bool "Redwood-5"
>-#	depends on 40x
>-#	default n
>-#	select STB03xxx
>-#	help
>-#	  This option enables support for the IBM STB04 evaluation board.
>-
>-#config REDWOOD_6
>-#	bool "Redwood-6"
>-#	depends on 40x
>-#	default n
>-#	select STB03xxx
>-#	help
>-#	  This option enables support for the IBM STBx25xx evaluation board.
>-
> #config SYCAMORE
> #	bool "Sycamore"
> #	depends on 40x
>diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
>index f22bc9f..6629d09 100644
>--- a/drivers/mtd/maps/Kconfig
>+++ b/drivers/mtd/maps/Kconfig
>@@ -321,7 +321,7 @@ config MTD_CFI_FLAGADM
>
> config MTD_REDWOOD
> 	tristate "CFI Flash devices mapped on IBM Redwood"
>-	depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
>+	depends on MTD_CFI
> 	help
> 	  This enables access routines for the flash chips on the IBM
> 	  Redwood board. If you have one of these boards and would like to
>diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
>index 933c0b6..d2c9db0 100644
>--- a/drivers/mtd/maps/redwood.c
>+++ b/drivers/mtd/maps/redwood.c
>@@ -22,8 +22,6 @@
>
> #include <asm/io.h>
>
>-#if !defined (CONFIG_REDWOOD_6)
>-
> #define WINDOW_ADDR 0xffc00000
> #define WINDOW_SIZE 0x00400000
>
>@@ -69,47 +67,6 @@ static struct mtd_partition redwood_flash_partitions[] = {
> 	}
> };
>
>-#else /* CONFIG_REDWOOD_6 */
>-/* FIXME: the window is bigger - armin */
>-#define WINDOW_ADDR 0xff800000
>-#define WINDOW_SIZE 0x00800000
>-
>-#define RW_PART0_OF	0
>-#define RW_PART0_SZ	0x400000	/* 4 MiB data */
>-#define RW_PART1_OF	RW_PART0_OF + RW_PART0_SZ
>-#define RW_PART1_SZ	0x10000		/* 64K VPD */
>-#define RW_PART2_OF	RW_PART1_OF + RW_PART1_SZ
>-#define RW_PART2_SZ	0x400000 - (0x10000 + 0x20000)
>-#define RW_PART3_OF	RW_PART2_OF + RW_PART2_SZ
>-#define RW_PART3_SZ	0x20000
>-
>-static struct mtd_partition redwood_flash_partitions[] = {
>-	{
>-		.name = "Redwood filesystem",
>-		.offset = RW_PART0_OF,
>-		.size = RW_PART0_SZ
>-	},
>-	{
>-		.name = "Redwood OpenBIOS Vital Product Data",
>-		.offset = RW_PART1_OF,
>-		.size = RW_PART1_SZ,
>-		.mask_flags = MTD_WRITEABLE	/* force read-only */
>-	},
>-	{
>-		.name = "Redwood kernel",
>-		.offset = RW_PART2_OF,
>-		.size = RW_PART2_SZ
>-	},
>-	{
>-		.name = "Redwood OpenBIOS",
>-		.offset = RW_PART3_OF,
>-		.size = RW_PART3_SZ,
>-		.mask_flags = MTD_WRITEABLE	/* force read-only */
>-	}
>-};
>-
>-#endif /* CONFIG_REDWOOD_6 */
>-
> struct map_info redwood_flash_map = {
> 	.name = "IBM Redwood",
> 	.size = WINDOW_SIZE,
>diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
>index ce2fcdd..313d306 100644
>--- a/drivers/net/Kconfig
>+++ b/drivers/net/Kconfig
>@@ -913,7 +913,7 @@ config SMC91X
> 	tristate "SMC 91C9x/91C1xxx support"
> 	select CRC32
> 	select MII
>-	depends on ARM || REDWOOD_5 || REDWOOD_6 || M32R || SUPERH || \
>+	depends on ARM || M32R || SUPERH || \
> 		MIPS || BLACKFIN || MN10300 || COLDFIRE
> 	help
> 	  This is a driver for SMC's 91x series of Ethernet chipsets,
>diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
>index 8d2772c..ee74791 100644
>--- a/drivers/net/smc91x.h
>+++ b/drivers/net/smc91x.h
>@@ -83,43 +83,6 @@ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
> 	}
> }
>
>-#elif defined(CONFIG_REDWOOD_5) || defined(CONFIG_REDWOOD_6)
>-
>-/* We can only do 16-bit reads and writes in the static memory space. */
>-#define SMC_CAN_USE_8BIT	0
>-#define SMC_CAN_USE_16BIT	1
>-#define SMC_CAN_USE_32BIT	0
>-#define SMC_NOWAIT		1
>-
>-#define SMC_IO_SHIFT		0
>-
>-#define SMC_inw(a, r)		in_be16((volatile u16 *)((a) + (r)))
>-#define SMC_outw(v, a, r)	out_be16((volatile u16 *)((a) + (r)), v)
>-#define SMC_insw(a, r, p, l) 						\
>-	do {								\
>-		unsigned long __port = (a) + (r);			\
>-		u16 *__p = (u16 *)(p);					\
>-		int __l = (l);						\
>-		insw(__port, __p, __l);					\
>-		while (__l > 0) {					\
>-			*__p = swab16(*__p);				\
>-			__p++;						\
>-			__l--;						\
>-		}							\
>-	} while (0)
>-#define SMC_outsw(a, r, p, l) 						\
>-	do {								\
>-		unsigned long __port = (a) + (r);			\
>-		u16 *__p = (u16 *)(p);					\
>-		int __l = (l);						\
>-		while (__l > 0) {					\
>-			/* Believe it or not, the swab isn't needed. */	\
>-			outw( /* swab16 */ (*__p++), __port);		\
>-			__l--;						\
>-		}							\
>-	} while (0)
>-#define SMC_IRQ_FLAGS		(0)
>-
> #elif defined(CONFIG_SA1100_PLEB)
> /* We can only do 16-bit reads and writes in the static memory space. */
> #define SMC_CAN_USE_8BIT	1
>-- 
>1.7.0.4
>

^ permalink raw reply

* [PATCH 1/2] Remove REDWOOD_[456] config options and conditional code
From: Christian Dietrich @ 2010-07-16 12:29 UTC (permalink / raw)
  To: Milton Miller, Josh Boyer, Matt Porter, Benjamin Herrenschmidt,
	Paul Mackerras, Solomon Peachy, David Woodhouse, Mike Frysinger,
	Jiri Kosina, Artem Bityutskiy, Alexander Kurz, David S. Miller,
	Randy Dunlap, John Linn, Florian Fainelli, Nicolas Pitre,
	Joe Perches, Ladislav Michl, David Brown, linuxppc-dev,
	linux-kernel, linux-mtd, netdev
  Cc: vamos-dev
In-Reply-To: <cover.1279282865.git.qy03fugy@stud.informatik.uni-erlangen.de>

The config options for REDWOOD_[456] were commented out in the powerpc
Kconfig. The ifdefs referencing these options are therefore dead, and all
references to them can be removed (along with the dependencies in other
Kconfig files).

Signed-off-by: Christian Dietrich <qy03fugy@stud.informatik.uni-erlangen.de>
Signed-off-by: Christoph Egger <siccegge@cs.fau.de>
---
 arch/powerpc/platforms/40x/Kconfig |   16 -------------
 drivers/mtd/maps/Kconfig           |    2 +-
 drivers/mtd/maps/redwood.c         |   43 ------------------------------------
 drivers/net/Kconfig                |    2 +-
 drivers/net/smc91x.h               |   37 -------------------------------
 5 files changed, 2 insertions(+), 98 deletions(-)

diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index ec64264..b721764 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -71,22 +71,6 @@ config MAKALU
 	help
 	  This option enables support for the AMCC PPC405EX board.
 
-#config REDWOOD_5
-#	bool "Redwood-5"
-#	depends on 40x
-#	default n
-#	select STB03xxx
-#	help
-#	  This option enables support for the IBM STB04 evaluation board.
-
-#config REDWOOD_6
-#	bool "Redwood-6"
-#	depends on 40x
-#	default n
-#	select STB03xxx
-#	help
-#	  This option enables support for the IBM STBx25xx evaluation board.
-
 #config SYCAMORE
 #	bool "Sycamore"
 #	depends on 40x
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index f22bc9f..6629d09 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -321,7 +321,7 @@ config MTD_CFI_FLAGADM
 
 config MTD_REDWOOD
 	tristate "CFI Flash devices mapped on IBM Redwood"
-	depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
+	depends on MTD_CFI
 	help
 	  This enables access routines for the flash chips on the IBM
 	  Redwood board. If you have one of these boards and would like to
diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
index 933c0b6..d2c9db0 100644
--- a/drivers/mtd/maps/redwood.c
+++ b/drivers/mtd/maps/redwood.c
@@ -22,8 +22,6 @@
 
 #include <asm/io.h>
 
-#if !defined (CONFIG_REDWOOD_6)
-
 #define WINDOW_ADDR 0xffc00000
 #define WINDOW_SIZE 0x00400000
 
@@ -69,47 +67,6 @@ static struct mtd_partition redwood_flash_partitions[] = {
 	}
 };
 
-#else /* CONFIG_REDWOOD_6 */
-/* FIXME: the window is bigger - armin */
-#define WINDOW_ADDR 0xff800000
-#define WINDOW_SIZE 0x00800000
-
-#define RW_PART0_OF	0
-#define RW_PART0_SZ	0x400000	/* 4 MiB data */
-#define RW_PART1_OF	RW_PART0_OF + RW_PART0_SZ
-#define RW_PART1_SZ	0x10000		/* 64K VPD */
-#define RW_PART2_OF	RW_PART1_OF + RW_PART1_SZ
-#define RW_PART2_SZ	0x400000 - (0x10000 + 0x20000)
-#define RW_PART3_OF	RW_PART2_OF + RW_PART2_SZ
-#define RW_PART3_SZ	0x20000
-
-static struct mtd_partition redwood_flash_partitions[] = {
-	{
-		.name = "Redwood filesystem",
-		.offset = RW_PART0_OF,
-		.size = RW_PART0_SZ
-	},
-	{
-		.name = "Redwood OpenBIOS Vital Product Data",
-		.offset = RW_PART1_OF,
-		.size = RW_PART1_SZ,
-		.mask_flags = MTD_WRITEABLE	/* force read-only */
-	},
-	{
-		.name = "Redwood kernel",
-		.offset = RW_PART2_OF,
-		.size = RW_PART2_SZ
-	},
-	{
-		.name = "Redwood OpenBIOS",
-		.offset = RW_PART3_OF,
-		.size = RW_PART3_SZ,
-		.mask_flags = MTD_WRITEABLE	/* force read-only */
-	}
-};
-
-#endif /* CONFIG_REDWOOD_6 */
-
 struct map_info redwood_flash_map = {
 	.name = "IBM Redwood",
 	.size = WINDOW_SIZE,
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ce2fcdd..313d306 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -913,7 +913,7 @@ config SMC91X
 	tristate "SMC 91C9x/91C1xxx support"
 	select CRC32
 	select MII
-	depends on ARM || REDWOOD_5 || REDWOOD_6 || M32R || SUPERH || \
+	depends on ARM || M32R || SUPERH || \
 		MIPS || BLACKFIN || MN10300 || COLDFIRE
 	help
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index 8d2772c..ee74791 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -83,43 +83,6 @@ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 	}
 }
 
-#elif defined(CONFIG_REDWOOD_5) || defined(CONFIG_REDWOOD_6)
-
-/* We can only do 16-bit reads and writes in the static memory space. */
-#define SMC_CAN_USE_8BIT	0
-#define SMC_CAN_USE_16BIT	1
-#define SMC_CAN_USE_32BIT	0
-#define SMC_NOWAIT		1
-
-#define SMC_IO_SHIFT		0
-
-#define SMC_inw(a, r)		in_be16((volatile u16 *)((a) + (r)))
-#define SMC_outw(v, a, r)	out_be16((volatile u16 *)((a) + (r)), v)
-#define SMC_insw(a, r, p, l) 						\
-	do {								\
-		unsigned long __port = (a) + (r);			\
-		u16 *__p = (u16 *)(p);					\
-		int __l = (l);						\
-		insw(__port, __p, __l);					\
-		while (__l > 0) {					\
-			*__p = swab16(*__p);				\
-			__p++;						\
-			__l--;						\
-		}							\
-	} while (0)
-#define SMC_outsw(a, r, p, l) 						\
-	do {								\
-		unsigned long __port = (a) + (r);			\
-		u16 *__p = (u16 *)(p);					\
-		int __l = (l);						\
-		while (__l > 0) {					\
-			/* Believe it or not, the swab isn't needed. */	\
-			outw( /* swab16 */ (*__p++), __port);		\
-			__l--;						\
-		}							\
-	} while (0)
-#define SMC_IRQ_FLAGS		(0)
-
 #elif defined(CONFIG_SA1100_PLEB)
 /* We can only do 16-bit reads and writes in the static memory space. */
 #define SMC_CAN_USE_8BIT	1
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH 0/2] Removing dead code
From: Christian Dietrich @ 2010-07-16 12:28 UTC (permalink / raw)
  To: Milton Miller, Josh Boyer, Matt Porter, Benjamin Herrenschmidt,
	Paul Mackerras, Solomon Peachy, David Woodhouse, Mike Frysinger,
	Jiri Kosina, Artem Bityutskiy, Alexander Kurz, Russell King,
	Ralf Baechle, Manuel Lauss, David S. Miller, Randy Dunlap,
	John Linn, Florian Fainelli, Nicolas Pitre, Joe Perches,
	Ladislav Michl, David Brown, linuxppc-dev, linux-kernel,
	linux-mtd, netdev
  Cc: vamos-dev
In-Reply-To: <redwood56-reply-1-miltonm@bga.com>

Hi all!

I merged the two patches from Christoph Egger[1] to remove the
REDWOOD_[456] config dependencies, and wrote a second patch, which removes
the redwood/mtd mapping module. I hope this is now acceptable for inclusion
in the kernel, if these options are really dead.

Regards

        Christian Dietrich

[0] http://vamos1.informatik.uni-erlangen.de/
[1] Message-Id: <adba61f63f4439ac17f2e428429f01ae5e65ab15.1279110895.git.siccegge@cs.fau.de>

Christian Dietrich (2):
  Remove REDWOOD_[456] config options and conditional code
  Removed redwood/mtd mapping

 arch/powerpc/platforms/40x/Kconfig |   16 ----
 drivers/mtd/maps/Kconfig           |    8 --
 drivers/mtd/maps/Makefile          |    1 -
 drivers/mtd/maps/redwood.c         |  174 ------------------------------------
 drivers/net/Kconfig                |    2 +-
 drivers/net/smc91x.h               |   37 --------
 6 files changed, 1 insertions(+), 237 deletions(-)
 delete mode 100644 drivers/mtd/maps/redwood.c

^ permalink raw reply

* Re: Badness with the kernel version 2.6.35-rc1-git1 running on P6 box
From: Eric Dumazet @ 2010-07-16 12:20 UTC (permalink / raw)
  To: divya; +Cc: sachinp, netdev, LKML, linuxppc-dev, Jan-Bernd Themann,
	David Miller
In-Reply-To: <1279274185.2549.14.camel@edumazet-laptop>

Le vendredi 16 juillet 2010 à 11:56 +0200, Eric Dumazet a écrit :

> [PATCH] ehea: ehea_get_stats() should use GFP_KERNEL
> 
> ehea_get_stats() is called in process context and should use GFP_KERNEL
> allocation instead of GFP_ATOMIC.
> 
> Clearing stats at beginning of ehea_get_stats() is racy in case of
> concurrent stat readers.
> 
> get_stats() can also use netdev net_device_stats, instead of a private
> copy.
> 
> Reported-by: divya <dipraksh@linux.vnet.ibm.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  drivers/net/ehea/ehea.h      |    1 -
>  drivers/net/ehea/ehea_main.c |    6 ++----
>  2 files changed, 2 insertions(+), 5 deletions(-)
> 
> 

Hmm, net-next-2.6 contains the following patch:

commit 3d8009c780ee90fccb5c171caf30aff839f13547
Author: Brian King <brking@linux.vnet.ibm.com>
Date:   Wed Jun 30 11:59:12 2010 +0000

    ehea: Allocate stats buffer with GFP_KERNEL
    
    Since ehea_get_stats calls ehea_h_query_ehea_port, which
    can sleep, we can also sleep when allocating a page in
    this function. This fixes some memory allocation failure
    warnings seen under low memory conditions.
    
    Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 8b92acb..3beba70 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -335,7 +335,7 @@ static struct net_device_stats
*ehea_get_stats(struct net_device *dev)
 
        memset(stats, 0, sizeof(*stats));
 
-       cb2 = (void *)get_zeroed_page(GFP_ATOMIC);
+       cb2 = (void *)get_zeroed_page(GFP_KERNEL);
        if (!cb2) {
                ehea_error("no mem for cb2");
                goto out;

^ permalink raw reply related

* Re: Badness with the kernel version 2.6.35-rc1-git1 running on P6 box
From: Eric Dumazet @ 2010-07-16  9:56 UTC (permalink / raw)
  To: divya; +Cc: sachinp, netdev, LKML, linuxppc-dev, Jan-Bernd Themann,
	David Miller
In-Reply-To: <4C401D56.3070108@linux.vnet.ibm.com>

Le vendredi 16 juillet 2010 à 14:20 +0530, divya a écrit :
> Hi ,
> 
> With the latest kernel version 2.6.35-rc5-git1(2f7989efd4398) running on power(p6) box came across the following
> call trace
> 
> Call Trace:
> [c000000006a0e800] [c000000000011c30] .show_stack+0x6c/0x16c (unreliable)
> [c000000006a0e8b0] [c00000000012129c] .__alloc_pages_nodemask+0x6a0/0x75c
> [c000000006a0ea30] [c0000000001527cc] .alloc_pages_current+0xc4/0x104
> [c000000006a0ead0] [c00000000015b1a0] .new_slab+0xe0/0x314
> [c000000006a0eb70] [c00000000015b6fc] .__slab_alloc+0x328/0x644
> [c000000006a0ec50] [c00000000015cc34] .__kmalloc_node_track_caller+0x114/0x194
> [c000000006a0ed00] [c000000000599f6c] .__alloc_skb+0x94/0x180
> [c000000006a0edb0] [c00000000059af5c] .__netdev_alloc_skb+0x3c/0x74
> [c000000006a0ee30] [c0000000004f9480] .ehea_refill_rq_def+0xf8/0x2d0
> [c000000006a0ef30] [c0000000004fab8c] .ehea_up+0x5b8/0x69c
> [c000000006a0f040] [c0000000004facd4] .ehea_open+0x64/0x118
> [c000000006a0f0e0] [c0000000005a6e9c] .__dev_open+0x100/0x168
> [c000000006a0f170] [c0000000005a3ac0] .__dev_change_flags+0x10c/0x1ac
> [c000000006a0f210] [c0000000005a6d44] .dev_change_flags+0x24/0x7c
> [c000000006a0f2a0] [c0000000005b50b4] .do_setlink+0x31c/0x750
> [c000000006a0f3b0] [c0000000005b6724] .rtnl_newlink+0x388/0x618
> [c000000006a0f5f0] [c0000000005b6350] .rtnetlink_rcv_msg+0x268/0x2b4
> [c000000006a0f6a0] [c0000000005cfdc0] .netlink_rcv_skb+0x74/0x108
> [c000000006a0f730] [c0000000005b60c4] .rtnetlink_rcv+0x38/0x5c
> [c000000006a0f7c0] [c0000000005cf8c8] .netlink_unicast+0x318/0x3f4
> [c000000006a0f890] [c0000000005d05b4] .netlink_sendmsg+0x2d0/0x310
> [c000000006a0f970] [c00000000058e1e8] .sock_sendmsg+0xd4/0x110
> [c000000006a0fb50] [c00000000058e514] .SyS_sendmsg+0x1f4/0x288
> [c000000006a0fd70] [c00000000058c2b8] .SyS_socketcall+0x214/0x280
> [c000000006a0fe30] [c0000000000085b4] syscall_exit+0x0/0x40
> Mem-Info:
> Node 0 DMA per-cpu:
> CPU    0: hi:    0, btch:   1 usd:   0
> CPU    1: hi:    0, btch:   1 usd:   0
> CPU    2: hi:    0, btch:   1 usd:   0
> CPU    3: hi:    0, btch:   1 usd:   0
> active_anon:50 inactive_anon:260 isolated_anon:0
>   active_file:159 inactive_file:139 isolated_file:0
>   unevictable:0 dirty:2 writeback:1 unstable:0
>   free:16 slab_reclaimable:66 slab_unreclaimable:502
>   mapped:120 shmem:2 pagetables:37 bounce:0
> Node 0 DMA free:1024kB min:1408kB low:1728kB high:2112kB active_anon:3200kB inactive_anon:16640kB active_file:10176kB inactive_file:8896kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:130944kB mlocked:0kB dirty:128kB writeback:64kB mapped:7680kB shmem:128kB slab_reclaimable:4224kB slab_unreclaimable:32128kB kernel_stack:2528kB pagetables:2368kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
> lowmem_reserve[]: 0 0 0
> Node 0 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
> 496 total pagecache pages
> 178 pages in swap cache
> Swap cache stats: add 780, delete 602, find 467/551
> Free swap  = 1027904kB
> Total swap = 1044160kB
> 2048 pages RAM
> 683 pages reserved
> 582 pages shared
> 1075 pages non-shared
> SLUB: Unable to allocate memory on node -1 (gfp=0x20)
>    cache: kmalloc-16384, object size: 16384, buffer size: 16384, default order: 2, min order: 0
>    node 0: slabs: 28, objs: 292, free: 0
> ip: page allocation failure. order:0, mode:0x8020
> Call Trace:
> [c000000006a0eb40] [c000000000011c30] .show_stack+0x6c/0x16c (unreliable)
> [c000000006a0ebf0] [c00000000012129c] .__alloc_pages_nodemask+0x6a0/0x75c
> [c000000006a0ed70] [c0000000001527cc] .alloc_pages_current+0xc4/0x104
> [c000000006a0ee10] [c00000000011fca4] .__get_free_pages+0x18/0x90
> [c000000006a0ee90] [c0000000004f7058] .ehea_get_stats+0x4c/0x1bc
> [c000000006a0ef30] [c0000000005a0a04] .dev_get_stats+0x38/0x64
> [c000000006a0efc0] [c0000000005b456c] .rtnl_fill_ifinfo+0x35c/0x85c
> [c000000006a0f150] [c0000000005b5920] .rtmsg_ifinfo+0x164/0x204
> [c000000006a0f210] [c0000000005a6d6c] .dev_change_flags+0x4c/0x7c
> [c000000006a0f2a0] [c0000000005b50b4] .do_setlink+0x31c/0x750
> [c000000006a0f3b0] [c0000000005b6724] .rtnl_newlink+0x388/0x618
> [c000000006a0f5f0] [c0000000005b6350] .rtnetlink_rcv_msg+0x268/0x2b4
> [c000000006a0f6a0] [c0000000005cfdc0] .netlink_rcv_skb+0x74/0x108
> [c000000006a0f730] [c0000000005b60c4] .rtnetlink_rcv+0x38/0x5c
> [c000000006a0f7c0] [c0000000005cf8c8] .netlink_unicast+0x318/0x3f4
> [c000000006a0f890] [c0000000005d05b4] .netlink_sendmsg+0x2d0/0x310
> [c000000006a0f970] [c00000000058e1e8] .sock_sendmsg+0xd4/0x110
> [c000000006a0fb50] [c00000000058e514] .SyS_sendmsg+0x1f4/0x288
> [c000000006a0fd70] [c00000000058c2b8] .SyS_socketcall+0x214/0x280
> [c000000006a0fe30] [c0000000000085b4] syscall_exit+0x0/0x40
> Mem-Info:
> Node 0 DMA per-cpu:
> CPU    0: hi:    0, btch:   1 usd:   0
> CPU    1: hi:    0, btch:   1 usd:   0
> CPU    2: hi:    0, btch:   1 usd:   0
> CPU    3: hi:    0, btch:   1 usd:   0
> 
> The mainline 2.6.35-rc5 worked fine.

Maybe you were lucky with 2.6.35-rc5

Anyway ehea should not use GFP_ATOMIC in its ehea_get_stats() method,
called in process context, but GFP_KERNEL.

Another patch is needed for ehea_refill_rq_def() as well.



[PATCH] ehea: ehea_get_stats() should use GFP_KERNEL

ehea_get_stats() is called in process context and should use GFP_KERNEL
allocation instead of GFP_ATOMIC.

Clearing stats at beginning of ehea_get_stats() is racy in case of
concurrent stat readers.

get_stats() can also use netdev net_device_stats, instead of a private
copy.

Reported-by: divya <dipraksh@linux.vnet.ibm.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/ehea/ehea.h      |    1 -
 drivers/net/ehea/ehea_main.c |    6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

^ permalink raw reply

* Badness with the kernel version 2.6.35-rc1-git1 running on P6 box
From: divya @ 2010-07-16  8:50 UTC (permalink / raw)
  To: LKML, linuxppc-dev; +Cc: sachinp

Hi ,

With the latest kernel version 2.6.35-rc5-git1 (2f7989efd4398) running on a
Power (P6) box, I came across the following call trace:

Call Trace:
[c000000006a0e800] [c000000000011c30] .show_stack+0x6c/0x16c (unreliable)
[c000000006a0e8b0] [c00000000012129c] .__alloc_pages_nodemask+0x6a0/0x75c
[c000000006a0ea30] [c0000000001527cc] .alloc_pages_current+0xc4/0x104
[c000000006a0ead0] [c00000000015b1a0] .new_slab+0xe0/0x314
[c000000006a0eb70] [c00000000015b6fc] .__slab_alloc+0x328/0x644
[c000000006a0ec50] [c00000000015cc34] .__kmalloc_node_track_caller+0x114/0x194
[c000000006a0ed00] [c000000000599f6c] .__alloc_skb+0x94/0x180
[c000000006a0edb0] [c00000000059af5c] .__netdev_alloc_skb+0x3c/0x74
[c000000006a0ee30] [c0000000004f9480] .ehea_refill_rq_def+0xf8/0x2d0
[c000000006a0ef30] [c0000000004fab8c] .ehea_up+0x5b8/0x69c
[c000000006a0f040] [c0000000004facd4] .ehea_open+0x64/0x118
[c000000006a0f0e0] [c0000000005a6e9c] .__dev_open+0x100/0x168
[c000000006a0f170] [c0000000005a3ac0] .__dev_change_flags+0x10c/0x1ac
[c000000006a0f210] [c0000000005a6d44] .dev_change_flags+0x24/0x7c
[c000000006a0f2a0] [c0000000005b50b4] .do_setlink+0x31c/0x750
[c000000006a0f3b0] [c0000000005b6724] .rtnl_newlink+0x388/0x618
[c000000006a0f5f0] [c0000000005b6350] .rtnetlink_rcv_msg+0x268/0x2b4
[c000000006a0f6a0] [c0000000005cfdc0] .netlink_rcv_skb+0x74/0x108
[c000000006a0f730] [c0000000005b60c4] .rtnetlink_rcv+0x38/0x5c
[c000000006a0f7c0] [c0000000005cf8c8] .netlink_unicast+0x318/0x3f4
[c000000006a0f890] [c0000000005d05b4] .netlink_sendmsg+0x2d0/0x310
[c000000006a0f970] [c00000000058e1e8] .sock_sendmsg+0xd4/0x110
[c000000006a0fb50] [c00000000058e514] .SyS_sendmsg+0x1f4/0x288
[c000000006a0fd70] [c00000000058c2b8] .SyS_socketcall+0x214/0x280
[c000000006a0fe30] [c0000000000085b4] syscall_exit+0x0/0x40
Mem-Info:
Node 0 DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
CPU    2: hi:    0, btch:   1 usd:   0
CPU    3: hi:    0, btch:   1 usd:   0
active_anon:50 inactive_anon:260 isolated_anon:0
  active_file:159 inactive_file:139 isolated_file:0
  unevictable:0 dirty:2 writeback:1 unstable:0
  free:16 slab_reclaimable:66 slab_unreclaimable:502
  mapped:120 shmem:2 pagetables:37 bounce:0
Node 0 DMA free:1024kB min:1408kB low:1728kB high:2112kB active_anon:3200kB inactive_anon:16640kB active_file:10176kB inactive_file:8896kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:130944kB mlocked:0kB dirty:128kB writeback:64kB mapped:7680kB shmem:128kB slab_reclaimable:4224kB slab_unreclaimable:32128kB kernel_stack:2528kB pagetables:2368kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0
Node 0 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
496 total pagecache pages
178 pages in swap cache
Swap cache stats: add 780, delete 602, find 467/551
Free swap  = 1027904kB
Total swap = 1044160kB
2048 pages RAM
683 pages reserved
582 pages shared
1075 pages non-shared
SLUB: Unable to allocate memory on node -1 (gfp=0x20)
   cache: kmalloc-16384, object size: 16384, buffer size: 16384, default order: 2, min order: 0
   node 0: slabs: 28, objs: 292, free: 0
ip: page allocation failure. order:0, mode:0x8020
Call Trace:
[c000000006a0eb40] [c000000000011c30] .show_stack+0x6c/0x16c (unreliable)
[c000000006a0ebf0] [c00000000012129c] .__alloc_pages_nodemask+0x6a0/0x75c
[c000000006a0ed70] [c0000000001527cc] .alloc_pages_current+0xc4/0x104
[c000000006a0ee10] [c00000000011fca4] .__get_free_pages+0x18/0x90
[c000000006a0ee90] [c0000000004f7058] .ehea_get_stats+0x4c/0x1bc
[c000000006a0ef30] [c0000000005a0a04] .dev_get_stats+0x38/0x64
[c000000006a0efc0] [c0000000005b456c] .rtnl_fill_ifinfo+0x35c/0x85c
[c000000006a0f150] [c0000000005b5920] .rtmsg_ifinfo+0x164/0x204
[c000000006a0f210] [c0000000005a6d6c] .dev_change_flags+0x4c/0x7c
[c000000006a0f2a0] [c0000000005b50b4] .do_setlink+0x31c/0x750
[c000000006a0f3b0] [c0000000005b6724] .rtnl_newlink+0x388/0x618
[c000000006a0f5f0] [c0000000005b6350] .rtnetlink_rcv_msg+0x268/0x2b4
[c000000006a0f6a0] [c0000000005cfdc0] .netlink_rcv_skb+0x74/0x108
[c000000006a0f730] [c0000000005b60c4] .rtnetlink_rcv+0x38/0x5c
[c000000006a0f7c0] [c0000000005cf8c8] .netlink_unicast+0x318/0x3f4
[c000000006a0f890] [c0000000005d05b4] .netlink_sendmsg+0x2d0/0x310
[c000000006a0f970] [c00000000058e1e8] .sock_sendmsg+0xd4/0x110
[c000000006a0fb50] [c00000000058e514] .SyS_sendmsg+0x1f4/0x288
[c000000006a0fd70] [c00000000058c2b8] .SyS_socketcall+0x214/0x280
[c000000006a0fe30] [c0000000000085b4] syscall_exit+0x0/0x40
Mem-Info:
Node 0 DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
CPU    2: hi:    0, btch:   1 usd:   0
CPU    3: hi:    0, btch:   1 usd:   0

The mainline 2.6.35-rc5 worked fine.

Thanks
Divya

^ permalink raw reply

* hi, i have two flashs, but my kernel can only find one , how can i write the dts?
From: hacklu @ 2010-07-16  8:34 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 1398 bytes --]

this is my dts file:
flash@0,0 {
                        #address-cells = <1>;
                        #size-cells = <1>;
                        compatible = "cfi-flash";
                        probe-type = "CFI";
                        reg = <0 0 1000000>;
                        bank-width = <2>;
                        device-width = <1>;
                        hrcw@0 {
                                label = "hrcw";
                                reg = <0 40000>;
                        };
                        jffs@40000 {
                                label = "jffs";
                                reg = <40000 200000>;
                        };
                        jffs2@240000 {
                                label = "uimage";
                                reg = <240000 d80000>;
                        };
             };
flash@1,0 {
                        #address-cells = <1>;
                        #size-cells = <1>;
                        compatible = "cfi-flash";
                        probe-type = "CFI";
                        reg = <1000000 0 1000000>;
                        bank-width = <2>;
                        device-width = <1>;
                        jffs3@240000 {
                                label = "jffs2";
                                reg = <0 1000000>;
                        };
                }; 
2010-07-16 



hacklu 

[-- Attachment #2: Type: text/html, Size: 6905 bytes --]

^ permalink raw reply

* linux-next: build failure after merge of the final tree (powerpc related)
From: Stephen Rothwell @ 2010-07-16  7:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, linuxppc-dev
  Cc: Martyn Welch, linux-next, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 396 bytes --]

Hi all,

After merging the final tree, today's linux-next build (powerpc
allmodconfig) failed like this:

ERROR: "of_i8042_kbd_irq" [drivers/input/serio/i8042.ko] undefined!
ERROR: "of_i8042_aux_irq" [drivers/input/serio/i8042.ko] undefined!

Presumably missing EXPORT_SYMBOLs ..
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* [PATCH 1/1] powerpc/smp: remove the incorrect decrementer initial codes for AP
From: Tiejun Chen @ 2010-07-16  6:17 UTC (permalink / raw)
  To: benh, linuxppc-dev

We already have start_cpu_decrementer() to start the decrementer on the AP
via the following path:

start_secondary() -> secondary_cpu_time_init() -> start_cpu_decrementer()

So remove the incorrect code introduced by commit:
e7f75ad0 powerpc/47x: Base ppc476 support

Also, we really should not enable the decrementer before calling set_dec().

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/smp.c |    8 --------
 1 files changed, 0 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5c196d1..976fc7d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -501,14 +501,6 @@ int __devinit start_secondary(void *unused)
 	current->active_mm = &init_mm;
 
 	smp_store_cpu_info(cpu);
-
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-	/* Clear any pending timer interrupts */
-	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
-
-	/* Enable decrementer interrupt */
-	mtspr(SPRN_TCR, TCR_DIE);
-#endif
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
 	cpu_callin_map[cpu] = 1;
-- 
1.5.6

^ permalink raw reply related

* Re: [PATCH 0/7] De-couple sysfs memory directories from memory sections
From: Greg KH @ 2010-07-16  7:13 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <4C3B3446.5090302@austin.ibm.com>

On Mon, Jul 12, 2010 at 10:27:02AM -0500, Nathan Fontenot wrote:
> This set of patches de-couples the idea that there is a single
> directory in sysfs for each memory section.

Any reason you didn't cc: the sysfs maintainer on these patches?  If
not, I'll gladly ignore them...

(hint, scripts/get_maintainer.pl is your friend...)

greg k-h

^ permalink raw reply

* Re: cpm_uart_console_write() stuck in waiting for transmitter fifo ready
From: Shawn Jin @ 2010-07-16  7:12 UTC (permalink / raw)
  To: ppcdev, Scott Wood
In-Reply-To: <AANLkTin7nqaxDyXjkFhdJGEaazEL-oSMXnhkG0dblAUb@mail.gmail.com>

>> Why would the TxBD be filled with all 0xF? Would it be possible that
>> the bdbase actually points somewhere else other than the TxBD?
>
> The virtual address 0xfddfa000 is mapped to 0xfa202000. I suspect that
> the TxBD of my MPC870 may not start at 0xfa202020.
>
> I notice that for adder875, ep88xc and mpc885ads, the muram data's reg
> = <0 0x1c00> but for mgsuvd, its reg = <0x800 0x1800>. How does the
> kernel use muram for 885 family SoCs? How much muram should be
> reserved for data?
>
> My RCCR=0x1, meaning the first 512B is for microcode. So the data and
> the TxBD should really be starting at 0xfa202200? Then my muram data's
> reg should be <0x200 ?>? What size shall I specify?

After the muram data's reg is changed to <0x200 0x1a00>, the cpm_uart
driver works properly and the kernel messages are printed on the
serial port.

-Shawn.

^ permalink raw reply

* Re: [PATCH V4] powerpc/prom: Export device tree physical address via  proc
From: Mitch Bradley @ 2010-07-16  5:44 UTC (permalink / raw)
  To: Grant Likely
  Cc: Kumar Gala, Matthew McClintock, linuxppc-dev, Timur Tabi,
	David Gibson
In-Reply-To: <AANLkTilDcYYbsH-6f0_HlX9WwaOwG24w1CfhRLplZKS7@mail.gmail.com>

Grant Likely wrote:
> On Thu, Jul 15, 2010 at 12:58 PM, Matthew McClintock <msm@freescale.com> wrote:
>   
>> On Thu, 2010-07-15 at 12:37 -0600, Grant Likely wrote:
>>     
>>> On Thu, Jul 15, 2010 at 12:03 PM, Matthew McClintock <msm@freescale.com> wrote:
>>>       
>>>> Yes. Where would we get a list of memreserve sections?
>>>>         
>>> I would say the list of reserves that are not under the control of
>>> Linux should be explicitly described in the device tree proper.  For
>>> instance, if you have a region that firmware depends on, then have a
>>> node for describing the firmware and a property stating the memory
>>> regions that it depends on.  The memreserve regions can be generated
>>> from that.
>>>       
>> Ok, so we could traverse the tree node-by-node for a
>> persistent-memreserve property and add them to the /memreserve/ list in
>> the kexec user space tools?
>>     
>
> I *think* that is okay, but I'd like to hear from Segher, Ben, Mitch,
> David Gibson, and other device tree experts on whether or not that
> exact property naming is a good one.
>   

In the /memory node, the "reg" property specifies all of memory and the 
"available" property specifies those portions that the OS is permitted 
to use.  Subtracting "available" from "reg" gives you the regions that 
are used for other purposes, such as frame buffers or firmware needs.

Often the OS can just look at "available", as it typically wants to know 
what it can use, not what it can't.

The full size as given by "reg" is useful for system configuration 
reporting purposes - the user thinks he bought 2G of memory, so it's 
good to report that 2G is indeed installed in the system.  (As an aside, 
when I first invented Open Boot, 16M was a typical memory size.  I'm 
rather gratified that the overall device tree design has held up 
reasonably well over the scale factors that have happened since then.)

It would be possible to mark the "used" regions with a finer-grained 
distinction than "they are unavailable to the OS", but that quickly gets 
into the diminishing returns realm - a lot of trouble for fairly small 
incremental value. The PC BIOS "E820" memory description scheme has a 
few extra categories of memory.  The one category that seems like it 
might (just barely) be worth the effort is "temporarily used by firmware 
but reclaimable after a certain point" - but then you have to define 
rather carefully the reclamation time and conditions.

> Write up a proposed binding (you can use devicetree.org).  Post it for
> review (make sure you cc: both devicetree-discuss and linuxppc-dev, as
> well as cc'ing the people listed above.)
>
>   
>>>> Should we export
>>>> the reserve sections instead of the device tree location?
>>>>         
>>> It shouldn't really be something that the kernel is explicitly
>>> exporting because it is a characteristic of the board design.  It is
>>> something that belongs in the tree-proper.  ie. when you extract the
>>> tree you have data telling what the region is, and why it is reserved.
>>>       
>> Agreed.
>>
>>     
>>>> We just need a
>>>> way to preserve what was there at boot to pass to the new kernel.
>>>>         
>>> Yet there is no differentiation between the board-dictated memory
>>> reserves and the things that U-Boot/Linux made an arbitrary decision
>>> on.  The solution should focus not on "can I throw this one away?" but
>>> rather "Is this one I should keep?"  :-)  A subtle difference, I know,
>>> but it changes the way you approach the solution.
>>>       
>> Fair enough. I think the above solution will work nicely, and I can
>> start implementing something if you agree - if I interpreted your idea
>> correctly. Although it should not require any changes to the kernel
>> proper.
>>     
>
> Correct.
>
> g.
>
>
>   

^ permalink raw reply

* Re: [PPC64/Power7 - 2.6.35-rc5] Bad relocation warnings while Building a CONFIG_RELOCATABLE kernel with CONFIG_ISERIES enabled
From: Michael Neuling @ 2010-07-16  2:04 UTC (permalink / raw)
  To: subrata
  Cc: sachinp, linux-kernel, Kamalesh Babulal, Linuxppc-dev,
	Paul Mackerras, Paul Mackerras, divya.vikas
In-Reply-To: <1279193743.10707.5.camel@subratamodak.linux.ibm.com>

> commit e62cee42e66dcca83aae02748535f62e0f564a0c solved the problem for
> 2.6.34-rc6. However some other bad relocation warnings generated against
> 2.6.35-rc5 on Power7/ppc64 below:
> 
> MODPOST 2004 modules^M
> WARNING: 2 bad relocations^M
> c000000000008590 R_PPC64_ADDR32    .text+0x4000000000008460^M
> c000000000008594 R_PPC64_ADDR32    .text+0x4000000000008598^M

I can't replicate this with your config with gcc 4.4.4 and binutils
2.20.  What are you using?

Mikey

> 
> Config file attached.
> 
> Regards--
> Subrata
> 
> On Fri, 2010-05-07 at 15:40 +1000, Paul Mackerras wrote:
> > On Wed, May 05, 2010 at 05:20:51PM +0530, Subrata Modak wrote:
> > 
> > > I built 2.6.34-rc6 with the attached Fedora Config file
> > > (config-2.6.33.1-19.fc13.ppc64) on my P5 Fedora Box and got the
> > > following warning. Is the following expected ?
> > > 
> > > CALL    arch/powerpc/relocs_check.pl
> > > Building modules, stage 2.
> > > WARNING: 4 bad relocations
> > > c00000000007216e R_PPC64_ADDR16_HIGHEST  __ksymtab+0x00000000009dcec8
> > > c000000000072172 R_PPC64_ADDR16_HIGHER  __ksymtab+0x00000000009dcec8
> > > c00000000007217a R_PPC64_ADDR16_HI  __ksymtab+0x00000000009dcec8
> > > c00000000007217e R_PPC64_ADDR16_LO  __ksymtab+0x00000000009dcec8
> > 
> > No, it's not expected.  It's in iSeries code, so you could avoid it
> > just by disabling CONFIG_ISERIES (I don't think any distro still
> > supports legacy iSeries).  I'll post a patch to fix the problem
> > properly.
> > 
> > Paul.

^ permalink raw reply

* Re: [PATCH 4/5] v2 Update sysfs node routines for new sysfs memory directories
From: KAMEZAWA Hiroyuki @ 2010-07-16  0:12 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <4C3F5628.6030809@austin.ibm.com>

On Thu, 15 Jul 2010 13:40:40 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> Update the node sysfs directory routines that create
> links to the memory sysfs directories under each node.
> This update makes the node code aware that a memory sysfs
> directory can cover multiple memory sections.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Shouldn't "static int link_mem_sections(int nid)" be updated?
It does
 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
        register..

Thanks,
-Kame


> ---
>  drivers/base/node.c |   12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> Index: linux-2.6/drivers/base/node.c
> ===================================================================
> --- linux-2.6.orig/drivers/base/node.c	2010-07-15 09:54:06.000000000 -0500
> +++ linux-2.6/drivers/base/node.c	2010-07-15 09:56:16.000000000 -0500
> @@ -346,8 +346,10 @@
>  		return -EFAULT;
>  	if (!node_online(nid))
>  		return 0;
> -	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
> -	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
> +
> +	sect_start_pfn = section_nr_to_pfn(mem_blk->start_phys_index);
> +	sect_end_pfn = section_nr_to_pfn(mem_blk->end_phys_index);
> +	sect_end_pfn += PAGES_PER_SECTION - 1;
>  	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
>  		int page_nid;
>  
> @@ -383,8 +385,10 @@
>  	if (!unlinked_nodes)
>  		return -ENOMEM;
>  	nodes_clear(*unlinked_nodes);
> -	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
> -	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
> +
> +	sect_start_pfn = section_nr_to_pfn(mem_blk->start_phys_index);
> +	sect_end_pfn = section_nr_to_pfn(mem_blk->end_phys_index);
> +	sect_end_pfn += PAGES_PER_SECTION - 1;
>  	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
>  		int nid;
>  
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply

* Re: [PATCH 2/5] v2 Create new 'end_phys_index' file
From: KAMEZAWA Hiroyuki @ 2010-07-16  0:08 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <4C3F55BC.4020600@austin.ibm.com>

On Thu, 15 Jul 2010 13:38:52 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> Add a new 'end_phys_index' file to each memory sysfs directory to
> report the physical index of the last memory section
> covered by the sysfs directory.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Does memory_block have to be contiguous between [phys_index, end_phys_index] ?
Should we provide "# of sections" or "amount of memory under a block" ?

No objections to end_phys_index... but please fix the diff style.

Thanks,
-Kame


> ---
>  drivers/base/memory.c  |   14 +++++++++++++-
>  include/linux/memory.h |    3 +++
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6/drivers/base/memory.c
> ===================================================================
> --- linux-2.6.orig/drivers/base/memory.c	2010-07-15 09:55:54.000000000 -0500
> +++ linux-2.6/drivers/base/memory.c	2010-07-15 09:56:05.000000000 -0500
> @@ -121,7 +121,15 @@
>  {
>  	struct memory_block *mem =
>  		container_of(dev, struct memory_block, sysdev);
> -	return sprintf(buf, "%08lx\n", mem->phys_index);
> +	return sprintf(buf, "%08lx\n", mem->start_phys_index);
> +}
> +
> +static ssize_t show_mem_end_phys_index(struct sys_device *dev,
> +			struct sysdev_attribute *attr, char *buf)
> +{
> +	struct memory_block *mem =
> +		container_of(dev, struct memory_block, sysdev);
> +	return sprintf(buf, "%08lx\n", mem->end_phys_index);
>  }
>  
>  /*
> @@ -321,6 +329,7 @@
>  }
>  
>  static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
> +static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
>  static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
>  static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
>  static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
> @@ -533,6 +542,8 @@
>  		if (!ret)
>  			ret = mem_create_simple_file(mem, phys_index);
>  		if (!ret)
> +			ret = mem_create_simple_file(mem, end_phys_index);
> +		if (!ret)
>  			ret = mem_create_simple_file(mem, state);
>  		if (!ret)
>  			ret = mem_create_simple_file(mem, phys_device);
> @@ -577,6 +588,7 @@
>  	if (list_empty(&mem->sections)) {
>  		unregister_mem_sect_under_nodes(mem);
>  		mem_remove_simple_file(mem, phys_index);
> +		mem_remove_simple_file(mem, end_phys_index);
>  		mem_remove_simple_file(mem, state);
>  		mem_remove_simple_file(mem, phys_device);
>  		mem_remove_simple_file(mem, removable);
> Index: linux-2.6/include/linux/memory.h
> ===================================================================
> --- linux-2.6.orig/include/linux/memory.h	2010-07-15 09:54:06.000000000 -0500
> +++ linux-2.6/include/linux/memory.h	2010-07-15 09:56:05.000000000 -0500
> @@ -29,6 +29,9 @@
>  
>  struct memory_block {
>  	unsigned long state;
> +	unsigned long start_phys_index;
> +	unsigned long end_phys_index;
> +
>  	/*
>  	 * This serializes all state change requests.  It isn't
>  	 * held during creation because the control files are
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply

* Re: [PATCH 1/5] v2 Split the memory_block structure
From: KAMEZAWA Hiroyuki @ 2010-07-16  0:06 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linux-mm, linux-kernel, linuxppc-dev
In-Reply-To: <4C3F557F.3000304@austin.ibm.com>

On Thu, 15 Jul 2010 13:37:51 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> Split the memory_block struct into a memory_block
> struct to cover each sysfs directory and a new memory_block_section
> struct for each memory section covered by the sysfs directory.
> This change allows for creation of memory sysfs directories that
> can span multiple memory sections.
> 
> This can be beneficial in that it can reduce the number of memory
> sysfs directories created at boot.  This also allows different
> architectures to define how many memory sections are covered by
> a sysfs directory.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
> ---
>  drivers/base/memory.c  |  222 ++++++++++++++++++++++++++++++++++---------------
>  include/linux/memory.h |   11 +-
>  2 files changed, 167 insertions(+), 66 deletions(-)
> 
> Index: linux-2.6/drivers/base/memory.c
> ===================================================================
> --- linux-2.6.orig/drivers/base/memory.c	2010-07-15 08:48:41.000000000 -0500
> +++ linux-2.6/drivers/base/memory.c	2010-07-15 09:55:54.000000000 -0500
> @@ -28,6 +28,14 @@
>  #include <asm/uaccess.h>
>  
>  #define MEMORY_CLASS_NAME	"memory"
> +#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
> +
> +static int sections_per_block;
> +
> +static inline int base_memory_block_id(int section_nr)
> +{
> +	return (section_nr / sections_per_block) * sections_per_block;
> +}
>  
>  static struct sysdev_class memory_sysdev_class = {
>  	.name = MEMORY_CLASS_NAME,
> @@ -94,10 +102,9 @@
>  }
>  
>  static void
> -unregister_memory(struct memory_block *memory, struct mem_section *section)
> +unregister_memory(struct memory_block *memory)
>  {
>  	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
> -	BUG_ON(memory->sysdev.id != __section_nr(section));
>  
>  	/* drop the ref. we got in remove_memory_block() */
>  	kobject_put(&memory->sysdev.kobj);
> @@ -123,13 +130,20 @@
>  static ssize_t show_mem_removable(struct sys_device *dev,
>  			struct sysdev_attribute *attr, char *buf)
>  {
> +	struct memory_block *mem;
> +	struct memory_block_section *mbs;
>  	unsigned long start_pfn;
> -	int ret;
> -	struct memory_block *mem =
> -		container_of(dev, struct memory_block, sysdev);
> +	int ret = 1;
> +
> +	mem = container_of(dev, struct memory_block, sysdev);
> +	mutex_lock(&mem->state_mutex);
>  
> -	start_pfn = section_nr_to_pfn(mem->phys_index);
> -	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
> +	list_for_each_entry(mbs, &mem->sections, next) {
> +		start_pfn = section_nr_to_pfn(mbs->phys_index);
> +		ret &= is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
> +	}
> +
> +	mutex_unlock(&mem->state_mutex);

Hmm, this means memory can only be offlined as a whole memory block (all of
its sections). Right?
Please state this fact in the patch description...
And in Documentation/memory-hotplug.txt, e.g. "From user's perspective, memory
section is not a unit of memory hotplug anymore".
And describe the new rule.


>  	return sprintf(buf, "%d\n", ret);
>  }
>  
> @@ -182,16 +196,16 @@
>   * OK to have direct references to sparsemem variables in here.
>   */
>  static int
> -memory_block_action(struct memory_block *mem, unsigned long action)
> +memory_block_action(struct memory_block_section *mbs, unsigned long action)
>  {
>  	int i;
>  	unsigned long psection;
>  	unsigned long start_pfn, start_paddr;
>  	struct page *first_page;
>  	int ret;
> -	int old_state = mem->state;
> +	int old_state = mbs->state;
>  
> -	psection = mem->phys_index;
> +	psection = mbs->phys_index;
>  	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
>  
>  	/*
> @@ -217,18 +231,18 @@
>  			ret = online_pages(start_pfn, PAGES_PER_SECTION);
>  			break;
>  		case MEM_OFFLINE:
> -			mem->state = MEM_GOING_OFFLINE;
> +			mbs->state = MEM_GOING_OFFLINE;
>  			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
>  			ret = remove_memory(start_paddr,
>  					    PAGES_PER_SECTION << PAGE_SHIFT);
>  			if (ret) {
> -				mem->state = old_state;
> +				mbs->state = old_state;
>  				break;
>  			}
>  			break;
>  		default:
>  			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
> -					__func__, mem, action, action);
> +					__func__, mbs, action, action);
>  			ret = -EINVAL;
>  	}
>  
> @@ -238,19 +252,34 @@

And please check quilt's diff options.
Patches on the ML usually show the function name for each hunk, as in
@@ -241,6 +293,8 @@ static int memory_block_change_state(str

Maybe the "-p" option is missing..


>  static int memory_block_change_state(struct memory_block *mem,
>  		unsigned long to_state, unsigned long from_state_req)
>  {
> +	struct memory_block_section *mbs;
>  	int ret = 0;
> +
>  	mutex_lock(&mem->state_mutex);
>  
> -	if (mem->state != from_state_req) {
> -		ret = -EINVAL;
> -		goto out;
> +	list_for_each_entry(mbs, &mem->sections, next) {
> +		if (mbs->state != from_state_req)
> +			continue;
> +
> +		ret = memory_block_action(mbs, to_state);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret) {
> +		list_for_each_entry(mbs, &mem->sections, next) {
> +			if (mbs->state == from_state_req)
> +				continue;
> +
> +			if (memory_block_action(mbs, to_state))
> +				printk(KERN_ERR "Could not re-enable memory "
> +				       "section %lx\n", mbs->phys_index);

Why re-enable only? Does online->fail->offline never happen?
If so, please add a comment at least.
BTW, is it guaranteed that all sections under a block have the same state
after boot?

> +		}
>  	}
>  
> -	ret = memory_block_action(mem, to_state);
>  	if (!ret)
>  		mem->state = to_state;
>  
> -out:
>  	mutex_unlock(&mem->state_mutex);
>  	return ret;
>  }
> @@ -260,20 +289,15 @@
>  		struct sysdev_attribute *attr, const char *buf, size_t count)
>  {
>  	struct memory_block *mem;
> -	unsigned int phys_section_nr;
>  	int ret = -EINVAL;
>  
>  	mem = container_of(dev, struct memory_block, sysdev);
> -	phys_section_nr = mem->phys_index;
> -
> -	if (!present_section_nr(phys_section_nr))
> -		goto out;
> 
I'm sorry but I couldn't remember why this check was necessary...


 
>  	if (!strncmp(buf, "online", min((int)count, 6)))
>  		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
>  	else if(!strncmp(buf, "offline", min((int)count, 7)))
>  		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
> -out:
> +
>  	if (ret)
>  		return ret;
>  	return count;
> @@ -435,39 +459,6 @@
>  	return 0;
>  }
>  
> -static int add_memory_block(int nid, struct mem_section *section,
> -			unsigned long state, enum mem_add_context context)
> -{
> -	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
> -	unsigned long start_pfn;
> -	int ret = 0;
> -
> -	if (!mem)
> -		return -ENOMEM;
> -
> -	mem->phys_index = __section_nr(section);
> -	mem->state = state;
> -	mutex_init(&mem->state_mutex);
> -	start_pfn = section_nr_to_pfn(mem->phys_index);
> -	mem->phys_device = arch_get_memory_phys_device(start_pfn);
> -
> -	ret = register_memory(mem, section);
> -	if (!ret)
> -		ret = mem_create_simple_file(mem, phys_index);
> -	if (!ret)
> -		ret = mem_create_simple_file(mem, state);
> -	if (!ret)
> -		ret = mem_create_simple_file(mem, phys_device);
> -	if (!ret)
> -		ret = mem_create_simple_file(mem, removable);
> -	if (!ret) {
> -		if (context == HOTPLUG)
> -			ret = register_mem_sect_under_node(mem, nid);
> -	}
> -
> -	return ret;
> -}
> -

I don't say strongly but this kind of move-code should be done in another patch.


>  /*
>   * For now, we have a linear search to go find the appropriate
>   * memory_block corresponding to a particular phys_index. If
> @@ -482,12 +473,13 @@
>  	struct sys_device *sysdev;
>  	struct memory_block *mem;
>  	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
> +	int block_id = base_memory_block_id(__section_nr(section));
>  
>  	/*
>  	 * This only works because we know that section == sysdev->id
>  	 * slightly redundant with sysdev_register()
>  	 */
> -	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
> +	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
>  
>  	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
>  	if (!kobj)
> @@ -499,18 +491,98 @@
>  	return mem;
>  }
>  
> +static int add_mem_block_section(struct memory_block *mem,
> +				 int section_nr, unsigned long state)
> +{
> +	struct memory_block_section *mbs;
> +
> +	mbs = kzalloc(sizeof(*mbs), GFP_KERNEL);
> +	if (!mbs)
> +		return -ENOMEM;
> +
> +	mbs->phys_index = section_nr;
> +	mbs->state = state;
> +
> +	list_add(&mbs->next, &mem->sections);
> +	return 0;
> +}

Doesn't this "sections" need to be sorted ? Hmm.


> +
> +static int add_memory_block(int nid, struct mem_section *section,
> +			unsigned long state, enum mem_add_context context)
> +{
> +	struct memory_block *mem;
> +	int ret = 0;
> +
> +	mem = find_memory_block(section);
> +	if (!mem) {
> +		unsigned long start_pfn;
> +
> +		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
> +		if (!mem)
> +			return -ENOMEM;
> +
> +		mem->state = state;
> +		mutex_init(&mem->state_mutex);
> +		start_pfn = section_nr_to_pfn(__section_nr(section));
> +		mem->phys_device = arch_get_memory_phys_device(start_pfn);
> +		INIT_LIST_HEAD(&mem->sections);
> +
> +		mutex_lock(&mem->state_mutex);
> +
> +		ret = register_memory(mem, section);
> +		if (!ret)
> +			ret = mem_create_simple_file(mem, phys_index);
> +		if (!ret)
> +			ret = mem_create_simple_file(mem, state);
> +		if (!ret)
> +			ret = mem_create_simple_file(mem, phys_device);
> +		if (!ret)
> +			ret = mem_create_simple_file(mem, removable);
> +		if (!ret) {
> +			if (context == HOTPLUG)
> +				ret = register_mem_sect_under_node(mem, nid);
> +		}
> +	} else {
> +		kobject_put(&mem->sysdev.kobj);
> +		mutex_lock(&mem->state_mutex);
> +	}
> +
> +	if (!ret)
> +		ret = add_mem_block_section(mem, __section_nr(section), state);
> +
> +	mutex_unlock(&mem->state_mutex);
> +	return ret;
> +}
> +
>  int remove_memory_block(unsigned long node_id, struct mem_section *section,
>  		int phys_device)
>  {
>  	struct memory_block *mem;
> +	struct memory_block_section *mbs, *tmp;
> +	int section_nr = __section_nr(section);
>  
>  	mem = find_memory_block(section);
> -	unregister_mem_sect_under_nodes(mem);
> -	mem_remove_simple_file(mem, phys_index);
> -	mem_remove_simple_file(mem, state);
> -	mem_remove_simple_file(mem, phys_device);
> -	mem_remove_simple_file(mem, removable);
> -	unregister_memory(mem, section);
> +	mutex_lock(&mem->state_mutex);
> +
> +	/* remove the specified section */
> +	list_for_each_entry_safe(mbs, tmp, &mem->sections, next) {
> +		if (mbs->phys_index == section_nr) {
> +			list_del(&mbs->next);
> +			kfree(mbs);
> +		}
> +	}
> +
> +	mutex_unlock(&mem->state_mutex);
> +
> +	if (list_empty(&mem->sections)) {
> +		unregister_mem_sect_under_nodes(mem);
> +		mem_remove_simple_file(mem, phys_index);
> +		mem_remove_simple_file(mem, state);
> +		mem_remove_simple_file(mem, phys_device);
> +		mem_remove_simple_file(mem, removable);
> +		unregister_memory(mem);
> +		kfree(mem);
> +	}
>  
>  	return 0;
>  }
> @@ -532,6 +604,24 @@
>  	return remove_memory_block(0, section, 0);
>  }
>  
> +u32 __weak memory_block_size(void)
> +{
> +	return MIN_MEMORY_BLOCK_SIZE;
> +}
> +
> +static u32 get_memory_block_size(void)
> +{
> +	u32 blk_sz;
> +
> +	blk_sz = memory_block_size();
> +
> +	/* Validate blk_sz is a power of 2 and not less than section size */
> +	if ((blk_sz & (blk_sz - 1)) || (blk_sz < MIN_MEMORY_BLOCK_SIZE))
> +		blk_sz = MIN_MEMORY_BLOCK_SIZE;
> +
> +	return blk_sz;
> +}
> +
>  /*
>   * Initialize the sysfs support for memory devices...
>   */
> @@ -540,12 +630,16 @@
>  	unsigned int i;
>  	int ret;
>  	int err;
> +	int block_sz;
>  
>  	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
>  	ret = sysdev_class_register(&memory_sysdev_class);
>  	if (ret)
>  		goto out;
>  
> +	block_sz = get_memory_block_size();
> +	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
> +
>  	/*
>  	 * Create entries for memory sections that were found
>  	 * during boot and have been initialized
> Index: linux-2.6/include/linux/memory.h
> ===================================================================
> --- linux-2.6.orig/include/linux/memory.h	2010-07-15 08:48:41.000000000 -0500
> +++ linux-2.6/include/linux/memory.h	2010-07-15 09:54:06.000000000 -0500
> @@ -19,9 +19,15 @@
>  #include <linux/node.h>
>  #include <linux/compiler.h>
>  #include <linux/mutex.h>
> +#include <linux/list.h>
>  
> -struct memory_block {
> +struct memory_block_section {
> +	unsigned long state;
>  	unsigned long phys_index;
> +	struct list_head next;
> +};
> +
> +struct memory_block {
>  	unsigned long state;
>  	/*
>  	 * This serializes all state change requests.  It isn't
> @@ -34,6 +40,7 @@
>  	void *hw;			/* optional pointer to fw/hw data */
>  	int (*phys_callback)(struct memory_block *);
>  	struct sys_device sysdev;
> +	struct list_head sections;
>  };
>  
>  int arch_get_memory_phys_device(unsigned long start_pfn);
> @@ -113,7 +120,7 @@
>  extern int remove_memory_block(unsigned long, struct mem_section *, int);
>  extern int memory_notify(unsigned long val, void *v);
>  extern int memory_isolate_notify(unsigned long val, void *v);
> -extern struct memory_block *find_memory_block(unsigned long);
> +extern struct memory_block *find_memory_block(struct mem_section *);
>  extern int memory_is_hidden(struct mem_section *);
>  #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
>  enum mem_add_context { BOOT, HOTPLUG };
> 

Okay, please go ahead. But my 1st impression is that IBM should increase ppc's
SECTION_SIZE ;)

Thanks,
-Kame


 

^ permalink raw reply

* RE: [PATCH] powerpc: Fix GENERIC_ISA_DMA dependency
From: Liu Dave-R63238 @ 2010-07-15 23:15 UTC (permalink / raw)
  To: Anton Vorontsov, Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <20100715173816.GA17810@oksana.dev.rtsoft.ru>

> On PowerPC we should always use generic ISA DMA API implementation
> as there is simply no other implementation exist.
> 
> Without this patch, the following build error pops up:
> 
>   sound/built-in.o: In function 'snd_dma_pointer':
>   (.text+0x74ae): undefined reference to 'dma_spin_lock'
>   ...
>   make: *** [.tmp_vmlinux1] Error 1
> 
> This is PPC_85xx, SMP and some sound drivers set to =y.
> 
> Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>

Acked-by: Dave Liu <daveliu@freescale.com>

I ran into this issue when I developed the P1022DS BSP.

^ permalink raw reply

* Re: cpm_uart_console_write() stuck in waiting for transmitter fifo ready
From: Shawn Jin @ 2010-07-15 22:33 UTC (permalink / raw)
  To: ppcdev, Scott Wood
In-Reply-To: <AANLkTik9n_UkLKdmKT7PHiOL1-EW_TlMJFOGvWCU44xb@mail.gmail.com>

> The problem is that after/when the kernel switches to the real console
> from the boot console, printk() calls cpm_uart_console_write() to
> print the first (?) message using the cpm_uart driver. But the
> transmitter buffer never becomes ready. It's shown below with the gdb
> session.
>
> Program received signal SIGSTOP, Stopped (signal).
> 0xc00f3510 in cpm_uart_console_write (co=<value optimized out>,
> s=0xc017703e "console [ttyCPM0] enabled, bootconsole disabled\n",
> count=0x30) at /home/code/linux-2.6.33.5/arch/powerpc/include/asm/io.h:154
> (gdb) next
> (gdb) x/4h bdbase
> 0xfddfa020:     0xffff  0xffff  0xffff  0xffff
> (gdb)
>
> Why would the TxBD be filled with all 0xF? Would it be possible that
> the bdbase actually points somewhere else other than the TxBD?

The virtual address 0xfddfa000 is mapped to 0xfa202000. I suspect that
the TxBD of my MPC870 may not start at 0xfa202020.

I notice that for adder875, ep88xc and mpc885ads, the muram data's reg
= <0 0x1c00> but for mgsuvd, its reg = <0x800 0x1800>. How does the
kernel use muram for 885 family SoCs? How much muram should be
reserved for data?

My RCCR=0x1, meaning the first 512B is for microcode. So the data and
the TxBD should really be starting at 0xfa202200? Then my muram data's
reg should be <0x200 ?>? What size shall I specify?

Scott, you instructed
(http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-July/083788.html)
me to change the buffer address to 0xfa203fb8 from 0xfa202008 for the
bootwrapper's cpm-serial driver, assuming reg=<0 0x1c00>. If I need to
change to the reg not starting at 0x0, how should I accordingly change
this buffer address?

Thanks a lot,
-Shawn.

I went back to 2.4.18 kernel and noticed that the

^ permalink raw reply

* Re: [PATCH V4] powerpc/prom: Export device tree physical address via proc
From: Grant Likely @ 2010-07-15 19:18 UTC (permalink / raw)
  To: Matthew McClintock
  Cc: Kumar Gala, Mitch Bradley, linuxppc-dev, Timur Tabi, David Gibson
In-Reply-To: <1279220310.19363.19.camel@localhost>

On Thu, Jul 15, 2010 at 12:58 PM, Matthew McClintock <msm@freescale.com> wrote:
> On Thu, 2010-07-15 at 12:37 -0600, Grant Likely wrote:
>> On Thu, Jul 15, 2010 at 12:03 PM, Matthew McClintock <msm@freescale.com> wrote:
>> > Yes. Where would we get a list of memreserve sections?
>>
>> I would say the list of reserves that are not under the control of
>> Linux should be explicitly described in the device tree proper.  For
>> instance, if you have a region that firmware depends on, then have a
>> node for describing the firmware and a property stating the memory
>> regions that it depends on.  The memreserve regions can be generated
>> from that.
>
> Ok, so we could traverse the tree node-by-node for a
> persistent-memreserve property and add them to the /memreserve/ list in
> the kexec user space tools?

I *think* that is okay, but I'd like to hear from Segher, Ben, Mitch,
David Gibson, and other device tree experts on whether or not that
exact property naming is a good one.

Write up a proposed binding (you can use devicetree.org).  Post it for
review (make sure you cc: both devicetree-discuss and linuxppc-dev, as
well as cc'ing the people listed above.)

>> > Should we export
>> > the reserve sections instead of the device tree location?
>>
>> It shouldn't really be something that the kernel is explicitly
>> exporting because it is a characteristic of the board design.  It is
>> something that belongs in the tree-proper.  ie. when you extract the
>> tree you have data telling what the region is, and why it is reserved.
>
> Agreed.
>
>>
>> > We just need a
>> > way to preserve what was there at boot to pass to the new kernel.
>>
>> Yet there is no differentiation between the board-dictated memory
>> reserves and the things that U-Boot/Linux made an arbitrary decision
>> on.  The solution should focus not on "can I throw this one away?" but
>> rather "Is this one I should keep?"  :-)  A subtle difference, I know,
>> but it changes the way you approach the solution.
>
> Fair enough. I think the above solution will work nicely, and I can
> start implementing something if you agree - if I interpreted your idea
> correctly. Although it should not require any changes to the kernel
> proper.

Correct.

g.

^ permalink raw reply

* Re: [PATCH V4] powerpc/prom: Export device tree physical address via  proc
From: Matthew McClintock @ 2010-07-15 18:58 UTC (permalink / raw)
  To: Grant Likely; +Cc: Kumar Gala, linuxppc-dev, Timur Tabi
In-Reply-To: <AANLkTinUBPRsiKMd46EK6_9BBRYwB3jsfg49W9uKEbw-@mail.gmail.com>

On Thu, 2010-07-15 at 12:37 -0600, Grant Likely wrote:
> On Thu, Jul 15, 2010 at 12:03 PM, Matthew McClintock <msm@freescale.com> wrote:
> > On Thu, 2010-07-15 at 10:57 -0600, Grant Likely wrote:
> >> On Thu, Jul 15, 2010 at 10:39 AM, Matthew McClintock <msm@freescale.com> wrote:
> >> > On Thu, 2010-07-15 at 10:22 -0600, Grant Likely wrote:
> >> >> > Thanks for taking a look. My first thought was to just blow away all
> >> >> the
> >> >> > memreserve regions and start over. But, there are reserve regions
> >> >> for
> >> >> > other things that I might not want to blow away. For example, on
> >> >> mpc85xx
> >> >> > SMP systems we have an additional reserve region for our boot page.
> >> >>
> >> >> What is your starting point?  Where does the device tree (and
> >> >> memreserve list) come from
> >> >> that you're passing to kexec?  My first impression is that if you have
> >> >> to scrub the memreserve list, then the source being used to
> >> >> obtain the memreserves is either faulty or unsuitable to the task.
> >> >
> >> > I'm pulling the device tree passed in via u-boot and passing it to
> >> > kexec.
> >>
> >> How?  (what mechanism?)  I hope you're not using the debugfs
> >> flat-device-tree file.
> >
> > That is one way to get a good working copy. What is wrong with this
> > mechanism?
> 
> It's unstable.  It is in the debugfs, so there are no guarantees that
> the ABI will remain the same.  Plus it doesn't reflect any changes
> that the kernel may make to the device tree.  That interface is *debug
> only*.  Do not use it.

Ok.

> 
> > Should we duplicate everything u-boot does in kexec to build up a flat
> > device tree? Or is there another way to get a good tree?
> 
> That is one option.  U-Boot really shouldn't be modifying the tree
> very much anyway (I know on some platforms U-Boot is almost creating a
> tree from scratch, but that is insane and an entirely different
> discussion).  /proc/device-tree always gives the kernel's current view
> of the tree.  You can use dtc to extract it and write it into a dtb.

Ok wow, I've missed this completely. dtc to extract the device tree is a
very good option. I will pursue that line of thinking.

> 
> > Ideally, we
> > don't make the end user manually edit a device tree.
> 
> Of course not, any device tree manipulation is the job of the kexec
> tools.  None of this should be manual.  However, the data source is a
> significant and important question.

Ideally, we don't duplicate this in kexec and u-boot. Right now there is
nothing specific for say mpc85xx in kexec it's just ppc32. I would
prefer it stay this way.

> 
> >> > It is the most complete device tree and requires the least amount
> >> > of fixup.
> >> >
> >> > I have to scrub two items, the ramdisk/initrd and the device tree
> >> > because upon kexec'ing the kernel we have the ability to pass in new
> >> > ramdisk/initrd and device tree. They can also live at different physical
> >> > addresses for the second reboot.
> >>
> >> This sounds like the model is backwards.  Rather than scrubbing items,
> >> the memreserve list should be built up from a known good source.
> >
> > You can build one up yourself and it will still work out fine. Or you
> > can pull one from debugfs to get yourself started. Or you can pull it
> > every time.
> 
> What do you mean by "pull it every time"?

Exactly what you are saying is bad to do ;-P. Pull it from debugfs. But
the above "dtc -I fs" solution practically fixes that issue.

> 
> Out of curiosity, what is responsible for building up the memreserve
> list?  The userspace portion, or the kernel portion of kexec?  Or is
> it done by a totally separate program?

Currently, neither. I have submitted patches for the user space tool to
fixup the memreserve regions.

> 
> >> > The initrd addresses are already exposed, so we can update/remove/reuse
> >> > that entry, we just need a way for kexec to determine the current device
> >> > tree address so it can replace the correct memreserve region for the
> >> > kexec'ing kernels' device tree.
> >> >
> >> > The whole problem comes from repeatedly kexec'ing, we need to make sure
> >> > we don't keep losing blobs of memory to reserve regions (so we can't
> >> > just blindly add). We also need to make sure we don't lose other
> >> > memreserve regions that might be important for other things (so we can't
> >> > just blow them all away).
> >>
> >> Right, so you need to have a known-good list of reserve sections.
> >> Trying to go the other way sounds very fragile.
> >>
> >
> > Yes. Where would we get a list of memreserve sections?
> 
> I would say the list of reserves that are not under the control of
> Linux should be explicitly described in the device tree proper.  For
> instance, if you have a region that firmware depends on, then have a
> node for describing the firmware and a property stating the memory
> regions that it depends on.  The memreserve regions can be generated
> from that.

Ok, so we could traverse the tree node-by-node for a
persistent-memreserve property and add them to the /memreserve/ list in
the kexec user space tools?

> 
> > Should we export
> > the reserve sections instead of the device tree location?
> 
> It shouldn't really be something that the kernel is explicitly
> exporting because it is a characteristic of the board design.  It is
> something that belongs in the tree-proper.  ie. when you extract the
> tree you have data telling what the region is, and why it is reserved.

Agreed.

> 
> > We just need a
> > way to preserve what was there at boot to pass to the new kernel.
> 
> Yet there is no differentiation between the board-dictated memory
> reserves and the things that U-Boot/Linux made an arbitrary decision
> on.  The solution should focus not on "can I throw this one away?" but
> rather "Is this one I should keep?"  :-)  A subtle difference, I know,
> but it changes the way you approach the solution.

Fair enough. I think the above solution will work nicely, and I can
start implementing something if you agree - if I interpreted your idea
correctly. Although it should not require any changes to the kernel
proper.

-M

^ permalink raw reply

* [PATCH 5/5] v2 Enable multiple sections per directory for ppc
From: Nathan Fontenot @ 2010-07-15 18:41 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev; +Cc: KAMEZAWA Hiroyuki
In-Reply-To: <4C3F53D1.3090001@austin.ibm.com>

Update the powerpc/pseries code to initialize
the memory sysfs directory block size to be the
same size as a LMB.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   66 +++++++++++++++++++-----
 1 file changed, 53 insertions(+), 13 deletions(-)

Index: linux-2.6/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2010-07-15 09:54:06.000000000 -0500
+++ linux-2.6/arch/powerpc/platforms/pseries/hotplug-memory.c	2010-07-15 09:56:19.000000000 -0500
@@ -17,6 +17,54 @@
 #include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
+static u32 get_memblock_size(void)
+{
+	struct device_node *np;
+	unsigned int memblock_size = 0;
+
+	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (np) {
+		const unsigned int *size;
+
+		size = of_get_property(np, "ibm,lmb-size", NULL);
+		memblock_size = size ? *size : 0;
+
+		of_node_put(np);
+	} else {
+		unsigned int memzero_size = 0;
+		const unsigned int *regs;
+
+		np = of_find_node_by_path("/memory@0");
+		if (np) {
+			regs = of_get_property(np, "reg", NULL);
+			memzero_size = regs ? regs[3] : 0;
+			of_node_put(np);
+		}
+
+		if (memzero_size) {
+			/* We now know the size of memory@0, use this to find
+			 * the first memoryblock and get its size.
+			 */
+			char buf[64];
+
+			sprintf(buf, "/memory@%x", memzero_size);
+			np = of_find_node_by_path(buf);
+			if (np) {
+				regs = of_get_property(np, "reg", NULL);
+				memblock_size = regs ? regs[3] : 0;
+				of_node_put(np);
+			}
+		}
+	}
+
+	return memblock_size;
+}
+
+u32 memory_block_size(void)
+{
+	return get_memblock_size();
+}
+
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
@@ -127,30 +175,22 @@
 
 static int pseries_drconf_memory(unsigned long *base, unsigned int action)
 {
-	struct device_node *np;
-	const unsigned long *memblock_size;
+	unsigned long memblock_size;
 	int rc;
 
-	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-	if (!np)
+	memblock_size = get_memblock_size();
+	if (!memblock_size)
 		return -EINVAL;
 
-	memblock_size = of_get_property(np, "ibm,memblock-size", NULL);
-	if (!memblock_size) {
-		of_node_put(np);
-		return -EINVAL;
-	}
-
 	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = memblock_add(*base, *memblock_size);
+		rc = memblock_add(*base, memblock_size);
 		rc = (rc < 0) ? -EINVAL : 0;
 	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_memblock(*base, *memblock_size);
+		rc = pseries_remove_memblock(*base, memblock_size);
 	} else {
 		rc = -EINVAL;
 	}
 
-	of_node_put(np);
 	return rc;
 }
 

^ permalink raw reply

* [PATCH] edac: mpc85xx: Fix coldplug/hotplug module autoloading
From: Anton Vorontsov @ 2010-07-15 18:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Peter Tyser, linux-kernel, Dave Jiang, linuxppc-dev,
	Doug Thompson

MPC85xx EDAC driver is missing module device aliases, so the driver
won't load automatically on boot. This patch fixes the issue by
adding proper MODULE_DEVICE_TABLE() macros.

Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
---
 drivers/edac/mpc85xx_edac.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 3820879..cfa86f7 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -336,6 +336,7 @@ static struct of_device_id mpc85xx_pci_err_of_match[] = {
 	},
 	{},
 };
+MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match);
 
 static struct of_platform_driver mpc85xx_pci_err_driver = {
 	.probe = mpc85xx_pci_err_probe,
@@ -654,6 +655,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
 	{ .compatible = "fsl,p4080-l2-cache-controller", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, mpc85xx_l2_err_of_match);
 
 static struct of_platform_driver mpc85xx_l2_err_driver = {
 	.probe = mpc85xx_l2_err_probe,
@@ -1133,6 +1135,7 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
 	{ .compatible = "fsl,p4080-memory-controller", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, mpc85xx_mc_err_of_match);
 
 static struct of_platform_driver mpc85xx_mc_err_driver = {
 	.probe = mpc85xx_mc_err_probe,
-- 
1.7.0.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox