LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH V4 2/3] powerpc/swiotlb: Enable at early stage and disable if not necessary
From: Kumar Gala @ 2012-08-02 12:54 UTC (permalink / raw)
  To: Jia Hongtao; +Cc: B07421, linuxppc-dev
In-Reply-To: <1343907741-20589-3-git-send-email-B38951@freescale.com>


On Aug 2, 2012, at 6:42 AM, Jia Hongtao wrote:

> Remove the dependency on PCI initialization for SWIOTLB =
initialization.
> So that PCI can be initialized at proper time.
>=20
> SWIOTLB is partly determined by PCI inbound/outbound map which is =
assigned
> in PCI initialization. But swiotlb_init() should be done at the stage =
of
> mem_init() which is much earlier than PCI initialization. So we =
reserve the
> memory for SWIOTLB first and free it if not necessary.
>=20
> All boards are converted to fit this change.
>=20
> Signed-off-by: Jia Hongtao <B38951@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> ---

This doesn't seem like it addresses our issue w/regards to not being =
able to map all of memory from PCI.


> arch/powerpc/include/asm/swiotlb.h       |    6 ++++++
> arch/powerpc/kernel/dma-swiotlb.c        |   20 ++++++++++++++++++++
> arch/powerpc/mm/mem.c                    |    3 +--
> arch/powerpc/platforms/44x/currituck.c   |   10 ++--------
> arch/powerpc/platforms/85xx/mpc85xx_ds.c |    1 +
> arch/powerpc/platforms/85xx/qemu_e500.c  |    2 +-
> arch/powerpc/sysdev/fsl_pci.c            |    5 +----
> 7 files changed, 32 insertions(+), 15 deletions(-)

Don't we also want to update all these:

arch/powerpc/platforms/85xx/corenet_ds.c:               =
ppc_swiotlb_enable =3D 1;
arch/powerpc/platforms/85xx/ge_imp3a.c:         ppc_swiotlb_enable =3D =
1;
arch/powerpc/platforms/85xx/mpc8536_ds.c:               =
ppc_swiotlb_enable =3D 1;
arch/powerpc/platforms/85xx/mpc85xx_mds.c:              =
ppc_swiotlb_enable =3D 1;
arch/powerpc/platforms/85xx/p1022_ds.c:         ppc_swiotlb_enable =3D =
1;
arch/powerpc/platforms/86xx/mpc86xx_hpcn.c:             =
ppc_swiotlb_enable =3D 1;


>=20
> diff --git a/arch/powerpc/include/asm/swiotlb.h =
b/arch/powerpc/include/asm/swiotlb.h
> index 8979d4c..de99d6e 100644
> --- a/arch/powerpc/include/asm/swiotlb.h
> +++ b/arch/powerpc/include/asm/swiotlb.h
> @@ -22,4 +22,10 @@ int __init swiotlb_setup_bus_notifier(void);
>=20
> extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
>=20
> +#ifdef CONFIG_SWIOTLB
> +void swiotlb_detect_4g(void);
> +#else
> +static inline void swiotlb_detect_4g(void) {}
> +#endif
> +
> #endif /* __ASM_SWIOTLB_H */
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c =
b/arch/powerpc/kernel/dma-swiotlb.c
> index 4ab88da..aa85550 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -104,3 +104,23 @@ int __init swiotlb_setup_bus_notifier(void)
> 			      &ppc_swiotlb_plat_bus_notifier);
> 	return 0;
> }
> +
> +void swiotlb_detect_4g(void)
> +{
> +	if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
> +		ppc_swiotlb_enable =3D 1;
> +}
> +
> +static int __init swiotlb_late_init(void)
> +{
> +	if (ppc_swiotlb_enable) {
> +		swiotlb_print_info();
> +		set_pci_dma_ops(&swiotlb_dma_ops);
> +		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> +	} else {
> +		swiotlb_free();
> +	}
> +
> +	return 0;
> +}
> +subsys_initcall(swiotlb_late_init);
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index baaafde..f23c4e0 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -300,8 +300,7 @@ void __init mem_init(void)
> 	unsigned long reservedpages =3D 0, codesize, initsize, datasize, =
bsssize;
>=20
> #ifdef CONFIG_SWIOTLB
> -	if (ppc_swiotlb_enable)
> -		swiotlb_init(1);
> +	swiotlb_init(0);
> #endif
>=20
> 	num_physpages =3D memblock_phys_mem_size() >> PAGE_SHIFT;
> diff --git a/arch/powerpc/platforms/44x/currituck.c =
b/arch/powerpc/platforms/44x/currituck.c
> index 9f6c33d..6bd89a0 100644
> --- a/arch/powerpc/platforms/44x/currituck.c
> +++ b/arch/powerpc/platforms/44x/currituck.c
> @@ -21,7 +21,6 @@
>  */
>=20
> #include <linux/init.h>
> -#include <linux/memblock.h>
> #include <linux/of.h>
> #include <linux/of_platform.h>
> #include <linux/rtc.h>
> @@ -159,13 +158,8 @@ static void __init ppc47x_setup_arch(void)
>=20
> 	/* No need to check the DMA config as we /know/ our windows are =
all of
>  	 * RAM.  Lets hope that doesn't change */
> -#ifdef CONFIG_SWIOTLB
> -	if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
> -		ppc_swiotlb_enable =3D 1;
> -		set_pci_dma_ops(&swiotlb_dma_ops);
> -		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> -	}
> -#endif
> +	swiotlb_detect_4g();
> +
> 	ppc47x_smp_init();
> }
>=20
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c =
b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> index 6d3265f..56f8c8f 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> @@ -159,6 +159,7 @@ static void __init mpc85xx_ds_setup_arch(void)
> 	if (ppc_md.progress)
> 		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
>=20
> +	swiotlb_detect_4g();
> 	mpc85xx_ds_pci_init();
> 	mpc85xx_smp_init();
>=20
> diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c =
b/arch/powerpc/platforms/85xx/qemu_e500.c
> index 95a2e53..04260cd 100644
> --- a/arch/powerpc/platforms/85xx/qemu_e500.c
> +++ b/arch/powerpc/platforms/85xx/qemu_e500.c
> @@ -41,7 +41,7 @@ static void __init qemu_e500_setup_arch(void)
> {
> 	ppc_md.progress("qemu_e500_setup_arch()", 0);
>=20
> -	fsl_pci_init();
> +	swiotlb_detect_4g();

removing fsl_pci_init() seems wrong.

> 	mpc85xx_smp_init();
> }
>=20
> diff --git a/arch/powerpc/sysdev/fsl_pci.c =
b/arch/powerpc/sysdev/fsl_pci.c
> index 6938792..da7a3d7 100644
> --- a/arch/powerpc/sysdev/fsl_pci.c
> +++ b/arch/powerpc/sysdev/fsl_pci.c
> @@ -872,11 +872,8 @@ void __devinit fsl_pci_init(void)
> 	 * we need SWIOTLB to handle buffers located outside of
> 	 * dma capable memory region
> 	 */
> -	if (memblock_end_of_DRAM() - 1 > max) {
> +	if (memblock_end_of_DRAM() - 1 > max)
> 		ppc_swiotlb_enable =3D 1;
> -		set_pci_dma_ops(&swiotlb_dma_ops);
> -		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> -	}
> #endif
> }
> #endif
> --=20
> 1.7.5.1
>=20

^ permalink raw reply

* Re: [PATCH v5 3/6] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Ira W. Snyder @ 2012-08-02 15:55 UTC (permalink / raw)
  To: Liu Qiang-B32616
  Cc: herbert@gondor.apana.org.au, Vinod Koul,
	linux-kernel@vger.kernel.org, dan.j.williams@gmail.com,
	linux-crypto@vger.kernel.org, Dan Williams,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <BCB48C05FCE8BC4D9E61E841ECBE6DB70D2E04@039-SN2MPN1-011.039d.mgd.msft.net>

On Thu, Aug 02, 2012 at 07:21:51AM +0000, Liu Qiang-B32616 wrote:
> > -----Original Message-----
> > From: Ira W. Snyder [mailto:iws@ovro.caltech.edu]
> > Sent: Thursday, August 02, 2012 1:25 AM
> > To: Liu Qiang-B32616
> > Cc: linux-crypto@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > kernel@vger.kernel.org; dan.j.williams@gmail.com; Vinod Koul;
> > herbert@gondor.hengli.com.au; Dan Williams; davem@davemloft.net
> > Subject: Re: [PATCH v5 3/6] fsl-dma: change release process of dma
> > descriptor for supporting async_tx
> > 
> > On Wed, Aug 01, 2012 at 04:49:17PM +0800, qiang.liu@freescale.com wrote:
> > > From: Qiang Liu <qiang.liu@freescale.com>
> > >
> > > Fix the potential risk when enable config NET_DMA and ASYNC_TX.
> > > Async_tx is lack of support in current release process of dma
> > > descriptor, all descriptors will be released whatever is acked or
> > > no-acked by async_tx, so there is a potential race condition when dma
> > > engine is uesd by others clients (e.g. when enable NET_DMA to offload
> > TCP).
> > >
> > > In our case, a race condition which is raised when use both of talitos
> > > and dmaengine to offload xor is because napi scheduler will sync all
> > > pending requests in dma channels, it affects the process of raid
> > > operations due to ack_tx is not checked in fsl dma. The no-acked
> > > descriptor is freed which is submitted just now, as a dependent tx,
> > > this freed descriptor trigger
> > > BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
> > >
> > > TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
> > > GPR00: 00000001 ecf41ca0 ee44/921a94a0 0000003f 00000001 c00593e4
> > > 00000000 00000001
> > > GPR08: 00000000 a7a7a7a7 00000001 045/920000002 42028042 100a38d4
> > > ed576d98 00000000
> > > GPR16: ed5a11b0 00000000 2b162000 00000200 046/920000000 2d555000
> > > ed3015e8 c15a7aa0
> > > GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 e47/92f640bb0
> > > ef640c30 ecf41ca0 NIP [c02b048c] async_tx_submit+0x6c/0x2b4 LR
> > > [c02b068c] async_tx_submit+0x26c/0x2b4 Call Trace:
> > > [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b448/92 (unreliable)
> > > [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c [ecf41d20] [c0421064]
> > > async_copy_data+0xa0/0x17c [ecf41d70] [c0421cf4]
> > > __raid_run_ops+0x874/0xe10 [ecf41df0] [c0426ee4]
> > > handle_stripe+0x820/0x25e8 [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
> > > [ecf41f40] [c04329b8] md_thread+0x138/0x16c [ecf41f90] [c008277c]
> > > kthread+0x8c/0x90 [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68
> > >
> > > Another major modification in this patch is the change to completed
> > > descriptors, there is a potential risk which caused by exception
> > > interrupt, all descriptors in ld_running list are seemed completed
> > > when an interrupt raised, it works fine under normal condition, but if
> > > there is an exception occured, it cannot work as our excepted.
> > > Hardware should not depend on s/w list, the right way is to read
> > > current descriptor address register to find the last completed
> > > descriptor. If an interrupt is raised by an error, all descriptors in
> > > ld_running should not be seemed finished, or these unfinished
> > descriptors in ld_running will be released wrongly.
> > >
> > > A simple way to reproduce,
> > > Enable dmatest first, then insert some bad descriptors which can
> > > trigger Programming Error interrupts before the good descriptors.
> > > Last, the good descriptors will be freed before they are processsed
> > > because of the exception intrerrupt.
> > >
> > > Note: the bad descriptors are only for simulating an exception
> > interrupt.
> > > This case can illustrate the potential risk in current fsl-dma very
> > well.
> > >
> > 
> > I've never managed to trigger a PE (programming error) interrupt on the
> > 83xx hardware. Any time I intentionally caused an error, the hardware
> > wedged itself. The CB (channel busy) bit is stuck high, and cannot be
> > cleared without a hard reset of the board.
> Sorry, the exception indeed will be occurred, actually, the capability DMA_INTERRUPT
> will reproduce the issue under conditions. It will trigger a exception because of
> bad descriptor (length is zero, src and dst are zero, a->b->c->bada->badb->d, we cannot find out which one is really finished in an interrupt tasklet).
> So, we'd better consider this case.
> 
> BTW, I have already found out your patch which is used to resolve the issue of dma lock,
> http://lkml.indiana.edu/hypermail/linux/kernel/1103.0/01519.html
> 

Ok. I haven't tested bad descriptors since several years ago. I agree
that it can happen, so we should fix it.

> > 
> > I agree the "snoop on the hardware" technique works. As far as I can tell,
> > you have implemented the code correctly.
> > 
> > The MPC8349EARM.pdf from Freescale indicates that the hardware will halt
> > in response to a programming error, and generate a PE interrupt. See
> > section 12.5.3.3 (pg 568).
> > 
> > The driver, as it is written, will never recover from such a condition.
> > Since you are complaining about this situation, do you intend to fix it?
> Frankly, I don't think your patch really can resolve the issue. Now, I understand what problem happen to you, I will follow it.
> 

You are correct. My patch does not resolve the issue.

> Yes, you are right, the driver will never recover except reset the board.
> I see your description and I can reproduce it with dmatest on p1022ds with latest kernel, Dmatest with 6 threads, 200,000 iterations per thread several is passed with or without my patch, but dma is locked when up to 300,000 itrerations even though drop my patch.
> Another test on p4080, 8 threads with 1,000,000 iterations per thread is passed with/without my patch.
> I will follow this issue and try to find the root cause, but it should be another topic:)
> 

I agree, it can be another topic. We can focus on getting the MD RAID
problems fixed first, and then fix the lockup.

Do you have CONFIG_NET_DMA enabled? I have always set CONFIG_NET_DMA=n,
I do not use it. I wonder if this is a factor.

> Here, I agree with yours most comments, I will merge some functions from your patch, I will send v6 if you agree with my comments. Thanks.
> Please see my comments inline.
> 

All of the comments below look good. I look forward to reviewing v6.

Thanks for responding,
Ira

> > 
> > > Cc: Dan Williams <dan.j.williams@intel.com>
> > > Cc: Dan Williams <dan.j.williams@gmail.com>
> > > Cc: Vinod Koul <vinod.koul@intel.com>
> > > Cc: Li Yang <leoli@freescale.com>
> > > Cc: Ira W. Snyder <iws@ovro.caltech.edu>
> > > Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> > > ---
> > >  drivers/dma/fsldma.c |  242 +++++++++++++++++++++++++++++++++++-------
> > --------
> > >  drivers/dma/fsldma.h |    1 +
> > >  2 files changed, 172 insertions(+), 71 deletions(-)
> > >
> > > diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index
> > > 4f2f212..87f52c0 100644
> > > --- a/drivers/dma/fsldma.c
> > > +++ b/drivers/dma/fsldma.c
> > > @@ -400,6 +400,125 @@ out_splice:
> > >  	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);  }
> > >
> > > +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan);
> > > +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan);
> > > +
> > 
> > As noted in my reply to patch 4/6, please swap the order of this patch
> > and the following patch.
> > 
> > These lines should not be added or removed in either patch.
> Ok.
> 
> > 
> > > +/**
> > > + * fsldma_clean_completed_descriptor - free all descriptors which
> > > + * has been completed and acked
> > > + * @chan: Freescale DMA channel
> > > + *
> > > + * This function is used on all completed and acked descriptors.
> > > + * All descriptors should only be freed in this function.
> > > + */
> > > +static int
> > > +fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
> > 
> > This should be 'static void'. It does not return an error code.
> > 
> Ok.
> 
> > > +{
> > > +	struct fsl_desc_sw *desc, *_desc;
> > > +
> > > +	/* Run the callback for each descriptor, in order */
> > > +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
> > > +
> > > +		if (async_tx_test_ack(&desc->async_tx)) {
> > > +			/* Remove from the list of transactions */
> > > +			list_del(&desc->node);
> > > +#ifdef FSL_DMA_LD_DEBUG
> > > +			chan_dbg(chan, "LD %p free\n", desc); #endif
> > > +			dma_pool_free(chan->desc_pool, desc,
> > > +					desc->async_tx.phys);
> > 
> > This code appears in multiple places in the driver. Please consider
> > adding my patch 3/7 titled "[PATCH 3/7] fsl-dma: add
> > fsl_dma_free_descriptor() to reduce code duplication" to your patch
> > series.
> Accept.
> 
> > 
> > > +		}
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/**
> > > + * fsldma_run_tx_complete_actions - cleanup and free a single link
> > > +descriptor
> > 
> > This documentation is incorrect. This code NEVER frees a descriptor.
> I will correct it.
> 
> > 
> > > + * @chan: Freescale DMA channel
> > > + * @desc: descriptor to cleanup and free
> > > + * @cookie: Freescale DMA transaction identifier
> > > + *
> > > + * This function is used on a descriptor which has been executed by
> > > +the DMA
> > > + * controller. It will run any callbacks, submit any dependencies.
> > > + */
> > > +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsl_desc_sw
> > *desc,
> > > +		struct fsldma_chan *chan, dma_cookie_t cookie)
> > 
> > Please change the parameter order to:
> > 
> > static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan
> > *chan,
> > 		struct fsl_desc_sw *desc, dma_cookie_t cookie)
> > 
> > Every other function in the driver uses this parameter order. Channel
> > comes first, then descriptor.
> > 
> My fault, I will correct it.
> 
> > > +{
> > > +	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> > > +	struct device *dev = chan->common.device->dev;
> > > +	dma_addr_t src = get_desc_src(chan, desc);
> > > +	dma_addr_t dst = get_desc_dst(chan, desc);
> > > +	u32 len = get_desc_cnt(chan, desc);
> > > +
> > > +	BUG_ON(txd->cookie < 0);
> > > +
> > > +	if (txd->cookie > 0) {
> > 
> > It will significantly reduce your patch size if you move this if
> > statement to the function which calls this one. I've provided an example
> > down below, in the one place where this code is used.
> My comments as below.
> 
> > 
> > > +		cookie = txd->cookie;
> > > +
> > > +		/* Run the link descriptor callback function */
> > > +		if (txd->callback) {
> > > +#ifdef FSL_DMA_LD_DEBUG
> > > +			chan_dbg(chan, "LD %p callback\n", desc); #endif
> > > +			txd->callback(txd->callback_param);
> > > +		}
> > > +
> > > +		/* Unmap the dst buffer, if requested */
> > > +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> > > +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> > > +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> > > +			else
> > > +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> > > +		}
> > > +
> > > +		/* Unmap the src buffer, if requested */
> > > +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> > > +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> > > +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> > > +			else
> > > +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> > > +		}
> > > +	}
> > > +
> > > +	/* Run any dependencies */
> > > +	dma_run_dependencies(txd);
> > > +
> > > +	return cookie;
> > > +}
> > > +
> > > +/**
> > > + * fsldma_clean_running_descriptor - move the completed descriptor
> > > +from
> > > + * ld_running to ld_completed
> > > + * @chan: Freescale DMA channel
> > > + * @desc: the descriptor which is completed
> > > + *
> > > + * Free the descriptor directly if acked by async_tx api, or move it
> > > +to
> > > + * queue ld_completed.
> > > + */
> > > +static int
> > 
> > This code never returns an error code. It should be 'static void'.
> I will correct it.
> 
> > 
> > > +fsldma_clean_running_descriptor(struct fsldma_chan *chan,
> > > +		struct fsl_desc_sw *desc)
> > > +{
> > > +	/* Remove from the list of transactions */
> > > +	list_del(&desc->node);
> > > +	/*
> > > +	 * the client is allowed to attach dependent operations
> > > +	 * until 'ack' is set
> > > +	 */
> > > +	if (!async_tx_test_ack(&desc->async_tx)) {
> > > +		/*
> > > +		 * Move this descriptor to the list of descriptors which is
> > > +		 * completed, but still awaiting the 'ack' bit to be set.
> > > +		 */
> > > +		list_add_tail(&desc->node, &chan->ld_completed);
> > > +		return 0;
> > > +	}
> > > +
> > > +	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
> > > +	return 0;
> > > +}
> > > +
> > >  static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor
> > > *tx)  {
> > >  	struct fsldma_chan *chan = to_fsl_chan(tx->chan); @@ -534,8 +653,10
> > > @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
> > >
> > >  	chan_dbg(chan, "free all channel resources\n");
> > >  	spin_lock_irqsave(&chan->desc_lock, flags);
> > > +	fsldma_cleanup_descriptor(chan);
> > >  	fsldma_free_desc_list(chan, &chan->ld_pending);
> > >  	fsldma_free_desc_list(chan, &chan->ld_running);
> > > +	fsldma_free_desc_list(chan, &chan->ld_completed);
> > >  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> > >
> > >  	dma_pool_destroy(chan->desc_pool);
> > > @@ -819,46 +940,53 @@ static int fsl_dma_device_control(struct dma_chan
> > *dchan,
> > >   * controller. It will run any callbacks, submit any dependencies, and
> > then
> > >   * free the descriptor.
> > >   */
> > 
> > This documentation is now wrong. This function no longer operates on a
> > single descriptor. It operates on all descriptors in ld_running and
> > ld_completed.
> > 
> > Please fix the documentation, and add locking notes.
> No, it only frees one descriptor.
> 
> > 
> > > -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> > > -				      struct fsl_desc_sw *desc)
> > > +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
> > 
> > I think the name should change to fsldma_cleanup_descriptors(). It cleans
> > up one or more descriptors now.
> It only frees one descriptor as its designed.
> 
> > 
> > >  {
> > > -	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> > > -	struct device *dev = chan->common.device->dev;
> > > -	dma_addr_t src = get_desc_src(chan, desc);
> > > -	dma_addr_t dst = get_desc_dst(chan, desc);
> > > -	u32 len = get_desc_cnt(chan, desc);
> > > +	struct fsl_desc_sw *desc, *_desc;
> > > +	dma_cookie_t cookie = 0;
> > > +	dma_addr_t curr_phys = get_cdar(chan);
> > > +	int idle = dma_is_idle(chan);
> > > +	int seen_current = 0;
> > >
> > 
> > The hardware can advance quite a bit between here, where you save the
> > current descriptor address and idle status.
> > 
> > > -	/* Run the link descriptor callback function */
> > > -	if (txd->callback) {
> > > -#ifdef FSL_DMA_LD_DEBUG
> > > -		chan_dbg(chan, "LD %p callback\n", desc);
> > > -#endif
> > > -		txd->callback(txd->callback_param);
> > > -	}
> > > +	fsldma_clean_completed_descriptor(chan);
> > >
> > > -	/* Run any dependencies */
> > > -	dma_run_dependencies(txd);
> > > +	/* Run the callback for each descriptor, in order */
> > > +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> > > +		/*
> > > +		 * do not advance past the current descriptor loaded into the
> > > +		 * hardware channel, subsequent descriptors are either in
> > > +		 * process or have not been submitted
> > > +		 */
> > > +		if (seen_current)
> > > +			break;
> > >
> > > -	/* Unmap the dst buffer, if requested */
> > > -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> > > -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> > > -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> > > -		else
> > > -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> > > -	}
> > > +		/*
> > > +		 * stop the search if we reach the current descriptor and the
> > > +		 * channel is busy
> > > +		 */
> > > +		if (desc->async_tx.phys == curr_phys) {
> > > +			seen_current = 1;
> > > +			if (!idle)
> > > +				break;
> > > +		}
> > 
> > And here, where you check the current descriptor address and idle status.
> > 
> > Should this change to:
> > 
> > if (desc->async_tx.phys == get_cdar(chan)) {
> > 	seen_current = 1;
> > 	if (!dma_is_idle(chan))
> > 		break;
> > }
> Ok, I will use your code here.
> 
> > 
> > > +
> > > +		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
> > > +
> > 
> > I would prefer if the code just kept track of the cookie here, rather
> > than passing it through this function call. This code also illustrates
> > how you can remove the "if (txd->cookie > 0)" check from
> > fsldma_run_tx_complete_actions() to reduce the patch size.
> > 
> I cannot agree with you, patch size is important, but program readable is also important.
> My reason as below,
> According to my knowledge, cookie is used to judge whether this descriptor is finished, if it is zero, it means we didn't assign a value for it. We should keep it original meaning for clear?
> Second, I think we should not set a complex process to free descriptor, especially according to different state of the descriptor, the interface should be seemed more reusable and common.
> Last, I don't want to see the interface is coupled too many functions. It's easier extended for future.
> How's your thinking? Of course, your implement is also ok.
> 
> > /*
> >  * Only descriptors with non-zero cookies need their completion
> >  * actions run.
> >  */
> > if (desc->async_tx.cookie > 0) {
> > 	cookie = desc->async_tx.cookie;
> > 	fsldma_run_tx_complete_actions(chan, desc);
> > 	desc->async_tx.cookie = 0;
> > }
> > 
> > /* This descriptor has been ACKed, free it */ if
> > (async_tx_test_ack(&desc->async_tx)) {
> > 	fsl_dma_free_descriptor(chan, desc);
> > 	continue;
> > }
> > 
> > /*
> >  * This descriptor was not ACKed, add it to the ld_completed
> >  * list, to be freed after the ACK bit is set.
> >  */
> > list_del(&desc->node);
> > list_add_tail(&desc->node, &chan->ld_completed);
> > 
> > 
> > > +		if (fsldma_clean_running_descriptor(chan, desc))
> > > +			break;
> > >
> > 
> > This if statement will never trigger. fsldma_clean_running_descriptor()
> > only returns 0. It is useless.
> > 
> > > -	/* Unmap the src buffer, if requested */
> > > -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> > > -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> > > -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> > > -		else
> > > -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> > >  	}
> > >
> > > -#ifdef FSL_DMA_LD_DEBUG
> > > -	chan_dbg(chan, "LD %p free\n", desc);
> > > -#endif
> > > -	dma_pool_free(chan->desc_pool, desc, txd->phys);
> > > +	/*
> > > +	 * Start any pending transactions automatically
> > > +	 *
> > > +	 * In the ideal case, we keep the DMA controller busy while we go
> > > +	 * ahead and free the descriptors below.
> > > +	 */
> > > +	fsl_chan_xfer_ld_queue(chan);
> > > +
> > > +	if (cookie > 0)
> > > +		chan->common.completed_cookie = cookie;
> > >  }
> > >
> > >  /**
> > > @@ -954,11 +1082,15 @@ static enum dma_status fsl_tx_status(struct
> > dma_chan *dchan,
> > >  	enum dma_status ret;
> > >  	unsigned long flags;
> > >
> > > -	spin_lock_irqsave(&chan->desc_lock, flags);
> > >  	ret = dma_cookie_status(dchan, cookie, txstate);
> > > +	if (ret == DMA_SUCCESS)
> > > +		return ret;
> > > +
> > > +	spin_lock_irqsave(&chan->desc_lock, flags);
> > > +	fsldma_cleanup_descriptor(chan);
> > >  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> > >
> > > -	return ret;
> > > +	return dma_cookie_status(dchan, cookie, txstate);
> > >  }
> > >
> > >
> > > /*--------------------------------------------------------------------
> > > --------*/ @@ -1035,52 +1167,19 @@ static irqreturn_t
> > > fsldma_chan_irq(int irq, void *data)  static void
> > > dma_do_tasklet(unsigned long data)  {
> > >  	struct fsldma_chan *chan = (struct fsldma_chan *)data;
> > > -	struct fsl_desc_sw *desc, *_desc;
> > > -	LIST_HEAD(ld_cleanup);
> > >  	unsigned long flags;
> > >
> > >  	chan_dbg(chan, "tasklet entry\n");
> > >
> > >  	spin_lock_irqsave(&chan->desc_lock, flags);
> > >
> > > -	/* update the cookie if we have some descriptors to cleanup */
> > > -	if (!list_empty(&chan->ld_running)) {
> > > -		dma_cookie_t cookie;
> > > -
> > > -		desc = to_fsl_desc(chan->ld_running.prev);
> > > -		cookie = desc->async_tx.cookie;
> > > -		dma_cookie_complete(&desc->async_tx);
> > > -
> > > -		chan_dbg(chan, "completed_cookie=%d\n", cookie);
> > > -	}
> > > -
> > > -	/*
> > > -	 * move the descriptors to a temporary list so we can drop the lock
> > > -	 * during the entire cleanup operation
> > > -	 */
> > > -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> > > -
> > >  	/* the hardware is now idle and ready for more */
> > >  	chan->idle = true;
> > >
> > > -	/*
> > > -	 * Start any pending transactions automatically
> > > -	 *
> > > -	 * In the ideal case, we keep the DMA controller busy while we go
> > > -	 * ahead and free the descriptors below.
> > > -	 */
> > > -	fsl_chan_xfer_ld_queue(chan);
> > > -	spin_unlock_irqrestore(&chan->desc_lock, flags);
> > > -
> > > -	/* Run the callback for each descriptor, in order */
> > > -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> > > +	/* Run all cleanup for this descriptor */
> > 
> > Nitpick. This should be:
> > 
> > /* Run cleanup for all descriptors */
> No, this "all" means all operations for one descriptor but not "all descriptors".
> 
> > 
> > > +	fsldma_cleanup_descriptor(chan);
> > >
> > > -		/* Remove from the list of transactions */
> > > -		list_del(&desc->node);
> > > -
> > > -		/* Run all cleanup for this descriptor */
> > > -		fsldma_cleanup_descriptor(chan, desc);
> > > -	}
> > > +	spin_unlock_irqrestore(&chan->desc_lock, flags);
> > >
> > >  	chan_dbg(chan, "tasklet exit\n");
> > >  }
> > > @@ -1262,6 +1361,7 @@ static int __devinit fsl_dma_chan_probe(struct
> > fsldma_device *fdev,
> > >  	spin_lock_init(&chan->desc_lock);
> > >  	INIT_LIST_HEAD(&chan->ld_pending);
> > >  	INIT_LIST_HEAD(&chan->ld_running);
> > > +	INIT_LIST_HEAD(&chan->ld_completed);
> > >  	chan->idle = true;
> > >
> > >  	chan->common.device = &fdev->common; diff --git
> > > a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index f5c3879..7ede908
> > > 100644
> > > --- a/drivers/dma/fsldma.h
> > > +++ b/drivers/dma/fsldma.h
> > > @@ -140,6 +140,7 @@ struct fsldma_chan {
> > >  	spinlock_t desc_lock;		/* Descriptor operation lock */
> > >  	struct list_head ld_pending;	/* Link descriptors queue */
> > >  	struct list_head ld_running;	/* Link descriptors queue */
> > > +	struct list_head ld_completed;	/* Link descriptors queue */
> > 
> > It may help to add some documentation here. It would have helped me to
> > review this patch. Something like this:
> > 
> > /*
> >  * Descriptors which are queued to run, but have not yet been handed
> >  * to the hardware for execution
> >  */
> > struct list_head ld_pending;
> > 
> > /*
> >  * Descriptors which are currently being executed by the hardware  */
> > struct list_head ld_running;
> > 
> > /*
> >  * Descriptors which have finished execution by the hardware. These
> >  * descriptors have already had their cleanup actions run. They are
> >  * waiting for the ACK bit to be set by the async_tx API.
> >  */
> > struct list_head ld_completed;
> Ok, I will add these comments. Thanks.
> 
> > 
> > >  	struct dma_chan common;		/* DMA common channel */
> > >  	struct dma_pool *desc_pool;	/* Descriptors pool */
> > >  	struct device *dev;		/* Channel device */
> > > --
> > > 1.7.5.1
> > >
> > >
> > > _______________________________________________
> > > Linuxppc-dev mailing list
> > > Linuxppc-dev@lists.ozlabs.org
> > > https://lists.ozlabs.org/listinfo/linuxppc-dev
> 
> 

^ permalink raw reply

* Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Scott Wood @ 2012-08-02 15:59 UTC (permalink / raw)
  To: Kumar Gala; +Cc: B07421, linuxppc-dev, Jia Hongtao
In-Reply-To: <A4B0DB85-7B49-4748-8B94-0E1F1B7B663A@kernel.crashing.org>

On 08/02/2012 07:23 AM, Kumar Gala wrote:
> You need to convert all boards to use fsl_pci_init before this patch.  Otherwise we'll end up with PCI getting initialized twice on boards.

Alternatively, don't make fsl_pci_init an initcall until all boards have
been converted.

-Scott

^ permalink raw reply

* Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Scott Wood @ 2012-08-02 20:18 UTC (permalink / raw)
  To: Jia Hongtao; +Cc: B07421, linuxppc-dev
In-Reply-To: <1343907741-20589-4-git-send-email-B38951@freescale.com>

On 08/02/2012 06:42 AM, Jia Hongtao wrote:
> We unified the Freescale pci/pcie initialization by changing the fsl_pci
> to a platform driver. In previous PCI code architecture the initialization
> routine is called at board_setup_arch stage. Now the initialization is done
> in probe function which is architectural better. Also It's convenient for
> adding PM support for PCI controller in later patch.
> 
> Now we registered pci controllers as platform devices. So we combine two
> initialization code as one platform driver.
> 
> Signed-off-by: Jia Hongtao <B38951@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
> ---
>  arch/powerpc/platforms/85xx/mpc85xx_ds.c |   32 ++--------
>  arch/powerpc/sysdev/fsl_pci.c            |  102 ++++++++++++++++++-----------
>  arch/powerpc/sysdev/fsl_pci.h            |    6 +-
>  drivers/edac/mpc85xx_edac.c              |   43 ++++---------
>  4 files changed, 83 insertions(+), 100 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> index 56f8c8f..f2c7b1c 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> @@ -20,7 +20,6 @@
>  #include <linux/seq_file.h>
>  #include <linux/interrupt.h>
>  #include <linux/of_platform.h>
> -#include <linux/memblock.h>
>  
>  #include <asm/time.h>
>  #include <asm/machdep.h>
> @@ -117,40 +116,16 @@ void __init mpc85xx_ds_pic_init(void)
>  extern int uli_exclude_device(struct pci_controller *hose,
>  				u_char bus, u_char devfn);
>  
> -static struct device_node *pci_with_uli;
> -
>  static int mpc85xx_exclude_device(struct pci_controller *hose,
>  				   u_char bus, u_char devfn)
>  {
> -	if (hose->dn == pci_with_uli)
> +	if (hose->dn == fsl_pci_primary)
>  		return uli_exclude_device(hose, bus, devfn);
>  
>  	return PCIBIOS_SUCCESSFUL;
>  }
>  #endif	/* CONFIG_PCI */
>  
> -static void __init mpc85xx_ds_pci_init(void)
> -{
> -#ifdef CONFIG_PCI
> -	struct device_node *node;
> -
> -	fsl_pci_init();
> -
> -	/* See if we have a ULI under the primary */
> -
> -	node = of_find_node_by_name(NULL, "uli1575");
> -	while ((pci_with_uli = of_get_parent(node))) {
> -		of_node_put(node);
> -		node = pci_with_uli;
> -
> -		if (pci_with_uli == fsl_pci_primary) {
> -			ppc_md.pci_exclude_device = mpc85xx_exclude_device;
> -			break;
> -		}
> -	}
> -#endif
> -}
> -
>  /*
>   * Setup the architecture
>   */
> @@ -159,8 +134,11 @@ static void __init mpc85xx_ds_setup_arch(void)
>  	if (ppc_md.progress)
>  		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
>  
> +#ifdef CONFIG_PCI
> +	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
> +#endif

Why are you eliminating the uli lookup?  We don't want to call
uli_exclude_device on boards that don't have a uli.

-Scott

^ permalink raw reply

* RE: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Jia Hongtao-B38951 @ 2012-08-03  2:20 UTC (permalink / raw)
  To: Wood Scott-B07421; +Cc: linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <501AE0A2.4010809@freescale.com>

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogRnJpZGF5LCBBdWd1c3QgMDMsIDIwMTIgNDoxOSBBTQ0KPiBUbzogSmlhIEhv
bmd0YW8tQjM4OTUxDQo+IENjOiBsaW51eHBwYy1kZXZAbGlzdHMub3psYWJzLm9yZzsgZ2FsYWtA
a2VybmVsLmNyYXNoaW5nLm9yZzsgV29vZCBTY290dC0NCj4gQjA3NDIxOyBMaSBZYW5nLVI1ODQ3
Mg0KPiBTdWJqZWN0OiBSZTogW1BBVENIIFY0IDMvM10gcG93ZXJwYy9mc2wtcGNpOiBVbmlmeSBw
Y2kvcGNpZQ0KPiBpbml0aWFsaXphdGlvbiBjb2RlDQo+IA0KPiBPbiAwOC8wMi8yMDEyIDA2OjQy
IEFNLCBKaWEgSG9uZ3RhbyB3cm90ZToNCj4gPiBXZSB1bmlmaWVkIHRoZSBGcmVlc2NhbGUgcGNp
L3BjaWUgaW5pdGlhbGl6YXRpb24gYnkgY2hhbmdpbmcgdGhlDQo+IGZzbF9wY2kNCj4gPiB0byBh
IHBsYXRmb3JtIGRyaXZlci4gSW4gcHJldmlvdXMgUENJIGNvZGUgYXJjaGl0ZWN0dXJlIHRoZQ0K
PiBpbml0aWFsaXphdGlvbg0KPiA+IHJvdXRpbmUgaXMgY2FsbGVkIGF0IGJvYXJkX3NldHVwX2Fy
Y2ggc3RhZ2UuIE5vdyB0aGUgaW5pdGlhbGl6YXRpb24gaXMNCj4gZG9uZQ0KPiA+IGluIHByb2Jl
IGZ1bmN0aW9uIHdoaWNoIGlzIGFyY2hpdGVjdHVyYWwgYmV0dGVyLiBBbHNvIEl0J3MgY29udmVu
aWVudA0KPiBmb3INCj4gPiBhZGRpbmcgUE0gc3VwcG9ydCBmb3IgUENJIGNvbnRyb2xsZXIgaW4g
bGF0ZXIgcGF0Y2guDQo+ID4NCj4gPiBOb3cgd2UgcmVnaXN0ZXJlZCBwY2kgY29udHJvbGxlcnMg
YXMgcGxhdGZvcm0gZGV2aWNlcy4gU28gd2UgY29tYmluZQ0KPiB0d28NCj4gPiBpbml0aWFsaXph
dGlvbiBjb2RlIGFzIG9uZSBwbGF0Zm9ybSBkcml2ZXIuDQo+ID4NCj4gPiBTaWduZWQtb2ZmLWJ5
OiBKaWEgSG9uZ3RhbyA8QjM4OTUxQGZyZWVzY2FsZS5jb20+DQo+ID4gU2lnbmVkLW9mZi1ieTog
TGkgWWFuZyA8bGVvbGlAZnJlZXNjYWxlLmNvbT4NCj4gPiBTaWduZWQtb2ZmLWJ5OiBDaHVuaGUg
TGFuIDxDaHVuaGUuTGFuQGZyZWVzY2FsZS5jb20+DQo+ID4gLS0tDQo+ID4gIGFyY2gvcG93ZXJw
Yy9wbGF0Zm9ybXMvODV4eC9tcGM4NXh4X2RzLmMgfCAgIDMyICsrLS0tLS0tLS0NCj4gPiAgYXJj
aC9wb3dlcnBjL3N5c2Rldi9mc2xfcGNpLmMgICAgICAgICAgICB8ICAxMDIgKysrKysrKysrKysr
KysrKysrLS0tLQ0KPiAtLS0tLS0tDQo+ID4gIGFyY2gvcG93ZXJwYy9zeXNkZXYvZnNsX3BjaS5o
ICAgICAgICAgICAgfCAgICA2ICstDQo+ID4gIGRyaXZlcnMvZWRhYy9tcGM4NXh4X2VkYWMuYyAg
ICAgICAgICAgICAgfCAgIDQzICsrKystLS0tLS0tLS0NCj4gPiAgNCBmaWxlcyBjaGFuZ2VkLCA4
MyBpbnNlcnRpb25zKCspLCAxMDAgZGVsZXRpb25zKC0pDQo+ID4NCj4gPiBkaWZmIC0tZ2l0IGEv
YXJjaC9wb3dlcnBjL3BsYXRmb3Jtcy84NXh4L21wYzg1eHhfZHMuYw0KPiBiL2FyY2gvcG93ZXJw
Yy9wbGF0Zm9ybXMvODV4eC9tcGM4NXh4X2RzLmMNCj4gPiBpbmRleCA1NmY4YzhmLi5mMmM3YjFj
IDEwMDY0NA0KPiA+IC0tLSBhL2FyY2gvcG93ZXJwYy9wbGF0Zm9ybXMvODV4eC9tcGM4NXh4X2Rz
LmMNCj4gPiArKysgYi9hcmNoL3Bvd2VycGMvcGxhdGZvcm1zLzg1eHgvbXBjODV4eF9kcy5jDQo+
ID4gQEAgLTIwLDcgKzIwLDYgQEANCj4gPiAgI2luY2x1ZGUgPGxpbnV4L3NlcV9maWxlLmg+DQo+
ID4gICNpbmNsdWRlIDxsaW51eC9pbnRlcnJ1cHQuaD4NCj4gPiAgI2luY2x1ZGUgPGxpbnV4L29m
X3BsYXRmb3JtLmg+DQo+ID4gLSNpbmNsdWRlIDxsaW51eC9tZW1ibG9jay5oPg0KPiA+DQo+ID4g
ICNpbmNsdWRlIDxhc20vdGltZS5oPg0KPiA+ICAjaW5jbHVkZSA8YXNtL21hY2hkZXAuaD4NCj4g
PiBAQCAtMTE3LDQwICsxMTYsMTYgQEAgdm9pZCBfX2luaXQgbXBjODV4eF9kc19waWNfaW5pdCh2
b2lkKQ0KPiA+ICBleHRlcm4gaW50IHVsaV9leGNsdWRlX2RldmljZShzdHJ1Y3QgcGNpX2NvbnRy
b2xsZXIgKmhvc2UsDQo+ID4gIAkJCQl1X2NoYXIgYnVzLCB1X2NoYXIgZGV2Zm4pOw0KPiA+DQo+
ID4gLXN0YXRpYyBzdHJ1Y3QgZGV2aWNlX25vZGUgKnBjaV93aXRoX3VsaTsNCj4gPiAtDQo+ID4g
IHN0YXRpYyBpbnQgbXBjODV4eF9leGNsdWRlX2RldmljZShzdHJ1Y3QgcGNpX2NvbnRyb2xsZXIg
Kmhvc2UsDQo+ID4gIAkJCQkgICB1X2NoYXIgYnVzLCB1X2NoYXIgZGV2Zm4pDQo+ID4gIHsNCj4g
PiAtCWlmIChob3NlLT5kbiA9PSBwY2lfd2l0aF91bGkpDQo+ID4gKwlpZiAoaG9zZS0+ZG4gPT0g
ZnNsX3BjaV9wcmltYXJ5KQ0KPiA+ICAJCXJldHVybiB1bGlfZXhjbHVkZV9kZXZpY2UoaG9zZSwg
YnVzLCBkZXZmbik7DQo+ID4NCj4gPiAgCXJldHVybiBQQ0lCSU9TX1NVQ0NFU1NGVUw7DQo+ID4g
IH0NCj4gPiAgI2VuZGlmCS8qIENPTkZJR19QQ0kgKi8NCj4gPg0KPiA+IC1zdGF0aWMgdm9pZCBf
X2luaXQgbXBjODV4eF9kc19wY2lfaW5pdCh2b2lkKQ0KPiA+IC17DQo+ID4gLSNpZmRlZiBDT05G
SUdfUENJDQo+ID4gLQlzdHJ1Y3QgZGV2aWNlX25vZGUgKm5vZGU7DQo+ID4gLQ0KPiA+IC0JZnNs
X3BjaV9pbml0KCk7DQo+ID4gLQ0KPiA+IC0JLyogU2VlIGlmIHdlIGhhdmUgYSBVTEkgdW5kZXIg
dGhlIHByaW1hcnkgKi8NCj4gPiAtDQo+ID4gLQlub2RlID0gb2ZfZmluZF9ub2RlX2J5X25hbWUo
TlVMTCwgInVsaTE1NzUiKTsNCj4gPiAtCXdoaWxlICgocGNpX3dpdGhfdWxpID0gb2ZfZ2V0X3Bh
cmVudChub2RlKSkpIHsNCj4gPiAtCQlvZl9ub2RlX3B1dChub2RlKTsNCj4gPiAtCQlub2RlID0g
cGNpX3dpdGhfdWxpOw0KPiA+IC0NCj4gPiAtCQlpZiAocGNpX3dpdGhfdWxpID09IGZzbF9wY2lf
cHJpbWFyeSkgew0KPiA+IC0JCQlwcGNfbWQucGNpX2V4Y2x1ZGVfZGV2aWNlID0gbXBjODV4eF9l
eGNsdWRlX2RldmljZTsNCj4gPiAtCQkJYnJlYWs7DQo+ID4gLQkJfQ0KPiA+IC0JfQ0KPiA+IC0j
ZW5kaWYNCj4gPiAtfQ0KPiA+IC0NCj4gPiAgLyoNCj4gPiAgICogU2V0dXAgdGhlIGFyY2hpdGVj
dHVyZQ0KPiA+ICAgKi8NCj4gPiBAQCAtMTU5LDggKzEzNCwxMSBAQCBzdGF0aWMgdm9pZCBfX2lu
aXQgbXBjODV4eF9kc19zZXR1cF9hcmNoKHZvaWQpDQo+ID4gIAlpZiAocHBjX21kLnByb2dyZXNz
KQ0KPiA+ICAJCXBwY19tZC5wcm9ncmVzcygibXBjODV4eF9kc19zZXR1cF9hcmNoKCkiLCAwKTsN
Cj4gPg0KPiA+ICsjaWZkZWYgQ09ORklHX1BDSQ0KPiA+ICsJcHBjX21kLnBjaV9leGNsdWRlX2Rl
dmljZSA9IG1wYzg1eHhfZXhjbHVkZV9kZXZpY2U7DQo+ID4gKyNlbmRpZg0KPiANCj4gV2h5IGFy
ZSB5b3UgZWxpbWluYXRpbmcgdGhlIHVsaSBsb29rdXA/ICBXZSBkb24ndCB3YW50IHRvIGNhbGwN
Cj4gdWxpX2V4Y2x1ZGVfZGV2aWNlIG9uIGJvYXJkcyB0aGF0IGRvbid0IGhhdmUgYSB1bGkuDQo+
IA0KPiAtU2NvdHQNCg0KSSBmb3VuZCBvdXQgdGhhdCBhbGwgODV4eF9kcyBib2FyZHMgKG1wYzg1
NzJkcywgbXBjODU0NGRzLCBwMjAyMGRzKSBoYXZlDQpVTEkuIEFsc28gaW4gcGxhdGZvcm0gZHJp
dmVyIGZzbF9wY2lfcHJpbWFyeSBpcyBkZXRlcm1pbmVkIGF0IGFyY2hfaW5pdGNhbGwNCndoaWNo
IG1lYW5zIGF0IHRoZSBzdGFnZSBvZiBib2FyZF9zZXR1cF9hcmNoIGZzbF9wY2lfcHJpbWFyeSBp
cyBub3QgcmVhZHkuDQoNCi1Ib25ndGFvLg0KDQo=

^ permalink raw reply

* RE: [PATCH V4 2/3] powerpc/swiotlb: Enable at early stage and disable if not necessary
From: Jia Hongtao-B38951 @ 2012-08-03  2:21 UTC (permalink / raw)
  To: Kumar Gala
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <6DEE036D-C59B-41C6-80F3-762D7E4AF2C1@kernel.crashing.org>


> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> Sent: Thursday, August 02, 2012 8:55 PM
> To: Jia Hongtao-B38951
> Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Li Yang-R58472
> Subject: Re: [PATCH V4 2/3] powerpc/swiotlb: Enable at early stage and
> disable if not necessary
>=20
>=20
> On Aug 2, 2012, at 6:42 AM, Jia Hongtao wrote:
>=20
> > Remove the dependency on PCI initialization for SWIOTLB initialization.
> > So that PCI can be initialized at proper time.
> >
> > SWIOTLB is partly determined by PCI inbound/outbound map which is
> assigned
> > in PCI initialization. But swiotlb_init() should be done at the stage
> of
> > mem_init() which is much earlier than PCI initialization. So we reserve
> the
> > memory for SWIOTLB first and free it if not necessary.
> >
> > All boards are converted to fit this change.
> >
> > Signed-off-by: Jia Hongtao <B38951@freescale.com>
> > Signed-off-by: Li Yang <leoli@freescale.com>
> > ---
>=20
> This doesn't seem like it addresses our issue w/regards to not being able
> to map all of memory from PCI.

PCI init will determine ppc_swiotlb_enable due to PCI map. swiotlb_late_ini=
t
will handle all swiotlb things depend on the result of pci init.

>=20
>=20
> > arch/powerpc/include/asm/swiotlb.h       |    6 ++++++
> > arch/powerpc/kernel/dma-swiotlb.c        |   20 ++++++++++++++++++++
> > arch/powerpc/mm/mem.c                    |    3 +--
> > arch/powerpc/platforms/44x/currituck.c   |   10 ++--------
> > arch/powerpc/platforms/85xx/mpc85xx_ds.c |    1 +
> > arch/powerpc/platforms/85xx/qemu_e500.c  |    2 +-
> > arch/powerpc/sysdev/fsl_pci.c            |    5 +----
> > 7 files changed, 32 insertions(+), 15 deletions(-)
>=20
> Don't we also want to update all these:
>=20
> arch/powerpc/platforms/85xx/corenet_ds.c:
> ppc_swiotlb_enable =3D 1;
> arch/powerpc/platforms/85xx/ge_imp3a.c:         ppc_swiotlb_enable =3D 1;
> arch/powerpc/platforms/85xx/mpc8536_ds.c:
> ppc_swiotlb_enable =3D 1;
> arch/powerpc/platforms/85xx/mpc85xx_mds.c:
> ppc_swiotlb_enable =3D 1;
> arch/powerpc/platforms/85xx/p1022_ds.c:         ppc_swiotlb_enable =3D 1;
> arch/powerpc/platforms/86xx/mpc86xx_hpcn.c:
> ppc_swiotlb_enable =3D 1;
>=20

They are works fine at this point.

I will update all these boards after
[PATCH 3/3] Unify pci/pcie initialization code

>=20
> >
> > diff --git a/arch/powerpc/include/asm/swiotlb.h
> b/arch/powerpc/include/asm/swiotlb.h
> > index 8979d4c..de99d6e 100644
> > --- a/arch/powerpc/include/asm/swiotlb.h
> > +++ b/arch/powerpc/include/asm/swiotlb.h
> > @@ -22,4 +22,10 @@ int __init swiotlb_setup_bus_notifier(void);
> >
> > extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
> >
> > +#ifdef CONFIG_SWIOTLB
> > +void swiotlb_detect_4g(void);
> > +#else
> > +static inline void swiotlb_detect_4g(void) {}
> > +#endif
> > +
> > #endif /* __ASM_SWIOTLB_H */
> > diff --git a/arch/powerpc/kernel/dma-swiotlb.c
> b/arch/powerpc/kernel/dma-swiotlb.c
> > index 4ab88da..aa85550 100644
> > --- a/arch/powerpc/kernel/dma-swiotlb.c
> > +++ b/arch/powerpc/kernel/dma-swiotlb.c
> > @@ -104,3 +104,23 @@ int __init swiotlb_setup_bus_notifier(void)
> > 			      &ppc_swiotlb_plat_bus_notifier);
> > 	return 0;
> > }
> > +
> > +void swiotlb_detect_4g(void)
> > +{
> > +	if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
> > +		ppc_swiotlb_enable =3D 1;
> > +}
> > +
> > +static int __init swiotlb_late_init(void)
> > +{
> > +	if (ppc_swiotlb_enable) {
> > +		swiotlb_print_info();
> > +		set_pci_dma_ops(&swiotlb_dma_ops);
> > +		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> > +	} else {
> > +		swiotlb_free();
> > +	}
> > +
> > +	return 0;
> > +}
> > +subsys_initcall(swiotlb_late_init);
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index baaafde..f23c4e0 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -300,8 +300,7 @@ void __init mem_init(void)
> > 	unsigned long reservedpages =3D 0, codesize, initsize, datasize,
> bsssize;
> >
> > #ifdef CONFIG_SWIOTLB
> > -	if (ppc_swiotlb_enable)
> > -		swiotlb_init(1);
> > +	swiotlb_init(0);
> > #endif
> >
> > 	num_physpages =3D memblock_phys_mem_size() >> PAGE_SHIFT;
> > diff --git a/arch/powerpc/platforms/44x/currituck.c
> b/arch/powerpc/platforms/44x/currituck.c
> > index 9f6c33d..6bd89a0 100644
> > --- a/arch/powerpc/platforms/44x/currituck.c
> > +++ b/arch/powerpc/platforms/44x/currituck.c
> > @@ -21,7 +21,6 @@
> >  */
> >
> > #include <linux/init.h>
> > -#include <linux/memblock.h>
> > #include <linux/of.h>
> > #include <linux/of_platform.h>
> > #include <linux/rtc.h>
> > @@ -159,13 +158,8 @@ static void __init ppc47x_setup_arch(void)
> >
> > 	/* No need to check the DMA config as we /know/ our windows are all
> of
> >  	 * RAM.  Lets hope that doesn't change */
> > -#ifdef CONFIG_SWIOTLB
> > -	if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
> > -		ppc_swiotlb_enable =3D 1;
> > -		set_pci_dma_ops(&swiotlb_dma_ops);
> > -		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> > -	}
> > -#endif
> > +	swiotlb_detect_4g();
> > +
> > 	ppc47x_smp_init();
> > }
> >
> > diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> > index 6d3265f..56f8c8f 100644
> > --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> > +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> > @@ -159,6 +159,7 @@ static void __init mpc85xx_ds_setup_arch(void)
> > 	if (ppc_md.progress)
> > 		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
> >
> > +	swiotlb_detect_4g();
> > 	mpc85xx_ds_pci_init();
> > 	mpc85xx_smp_init();
> >
> > diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c
> b/arch/powerpc/platforms/85xx/qemu_e500.c
> > index 95a2e53..04260cd 100644
> > --- a/arch/powerpc/platforms/85xx/qemu_e500.c
> > +++ b/arch/powerpc/platforms/85xx/qemu_e500.c
> > @@ -41,7 +41,7 @@ static void __init qemu_e500_setup_arch(void)
> > {
> > 	ppc_md.progress("qemu_e500_setup_arch()", 0);
> >
> > -	fsl_pci_init();
> > +	swiotlb_detect_4g();
>=20
> removing fsl_pci_init() seems wrong.

You are right. I will fix this.
Thanks.

-Hongtao.

>=20
> > 	mpc85xx_smp_init();
> > }
> >
> > diff --git a/arch/powerpc/sysdev/fsl_pci.c
> b/arch/powerpc/sysdev/fsl_pci.c
> > index 6938792..da7a3d7 100644
> > --- a/arch/powerpc/sysdev/fsl_pci.c
> > +++ b/arch/powerpc/sysdev/fsl_pci.c
> > @@ -872,11 +872,8 @@ void __devinit fsl_pci_init(void)
> > 	 * we need SWIOTLB to handle buffers located outside of
> > 	 * dma capable memory region
> > 	 */
> > -	if (memblock_end_of_DRAM() - 1 > max) {
> > +	if (memblock_end_of_DRAM() - 1 > max)
> > 		ppc_swiotlb_enable =3D 1;
> > -		set_pci_dma_ops(&swiotlb_dma_ops);
> > -		ppc_md.pci_dma_dev_setup =3D pci_dma_dev_setup_swiotlb;
> > -	}
> > #endif
> > }
> > #endif
> > --
> > 1.7.5.1
> >
>=20

^ permalink raw reply

* [PATCH] powerpc/crypto: Remove virt_to_abs() usage in nx-842.c
From: Michael Ellerman @ 2012-08-03  2:23 UTC (permalink / raw)
  To: herbert; +Cc: linuxppc-dev, sjenning, linux-crypto

virt_to_abs() is just a wrapper around __pa(), use __pa() directly.

We should be including <asm/page.h> to get __pa(). abs_addr.h will be
removed shortly so drop that.

We were getting of.h via abs_addr.h so we need to include that directly.

Having done all that, clean up the ordering of the includes.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
 drivers/crypto/nx/nx-842.c |   34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 9da0fb2..0ce6257 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -21,13 +21,15 @@
  *          Seth Jennings <sjenning@linux.vnet.ibm.com>
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
-#include <asm/vio.h>
-#include <asm/pSeries_reconfig.h>
-#include <linux/slab.h>
-#include <asm/abs_addr.h>
 #include <linux/nx842.h>
-#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/vio.h>
 
 #include "nx_csbcpb.h" /* struct nx_csbcpb */
 
@@ -140,7 +142,7 @@ static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
 }
 
 struct nx842_slentry {
-	unsigned long ptr; /* Absolute address (use virt_to_abs()) */
+	unsigned long ptr; /* Real address (use __pa()) */
 	unsigned long len;
 };
 
@@ -167,7 +169,7 @@ static int nx842_build_scatterlist(unsigned long buf, int len,
 
 	entry = sl->entries;
 	while (len) {
-		entry->ptr = virt_to_abs(buf);
+		entry->ptr = __pa(buf);
 		nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
 		if (nextpage < buf + len) {
 			/* we aren't at the end yet */
@@ -369,8 +371,8 @@ int nx842_compress(const unsigned char *in, unsigned int inlen,
 	op.flags = NX842_OP_COMPRESS;
 	csbcpb = &workmem->csbcpb;
 	memset(csbcpb, 0, sizeof(*csbcpb));
-	op.csbcpb = virt_to_abs(csbcpb);
-	op.out = virt_to_abs(slout.entries);
+	op.csbcpb = __pa(csbcpb);
+	op.out = __pa(slout.entries);
 
 	for (i = 0; i < hdr->blocks_nr; i++) {
 		/*
@@ -400,13 +402,13 @@ int nx842_compress(const unsigned char *in, unsigned int inlen,
 		 */
 		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
 			/* Create direct DDE */
-			op.in = virt_to_abs(inbuf);
+			op.in = __pa(inbuf);
 			op.inlen = max_sync_size;
 
 		} else {
 			/* Create indirect DDE (scatterlist) */
 			nx842_build_scatterlist(inbuf, max_sync_size, &slin);
-			op.in = virt_to_abs(slin.entries);
+			op.in = __pa(slin.entries);
 			op.inlen = -nx842_get_scatterlist_size(&slin);
 		}
 
@@ -564,7 +566,7 @@ int nx842_decompress(const unsigned char *in, unsigned int inlen,
 	op.flags = NX842_OP_DECOMPRESS;
 	csbcpb = &workmem->csbcpb;
 	memset(csbcpb, 0, sizeof(*csbcpb));
-	op.csbcpb = virt_to_abs(csbcpb);
+	op.csbcpb = __pa(csbcpb);
 
 	/*
 	 * max_sync_size may have changed since compression,
@@ -596,12 +598,12 @@ int nx842_decompress(const unsigned char *in, unsigned int inlen,
 		if (likely((inbuf & NX842_HW_PAGE_MASK) ==
 			((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
 			/* Create direct DDE */
-			op.in = virt_to_abs(inbuf);
+			op.in = __pa(inbuf);
 			op.inlen = hdr->sizes[i];
 		} else {
 			/* Create indirect DDE (scatterlist) */
 			nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
-			op.in = virt_to_abs(slin.entries);
+			op.in = __pa(slin.entries);
 			op.inlen = -nx842_get_scatterlist_size(&slin);
 		}
 
@@ -612,12 +614,12 @@ int nx842_decompress(const unsigned char *in, unsigned int inlen,
 		 */
 		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
 			/* Create direct DDE */
-			op.out = virt_to_abs(outbuf);
+			op.out = __pa(outbuf);
 			op.outlen = max_sync_size;
 		} else {
 			/* Create indirect DDE (scatterlist) */
 			nx842_build_scatterlist(outbuf, max_sync_size, &slout);
-			op.out = virt_to_abs(slout.entries);
+			op.out = __pa(slout.entries);
 			op.outlen = -nx842_get_scatterlist_size(&slout);
 		}
 
-- 
1.7.9.5

^ permalink raw reply related

* Re: [PATCH] powerpc/crypto: Remove virt_to_abs() usage in nx-842.c
From: Michael Ellerman @ 2012-08-03  2:32 UTC (permalink / raw)
  To: herbert; +Cc: linuxppc-dev, linux-crypto, sjenning
In-Reply-To: <1343960595-30232-1-git-send-email-michael@ellerman.id.au>

Hi Herbert,

We're planning on removing virt_to_abs() from the powerpc tree this
cycle. So if you can take this patch in your tree everything should
continue building when the two trees merge.

cheers

^ permalink raw reply

* RE: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Jia Hongtao-B38951 @ 2012-08-03  3:39 UTC (permalink / raw)
  To: Kumar Gala
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <A4B0DB85-7B49-4748-8B94-0E1F1B7B663A@kernel.crashing.org>



> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> Sent: Thursday, August 02, 2012 8:24 PM
> To: Jia Hongtao-B38951
> Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Li Yang-R58472
> Subject: Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie
> initialization code
>=20
>=20
> On Aug 2, 2012, at 6:42 AM, Jia Hongtao wrote:
>=20
> > We unified the Freescale pci/pcie initialization by changing the
> fsl_pci
> > to a platform driver. In previous PCI code architecture the
> initialization
> > routine is called at board_setup_arch stage. Now the initialization is
> done
> > in probe function which is architectural better. Also It's convenient
> for
> > adding PM support for PCI controller in later patch.
> >
> > Now we registered pci controllers as platform devices. So we combine
> two
> > initialization code as one platform driver.
> >
> > Signed-off-by: Jia Hongtao <B38951@freescale.com>
> > Signed-off-by: Li Yang <leoli@freescale.com>
> > Signed-off-by: Chunhe Lan <Chunhe.Lan@freescale.com>
> > ---
> > arch/powerpc/platforms/85xx/mpc85xx_ds.c |   32 ++--------
> > arch/powerpc/sysdev/fsl_pci.c            |  102 ++++++++++++++++++-----
> ------
> > arch/powerpc/sysdev/fsl_pci.h            |    6 +-
> > drivers/edac/mpc85xx_edac.c              |   43 ++++---------
> > 4 files changed, 83 insertions(+), 100 deletions(-)
>=20
> You need to convert all boards to use fsl_pci_init before this patch.
> Otherwise we'll end up with PCI getting initialized twice on boards.
>=20
> - k

If we covert all boards with platform driver in this patch PCI will
be initialized only once without converting all boards to use
fsl_pci_init first.=20

If we convert all boards to use fsl_pci_init before this patch and
convert them to use platform driver again after this patch. Then
between this patch and next pci will be initialized twice too.

So I think convert all boards in this patch is the key not convert
all boards to use fsl_pci_init first before this patch.

-Hongtao.

^ permalink raw reply

* [RFC PATCH V6 00/19] memory-hotplug: hot-remove physical memory
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97

From: Wen Congyang <wency@cn.fujitsu.com>

This patch series aims to support physical memory hot-remove.

The patches can free/remove following things:

  - acpi_memory_info                          : [RFC PATCH 4/19]
  - /sys/firmware/memmap/X/{end, start, type} : [RFC PATCH 8/19]
  - iomem_resource                            : [RFC PATCH 9/19]
  - mem_section and related sysfs files       : [RFC PATCH 10-11, 13-16/19]
  - page table of removed memory              : [RFC PATCH 12/19]
  - node and related sysfs files              : [RFC PATCH 18-19/19]

If you find lack of function for physical memory hot-remove, please let me
know.

change log of v6:
 [RFC PATCH v5 12/19]
   * fix building error on other archtitectures than x86

 [RFC PATCH v5 15-16/19]
   * fix building error on other archtitectures than x86

change log of v5:
 * merge the patchset to clear page table and the patchset to hot remove
   memory(from ishimatsu) to one big patchset.

 [RFC PATCH v5 1/19]
   * rename remove_memory() to offline_memory()/offline_pages()

 [RFC PATCH v5 2/19]
   * new patch: implement offline_memory(). This function offlines pages,
     update memory block's state, and notify the userspace that the memory
     block's state is changed.

 [RFC PATCH v5 4/19]
   * offline and remove memory in acpi_memory_disable_device() too.

 [RFC PATCH v5 17/19]
   * new patch: add a new function __remove_zone() to revert the things done
     in the function __add_zone().

 [RFC PATCH v5 18/19]
   * flush work befor reseting node device.

change log of v4:
 * remove "memory-hotplug : unify argument of firmware_map_add_early/hotplug"
   from the patch series, since the patch is a bugfix. It is being disccussed
   on other thread. But for testing the patch series, the patch is needed.
   So I added the patch as [PATCH 0/13].

 [RFC PATCH v4 2/13]
   * check memory is online or not at remove_memory()
   * add memory_add_physaddr_to_nid() to acpi_memory_device_remove() for
     getting node id
 
 [RFC PATCH v4 3/13]
   * create new patch : check memory is online or not at online_pages()

 [RFC PATCH v4 4/13]
   * add __ref section to remove_memory()
   * call firmware_map_remove_entry() before remove_sysfs_fw_map_entry()

 [RFC PATCH v4 11/13]
   * rewrite register_page_bootmem_memmap() for removing page used as PT/PMD

change log of v3:
 * rebase to 3.5.0-rc6

 [RFC PATCH v2 2/13]
   * remove extra kobject_put()

   * The patch was commented by Wen. Wen's comment is
     "acpi_memory_device_remove() should ignore a return value of
     remove_memory() since caller does not care the return value".
     But I did not change it since I think caller should care the
     return value. And I am trying to fix it as follow:

     https://lkml.org/lkml/2012/7/5/624

 [RFC PATCH v2 4/13]
   * remove a firmware_memmap_entry allocated by kzmalloc()

change log of v2:
 [RFC PATCH v2 2/13]
   * check whether memory block is offline or not before calling offline_memory()
   * check whether section is valid or not in is_memblk_offline()
   * call kobject_put() for each memory_block in is_memblk_offline()

 [RFC PATCH v2 3/13]
   * unify the end argument of firmware_map_add_early/hotplug

 [RFC PATCH v2 4/13]
   * add release_firmware_map_entry() for freeing firmware_map_entry

 [RFC PATCH v2 6/13]
  * add release_memory_block() for freeing memory_block

 [RFC PATCH v2 11/13]
  * fix wrong arguments of free_pages()

Wen Congyang (6):
  memory-hotplug: implement offline_memory()
  memory-hotplug: store the node id in acpi_memory_device
  memory-hotplug: export the function acpi_bus_remove()
  memory-hotplug: call acpi_bus_remove() to remove memory device
  memory-hotplug: introduce new function arch_remove_memory()
  memory-hotplug: free memmap of sparse-vmemmap

Yasuaki Ishimatsu (13):
  memory-hotplug: rename remove_memory() to
    offline_memory()/offline_pages()
  memory-hotplug: offline and remove memory when removing the memory
    device
  memory-hotplug: check whether memory is present or not
  memory-hotplug: remove /sys/firmware/memmap/X sysfs
  memory-hotplug: does not release memory region in PAGES_PER_SECTION
    chunks
  memory-hotplug: add memory_block_release
  memory-hotplug: remove_memory calls __remove_pages
  memory-hotplug: check page type in get_page_bootmem
  memory-hotplug: move register_page_bootmem_info_node and
    put_page_bootmem for sparse-vmemmap
  memory-hotplug: implement register_page_bootmem_info_section of
    sparse-vmemmap
  memory_hotplug: clear zone when the memory is removed
  memory-hotplug: add node_device_release
  memory-hotplug: remove sysfs file of node

 arch/ia64/mm/discontig.c                        |   14 +
 arch/ia64/mm/init.c                             |   16 +
 arch/powerpc/mm/init_64.c                       |   14 +
 arch/powerpc/mm/mem.c                           |   14 +
 arch/powerpc/platforms/pseries/hotplug-memory.c |   16 +-
 arch/s390/mm/init.c                             |   12 +
 arch/s390/mm/vmem.c                             |   14 +
 arch/sh/mm/init.c                               |   15 +
 arch/sparc/mm/init_64.c                         |   14 +
 arch/tile/mm/init.c                             |    8 +
 arch/x86/include/asm/pgtable_types.h            |    1 +
 arch/x86/mm/init_32.c                           |   10 +
 arch/x86/mm/init_64.c                           |  331 ++++++++++++++++++++++
 arch/x86/mm/pageattr.c                          |   47 ++--
 drivers/acpi/acpi_memhotplug.c                  |   51 +++-
 drivers/acpi/scan.c                             |    3 +-
 drivers/base/memory.c                           |   90 ++++++-
 drivers/base/node.c                             |    8 +
 drivers/firmware/memmap.c                       |   78 +++++-
 include/acpi/acpi_bus.h                         |    1 +
 include/linux/firmware-map.h                    |    6 +
 include/linux/memory.h                          |    5 +
 include/linux/memory_hotplug.h                  |   25 +-
 include/linux/mm.h                              |    5 +-
 include/linux/mmzone.h                          |   19 ++
 mm/memory_hotplug.c                             |  337 +++++++++++++++++++++--
 mm/sparse.c                                     |    5 +-
 27 files changed, 1068 insertions(+), 91 deletions(-)

^ permalink raw reply

* [RFC PATCH V6 01/19] memory-hotplug: rename remove_memory() to offline_memory()/offline_pages()
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

remove_memory() only try to offline pages. It is called in two cases:
1. hot remove a memory device
2. echo offline >/sys/devices/system/memory/memoryXX/state

In the 1st case, we should also change memory block's state, and notify
the userspace that the memory block's state is changed after offlining
pages.

So rename remove_memory() to offline_memory()/offline_pages(). And in
the 1st case, offline_memory() will be used. The function offline_memory()
is not implemented. In the 2nd case, offline_pages() will be used.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/acpi/acpi_memhotplug.c |    2 +-
 drivers/base/memory.c          |    9 +++------
 include/linux/memory_hotplug.h |    3 ++-
 mm/memory_hotplug.c            |   22 ++++++++++++++--------
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 81a9def..8957ed9 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -318,7 +318,7 @@ static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
 	 */
 	list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
 		if (info->enabled) {
-			result = remove_memory(info->start_addr, info->length);
+			result = offline_memory(info->start_addr, info->length);
 			if (result)
 				return result;
 		}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 7dda4f7..44e7de6 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -248,26 +248,23 @@ static bool pages_correctly_reserved(unsigned long start_pfn,
 static int
 memory_block_action(unsigned long phys_index, unsigned long action)
 {
-	unsigned long start_pfn, start_paddr;
+	unsigned long start_pfn;
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
 	struct page *first_page;
 	int ret;
 
 	first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
+	start_pfn = page_to_pfn(first_page);
 
 	switch (action) {
 		case MEM_ONLINE:
-			start_pfn = page_to_pfn(first_page);
-
 			if (!pages_correctly_reserved(start_pfn, nr_pages))
 				return -EBUSY;
 
 			ret = online_pages(start_pfn, nr_pages);
 			break;
 		case MEM_OFFLINE:
-			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
-			ret = remove_memory(start_paddr,
-					    nr_pages << PAGE_SHIFT);
+			ret = offline_pages(start_pfn, nr_pages);
 			break;
 		default:
 			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 910550f..c183f39 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -233,7 +233,8 @@ static inline int is_mem_section_removable(unsigned long pfn,
 extern int mem_online_node(int nid);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
-extern int remove_memory(u64 start, u64 size);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+extern int offline_memory(u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 								int nr_pages);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3ad25f9..c182c76 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -866,7 +866,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	return offlined;
 }
 
-static int __ref offline_pages(unsigned long start_pfn,
+static int __ref __offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
 	unsigned long pfn, nr_pages, expire;
@@ -994,18 +994,24 @@ out:
 	return ret;
 }
 
-int remove_memory(u64 start, u64 size)
+int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 {
-	unsigned long start_pfn, end_pfn;
+	return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
+}
 
-	start_pfn = PFN_DOWN(start);
-	end_pfn = start_pfn + PFN_DOWN(size);
-	return offline_pages(start_pfn, end_pfn, 120 * HZ);
+int offline_memory(u64 start, u64 size)
+{
+	return -EINVAL;
 }
 #else
-int remove_memory(u64 start, u64 size)
+int offline_pages(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+
+int offline_memory(u64 start, u64 size)
 {
 	return -EINVAL;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
-EXPORT_SYMBOL_GPL(remove_memory);
+EXPORT_SYMBOL_GPL(offline_memory);
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 06/19] memory-hotplug: export the function acpi_bus_remove()
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

The function acpi_bus_remove() can remove a acpi device from acpi device.
When a acpi device is removed, we need to call this function to remove
the acpi device from acpi bus. So export this function.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/acpi/scan.c     |    3 ++-
 include/acpi/acpi_bus.h |    1 +
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d1ecca2..1cefc34 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1224,7 +1224,7 @@ static int acpi_device_set_context(struct acpi_device *device)
 	return -ENODEV;
 }
 
-static int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
+int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
 {
 	if (!dev)
 		return -EINVAL;
@@ -1246,6 +1246,7 @@ static int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
 
 	return 0;
 }
+EXPORT_SYMBOL(acpi_bus_remove);
 
 static int acpi_add_single_object(struct acpi_device **child,
 				  acpi_handle handle, int type,
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index bde976e..2ccf109 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -360,6 +360,7 @@ bool acpi_bus_power_manageable(acpi_handle handle);
 bool acpi_bus_can_wakeup(acpi_handle handle);
 int acpi_power_resource_register_device(struct device *dev, acpi_handle handle);
 void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle);
+int acpi_bus_remove(struct acpi_device *dev, int rmdevice);
 #ifdef CONFIG_ACPI_PROC_EVENT
 int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
 int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data);
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 04/19] memory-hotplug: offline and remove memory when removing the memory device
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

We should offline and remove memory when removing the memory device.
The memory device can be removed by 2 ways:
1. send eject request by SCI
2. echo 1 >/sys/bus/pci/devices/PNP0C80:XX/eject

In the 1st case, acpi_memory_disable_device() will be called. In the 2nd
case, acpi_memory_device_remove() will be called. acpi_memory_device_remove()
will also be called when we unbind the memory device from the driver
acpi_memhotplug. If the type is ACPI_BUS_REMOVAL_EJECT, it means
that the user wants to eject the memory device, and we should offline
and remove memory in acpi_memory_device_remove().

The function remove_memory() is not implemeted now. It only check whether
all memory has been offllined now.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/acpi/acpi_memhotplug.c |   42 +++++++++++++++++++++++++++++++++------
 drivers/base/memory.c          |   39 +++++++++++++++++++++++++++++++++++++
 include/linux/memory.h         |    5 ++++
 include/linux/memory_hotplug.h |    5 ++++
 mm/memory_hotplug.c            |   22 ++++++++++++++++++++
 5 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 293d718..ed37fc2 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/slab.h>
 #include <acpi/acpi_drivers.h>
@@ -310,26 +311,42 @@ static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device)
 	return 0;
 }
 
-static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
+static int
+acpi_memory_device_remove_memory(struct acpi_memory_device *mem_device)
 {
 	int result;
 	struct acpi_memory_info *info, *n;
+	int node = mem_device->nid;
 
-
-	/*
-	 * Ask the VM to offline this memory range.
-	 * Note: Assume that this function returns zero on success
-	 */
 	list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
 		if (info->enabled) {
 			result = offline_memory(info->start_addr, info->length);
 			if (result)
 				return result;
+
+			result = remove_memory(node, info->start_addr,
+					       info->length);
+			if (result)
+				return result;
 		}
+
 		list_del(&info->list);
 		kfree(info);
 	}
 
+	return 0;
+}
+
+static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
+{
+	int result;
+
+	/*
+	 * Ask the VM to offline this memory range.
+	 * Note: Assume that this function returns zero on success
+	 */
+	result = acpi_memory_device_remove_memory(mem_device);
+
 	/* Power-off and eject the device */
 	result = acpi_memory_powerdown_device(mem_device);
 	if (result) {
@@ -478,12 +495,23 @@ static int acpi_memory_device_add(struct acpi_device *device)
 static int acpi_memory_device_remove(struct acpi_device *device, int type)
 {
 	struct acpi_memory_device *mem_device = NULL;
-
+	int result;
 
 	if (!device || !acpi_driver_data(device))
 		return -EINVAL;
 
 	mem_device = acpi_driver_data(device);
+
+	if (type == ACPI_BUS_REMOVAL_EJECT) {
+		/*
+		 * offline and remove memory only when the memory device is
+		 * ejected.
+		 */
+		result = acpi_memory_device_remove_memory(mem_device);
+		if (result)
+			return result;
+	}
+
 	kfree(mem_device);
 
 	return 0;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 86c8821..038be73 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -70,6 +70,45 @@ void unregister_memory_isolate_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_memory_isolate_notifier);
 
+bool is_memblk_offline(unsigned long start, unsigned long size)
+{
+	struct memory_block *mem = NULL;
+	struct mem_section *section;
+	unsigned long start_pfn, end_pfn;
+	unsigned long pfn, section_nr;
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = PFN_UP(start + size);
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		section_nr = pfn_to_section_nr(pfn);
+		if (!present_section_nr(section_nr))
+			continue;
+
+		section = __nr_to_section(section_nr);
+		/* same memblock? */
+		if (mem)
+			if ((section_nr >= mem->start_section_nr) &&
+			    (section_nr <= mem->end_section_nr))
+				continue;
+
+		mem = find_memory_block_hinted(section, mem);
+		if (!mem)
+			continue;
+		if (mem->state == MEM_OFFLINE)
+			continue;
+
+		kobject_put(&mem->dev.kobj);
+		return false;
+	}
+
+	if (mem)
+		kobject_put(&mem->dev.kobj);
+
+	return true;
+}
+EXPORT_SYMBOL(is_memblk_offline);
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 1ac7f6e..7c66126 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -106,6 +106,10 @@ static inline int memory_isolate_notify(unsigned long val, void *v)
 {
 	return 0;
 }
+static inline bool is_memblk_offline(unsigned long start, unsigned long size)
+{
+	return false;
+}
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block_hinted(struct mem_section *,
 							struct memory_block *);
 extern struct memory_block *find_memory_block(struct mem_section *);
+extern bool is_memblk_offline(unsigned long start, unsigned long size);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 enum mem_add_context { BOOT, HOTPLUG };
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 0b040bb..fd84ea9 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -222,6 +222,7 @@ static inline void unlock_memory_hotplug(void) {}
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+extern int remove_memory(int nid, u64 start, u64 size);
 
 #else
 static inline int is_mem_section_removable(unsigned long pfn,
@@ -229,6 +230,10 @@ static inline int is_mem_section_removable(unsigned long pfn,
 {
 	return 0;
 }
+static inline int remove_memory(int nid, u64 start, u64 size)
+{
+	return -EBUSY;
+}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 extern int mem_online_node(int nid);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3113cd4..80cded7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1038,6 +1038,28 @@ int offline_memory(u64 start, u64 size)
 
 	return 0;
 }
+
+int remove_memory(int nid, u64 start, u64 size)
+{
+	int ret = -EBUSY;
+	lock_memory_hotplug();
+	/*
+	 * The memory might become online by other task, even if you offine it.
+	 * So we check whether the cpu has been onlined or not.
+	 */
+	if (!is_memblk_offline(start, size)) {
+		pr_warn("memory removing [mem %#010llx-%#010llx] failed, "
+			"because the memmory range is online\n",
+			start, start + size);
+		ret = -EAGAIN;
+	}
+
+	unlock_memory_hotplug();
+	return ret;
+
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+
 #else
 int offline_pages(u64 start, u64 size)
 {
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 05/19] memory-hotplug: check whether memory is present or not
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

If system supports memory hot-remove, online_pages() may online removed pages.
So online_pages() need to check whether onlining pages are present or not.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 include/linux/mmzone.h |   19 +++++++++++++++++++
 mm/memory_hotplug.c    |   13 +++++++++++++
 2 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2daa54f..ac3ae30 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1180,6 +1180,25 @@ void sparse_init(void);
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+#ifdef CONFIG_SPARSEMEM
+static inline int pfns_present(unsigned long pfn, unsigned long nr_pages)
+{
+	int i;
+	for (i = 0; i < nr_pages; i++) {
+		if (pfn_present(pfn + i))
+			continue;
+		else
+			return -EINVAL;
+	}
+	return 0;
+}
+#else
+static inline int pfns_present(unsigned long pfn, unsigned long nr_pages)
+{
+	return 0;
+}
+#endif /* CONFIG_SPARSEMEM*/
+
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
 bool early_pfn_in_nid(unsigned long pfn, int nid);
 #else
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 80cded7..3f1d7c5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -467,6 +467,19 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	struct memory_notify arg;
 
 	lock_memory_hotplug();
+	/*
+	 * If system supports memory hot-remove, the memory may have been
+	 * removed. So we check whether the memory has been removed or not.
+	 *
+	 * Note: When CONFIG_SPARSEMEM is defined, pfns_present() become
+	 *       effective. If CONFIG_SPARSEMEM is not defined, pfns_present()
+	 *       always returns 0.
+	 */
+	ret = pfns_present(pfn, nr_pages);
+	if (ret) {
+		unlock_memory_hotplug();
+		return ret;
+	}
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	arg.status_change_nid = -1;
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 07/19] memory-hotplug: call acpi_bus_remove() to remove memory device
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

The memory device has been ejected and powoffed, so we can call
acpi_bus_remove() to remove the memory device from acpi bus.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/acpi/acpi_memhotplug.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index ed37fc2..755cc31 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -423,8 +423,9 @@ static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
 		}
 
 		/*
-		 * TBD: Invoke acpi_bus_remove to cleanup data structures
+		 * Invoke acpi_bus_remove() to remove memory device
 		 */
+		acpi_bus_remove(device, 1);
 
 		/* _EJ0 succeeded; _OST is not necessary */
 		return;
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 02/19] memory-hotplug: implement offline_memory()
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, Vasilis Liaskovitis, isimatu.yasuaki,
	paulus, minchan.kim, kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

The function offline_memory() will be called when hot removing a
memory device. The memory device may contain more than one memory
block. If the memory block has been offlined, __offline_pages()
will fail. So we should try to offline one memory block at a
time.

If the memory block is offlined in offline_memory(), we also
update it's state, and notify the userspace that its state is
changed.

The function offline_memory() also check each memory block's
state. So there is no need to check the memory block's state
before calling offline_memory().

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
CC: Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/base/memory.c          |   31 +++++++++++++++++++++++++++----
 include/linux/memory_hotplug.h |    2 ++
 mm/memory_hotplug.c            |   37 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 44e7de6..86c8821 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -275,13 +275,11 @@ memory_block_action(unsigned long phys_index, unsigned long action)
 	return ret;
 }
 
-static int memory_block_change_state(struct memory_block *mem,
+static int __memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
 	int ret = 0;
 
-	mutex_lock(&mem->state_mutex);
-
 	if (mem->state != from_state_req) {
 		ret = -EINVAL;
 		goto out;
@@ -309,10 +307,20 @@ static int memory_block_change_state(struct memory_block *mem,
 		break;
 	}
 out:
-	mutex_unlock(&mem->state_mutex);
 	return ret;
 }
 
+static int memory_block_change_state(struct memory_block *mem,
+		unsigned long to_state, unsigned long from_state_req)
+{
+	int ret;
+
+	mutex_lock(&mem->state_mutex);
+	ret = __memory_block_change_state(mem, to_state, from_state_req);
+	mutex_unlock(&mem->state_mutex);
+
+	return ret;
+}
 static ssize_t
 store_mem_state(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
@@ -653,6 +661,21 @@ int unregister_memory_section(struct mem_section *section)
 }
 
 /*
+ * offline one memory block. If the memory block has been offlined, do nothing.
+ */
+int offline_memory_block(struct memory_block *mem)
+{
+	int ret = 0;
+
+	mutex_lock(&mem->state_mutex);
+	if (mem->state != MEM_OFFLINE)
+		ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+	mutex_unlock(&mem->state_mutex);
+
+	return ret;
+}
+
+/*
  * Initialize the sysfs support for memory devices...
  */
 int __init memory_dev_init(void)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index c183f39..0b040bb 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -10,6 +10,7 @@ struct page;
 struct zone;
 struct pglist_data;
 struct mem_section;
+struct memory_block;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 
@@ -234,6 +235,7 @@ extern int mem_online_node(int nid);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+extern int offline_memory_block(struct memory_block *mem);
 extern int offline_memory(u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 								int nr_pages);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c182c76..3113cd4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1001,7 +1001,42 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 
 int offline_memory(u64 start, u64 size)
 {
-	return -EINVAL;
+	struct memory_block *mem = NULL;
+	struct mem_section *section;
+	unsigned long start_pfn, end_pfn;
+	unsigned long pfn, section_nr;
+	int ret;
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = start_pfn + PFN_DOWN(size);
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		section_nr = pfn_to_section_nr(pfn);
+		if (!present_section_nr(section_nr))
+			continue;
+
+		section = __nr_to_section(section_nr);
+		/* same memblock? */
+		if (mem)
+			if ((section_nr >= mem->start_section_nr) &&
+			    (section_nr <= mem->end_section_nr))
+				continue;
+
+		mem = find_memory_block_hinted(section, mem);
+		if (!mem)
+			continue;
+
+		ret = offline_memory_block(mem);
+		if (ret) {
+			kobject_put(&mem->dev.kobj);
+			return ret;
+		}
+	}
+
+	if (mem)
+		kobject_put(&mem->dev.kobj);
+
+	return 0;
 }
 #else
 int offline_pages(u64 start, u64 size)
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 03/19] memory-hotplug: store the node id in acpi_memory_device
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

The memory device has only one node id. Store the node id when
enable the memory device, and we can reuse it when removing the
memory device.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 drivers/acpi/acpi_memhotplug.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 8957ed9..293d718 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -83,6 +83,7 @@ struct acpi_memory_info {
 struct acpi_memory_device {
 	struct acpi_device * device;
 	unsigned int state;	/* State of the memory device */
+	int nid;
 	struct list_head res_list;
 };
 
@@ -256,6 +257,9 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 		info->enabled = 1;
 		num_enabled++;
 	}
+
+	mem_device->nid = node;
+
 	if (!num_enabled) {
 		printk(KERN_ERR PREFIX "add_memory failed\n");
 		mem_device->state = MEMORY_INVALID_STATE;
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 09/19] memory-hotplug: does not release memory region in PAGES_PER_SECTION chunks
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

Since applying a patch(de7f0cba96786c), release_mem_region() has been changed
as called in PAGES_PER_SECTION chunks because register_memory_resource() is
called in PAGES_PER_SECTION chunks by add_memory(). But it seems firmware
dependency. If CRS are written in the PAGES_PER_SECTION chunks in ACPI DSDT
Table, register_memory_resource() is called in PAGES_PER_SECTION chunks.
But if CRS are written in the DIMM unit in ACPI DSDT Table,
register_memory_resource() is called in DIMM unit. So release_mem_region()
should not be called in PAGES_PER_SECTION chunks. The patch fixes it.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   13 +++++++++----
 mm/memory_hotplug.c                             |    4 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 11d8e05..dc0a035 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -77,7 +77,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 {
 	unsigned long start, start_pfn;
 	struct zone *zone;
-	int ret;
+	int i, ret;
+	int sections_to_remove;
 
 	start_pfn = base >> PAGE_SHIFT;
 
@@ -97,9 +98,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	 * to sysfs "state" file and we can't remove sysfs entries
 	 * while writing to it. So we have to defer it to here.
 	 */
-	ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
-	if (ret)
-		return ret;
+	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+	for (i = 0; i < sections_to_remove; i++) {
+		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
+		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Update memory regions for memory remove
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 45b03b3..29aff4d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -358,11 +358,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
 	BUG_ON(nr_pages % PAGES_PER_SECTION);
 
+	release_mem_region(phys_start_pfn << PAGE_SHIFT,  nr_pages * PAGE_SIZE);
+
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
-		release_mem_region(pfn << PAGE_SHIFT,
-				   PAGES_PER_SECTION << PAGE_SHIFT);
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 08/19] memory-hotplug: remove /sys/firmware/memmap/X sysfs
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When (hot)adding memory into system, /sys/firmware/memmap/X/{end, start, type}
sysfs files are created. But there is no code to remove these files. The patch
implements the function to remove them.

Note : The code does not free firmware_map_entry since there is no way to free
       memory which is allocated by bootmem.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 drivers/firmware/memmap.c    |   78 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/firmware-map.h |    6 +++
 mm/memory_hotplug.c          |    9 ++++-
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index c1cdc92..b2e7e5e 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 /*
  * Data types ------------------------------------------------------------------
@@ -79,7 +80,22 @@ static const struct sysfs_ops memmap_attr_ops = {
 	.show = memmap_attr_show,
 };
 
+#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
+
+static void release_firmware_map_entry(struct kobject *kobj)
+{
+	struct firmware_map_entry *entry = to_memmap_entry(kobj);
+	struct page *page;
+
+	page = virt_to_page(entry);
+	if (PageSlab(page) || PageCompound(page))
+		kfree(entry);
+
+	/* There is no way to free memory allocated from bootmem*/
+}
+
 static struct kobj_type memmap_ktype = {
+	.release	= release_firmware_map_entry,
 	.sysfs_ops	= &memmap_attr_ops,
 	.default_attrs	= def_attrs,
 };
@@ -123,6 +139,16 @@ static int firmware_map_add_entry(u64 start, u64 end,
 	return 0;
 }
 
+/**
+ * firmware_map_remove_entry() - Does the real work to remove a firmware
+ * memmap entry.
+ * @entry: removed entry.
+ **/
+static inline void firmware_map_remove_entry(struct firmware_map_entry *entry)
+{
+	list_del(&entry->list);
+}
+
 /*
  * Add memmap entry on sysfs
  */
@@ -144,6 +170,31 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry)
 	return 0;
 }
 
+/*
+ * Remove memmap entry on sysfs
+ */
+static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry)
+{
+	kobject_put(&entry->kobj);
+}
+
+/*
+ * Search memmap entry
+ */
+
+struct firmware_map_entry * __meminit
+find_firmware_map_entry(u64 start, u64 end, const char *type)
+{
+	struct firmware_map_entry *entry;
+
+	list_for_each_entry(entry, &map_entries, list)
+		if ((entry->start == start) && (entry->end == end) &&
+		    (!strcmp(entry->type, type)))
+			return entry;
+
+	return NULL;
+}
+
 /**
  * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do
  * memory hotplug.
@@ -196,6 +247,32 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 	return firmware_map_add_entry(start, end, type, entry);
 }
 
+/**
+ * firmware_map_remove() - remove a firmware mapping entry
+ * @start: Start of the memory range.
+ * @end:   End of the memory range.
+ * @type:  Type of the memory range.
+ *
+ * removes a firmware mapping entry.
+ *
+ * Returns 0 on success, or -EINVAL if no entry.
+ **/
+int __meminit firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	struct firmware_map_entry *entry;
+
+	entry = find_firmware_map_entry(start, end - 1, type);
+	if (!entry)
+		return -EINVAL;
+
+	firmware_map_remove_entry(entry);
+
+	/* remove the memmap entry */
+	remove_sysfs_fw_map_entry(entry);
+
+	return 0;
+}
+
 /*
  * Sysfs functions -------------------------------------------------------------
  */
@@ -218,7 +295,6 @@ static ssize_t type_show(struct firmware_map_entry *entry, char *buf)
 }
 
 #define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr)
-#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
 
 static ssize_t memmap_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 43fe52f..71d4fa7 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -25,6 +25,7 @@
 
 int firmware_map_add_early(u64 start, u64 end, const char *type);
 int firmware_map_add_hotplug(u64 start, u64 end, const char *type);
+int firmware_map_remove(u64 start, u64 end, const char *type);
 
 #else /* CONFIG_FIRMWARE_MEMMAP */
 
@@ -38,6 +39,11 @@ static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 	return 0;
 }
 
+static inline int firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	return 0;
+}
+
 #endif /* CONFIG_FIRMWARE_MEMMAP */
 
 #endif /* _LINUX_FIRMWARE_MAP_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3f1d7c5..45b03b3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1052,9 +1052,9 @@ int offline_memory(u64 start, u64 size)
 	return 0;
 }
 
-int remove_memory(int nid, u64 start, u64 size)
+int __ref remove_memory(int nid, u64 start, u64 size)
 {
-	int ret = -EBUSY;
+	int ret = 0;
 	lock_memory_hotplug();
 	/*
 	 * The memory might become online by other task, even if you offine it.
@@ -1065,8 +1065,13 @@ int remove_memory(int nid, u64 start, u64 size)
 			"because the memmory range is online\n",
 			start, start + size);
 		ret = -EAGAIN;
+		goto out;
 	}
 
+	/* remove memmap entry */
+	firmware_map_remove(start, start + size, "System RAM");
+
+out:
 	unlock_memory_hotplug();
 	return ret;
 
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 11/19] memory-hotplug: remove_memory calls __remove_pages
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

The patch adds __remove_pages() to remove_memory(). Then the range of
phys_start_pfn argument and nr_pages argument in __remove_pagse() may
have different zone. So zone argument is removed from __remove_pages()
and __remove_pages() caluculates zone in each section.

When CONFIG_SPARSEMEM_VMEMMAP is defined, there is no way to remove a memmap.
So __remove_section only calls unregister_memory_section().

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |    5 +----
 include/linux/memory_hotplug.h                  |    3 +--
 mm/memory_hotplug.c                             |   18 +++++++++++-------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index dc0a035..cc14da4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -76,7 +76,6 @@ unsigned long memory_block_size_bytes(void)
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
-	struct zone *zone;
 	int i, ret;
 	int sections_to_remove;
 
@@ -87,8 +86,6 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 		return 0;
 	}
 
-	zone = page_zone(pfn_to_page(start_pfn));
-
 	/*
 	 * Remove section mappings and sysfs entries for the
 	 * section of the memory we are removing.
@@ -101,7 +98,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
-		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		ret = __remove_pages(start_pfn,  PAGES_PER_SECTION);
 		if (ret)
 			return ret;
 	}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index fd84ea9..8bf820d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -90,8 +90,7 @@ extern bool is_pageblock_removable_nolock(struct page *page);
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
-extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages);
+extern int __remove_pages(unsigned long start_pfn, unsigned long nr_pages);
 
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 29aff4d..713f1b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -275,11 +275,14 @@ static int __meminit __add_section(int nid, struct zone *zone,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
-	/*
-	 * XXX: Freeing memmap with vmemmap is not implement yet.
-	 *      This should be removed later.
-	 */
-	return -EBUSY;
+	int ret = -EINVAL;
+
+	if (!valid_section(ms))
+		return ret;
+
+	ret = unregister_memory_section(ms);
+
+	return ret;
 }
 #else
 static int __remove_section(struct zone *zone, struct mem_section *ms)
@@ -346,11 +349,11 @@ EXPORT_SYMBOL_GPL(__add_pages);
  * sure that pages are marked reserved and zones are adjust properly by
  * calling offline_pages().
  */
-int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+int __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages)
 {
 	unsigned long i, ret = 0;
 	int sections_to_remove;
+	struct zone *zone;
 
 	/*
 	 * We can only remove entire sections
@@ -363,6 +366,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		zone = page_zone(pfn_to_page(pfn));
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 18/19] memory-hotplug: add node_device_release
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When calling unregister_node(), the function shows following message at
device_release().

Device 'node2' does not have a release() function, it is broken and must be
fixed.

So the patch implements node_device_release()

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 drivers/base/node.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index af1a177..9bc2f57 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -252,6 +252,13 @@ static inline void hugetlb_register_node(struct node *node) {}
 static inline void hugetlb_unregister_node(struct node *node) {}
 #endif
 
+static void node_device_release(struct device *dev)
+{
+	struct node *node_dev = to_node(dev);
+
+	flush_work(&node_dev->node_work);
+	memset(node_dev, 0, sizeof(struct node));
+}
 
 /*
  * register_node - Setup a sysfs device for a node.
@@ -265,6 +272,7 @@ int register_node(struct node *node, int num, struct node *parent)
 
 	node->dev.id = num;
 	node->dev.bus = &node_subsys;
+	node->dev.release = node_device_release;
 	error = device_register(&node->dev);
 
 	if (!error){
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 19/19] memory-hotplug: remove sysfs file of node
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

The patch adds node_set_offline() and unregister_one_node() to remove_memory()
for removing sysfs file of node.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 68d7123..7d68e0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1271,6 +1271,11 @@ int __ref remove_memory(int nid, u64 start, u64 size)
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
 
+	if (!node_present_pages(nid)) {
+		node_set_offline(nid);
+		unregister_one_node(nid);
+	}
+
 	arch_remove_memory(start, size);
 out:
 	unlock_memory_hotplug();
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 17/19] memory_hotplug: clear zone when the memory is removed
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

When a memory is added, we update zone's and pgdat's start_pfn and spanned_pages
in the function __add_zone(). So we should revert these when the memory is
removed. Add a new function __remove_zone() to do this.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |  181 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 181 insertions(+), 0 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a1f3490..68d7123 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -300,10 +300,187 @@ static int __meminit __add_section(int nid, struct zone *zone,
 	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
+/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
+static int find_smallest_section_pfn(unsigned long start_pfn,
+				     unsigned long end_pfn)
+{
+	struct mem_section *ms;
+
+	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(start_pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		return start_pfn;
+	}
+
+	return 0;
+}
+
+/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
+static int find_biggest_section_pfn(unsigned long start_pfn,
+				    unsigned long end_pfn)
+{
+	struct mem_section *ms;
+	unsigned long pfn;
+
+	/* pfn is the end pfn of a memory section. */
+	pfn = end_pfn - 1;
+	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		return pfn;
+	}
+
+	return 0;
+}
+
+static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
+			     unsigned long end_pfn)
+{
+	unsigned long zone_start_pfn =  zone->zone_start_pfn;
+	unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long pfn;
+	struct mem_section *ms;
+
+	zone_span_writelock(zone);
+	if (zone_start_pfn == start_pfn) {
+		/*
+		 * If the section is smallest section in the zone, it need
+		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
+		 * In this case, we find second smallest valid mem_section
+		 * for shrinking zone.
+		 */
+		pfn = find_smallest_section_pfn(end_pfn, zone_end_pfn);
+		if (pfn) {
+			zone->zone_start_pfn = pfn;
+			zone->spanned_pages = zone_end_pfn - pfn;
+		}
+	} else if (zone_end_pfn == end_pfn) {
+		/*
+		 * If the section is biggest section in the zone, it need
+		 * shrink zone->spanned_pages.
+		 * In this case, we find second biggest valid mem_section for
+		 * shrinking zone.
+		 */
+		pfn = find_biggest_section_pfn(zone_start_pfn, start_pfn);
+		if (pfn)
+			zone->spanned_pages = pfn - zone_start_pfn + 1;
+	}
+
+	/*
+	 * The section is not biggest or smallest mem_section in the zone, it
+	 * only creates a hole in the zone. So in this case, we need not
+	 * change the zone. But perhaps, the zone has only hole data. Thus
+	 * it check the zone has only hole or not.
+	 */
+	pfn = zone_start_pfn;
+	for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		 /* If the section is current section, it continues the loop */
+		if (start_pfn == pfn)
+			continue;
+
+		/* If we find valid section, we have nothing to do */
+		zone_span_writeunlock(zone);
+		return;
+	}
+
+	/* The zone has no valid section */
+	zone->zone_start_pfn = 0;
+	zone->spanned_pages = 0;
+	zone_span_writeunlock(zone);
+}
+
+static void shrink_pgdat_span(struct pglist_data *pgdat,
+			      unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pgdat_start_pfn =  pgdat->node_start_pfn;
+	unsigned long pgdat_end_pfn =
+		pgdat->node_start_pfn + pgdat->node_spanned_pages;
+	unsigned long pfn;
+	struct mem_section *ms;
+
+	if (pgdat_start_pfn == start_pfn) {
+		/*
+		 * If the section is smallest section in the pgdat, it need
+		 * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
+		 * In this case, we find second smallest valid mem_section
+		 * for shrinking zone.
+		 */
+		pfn = find_smallest_section_pfn(end_pfn, pgdat_end_pfn);
+		if (pfn) {
+			pgdat->node_start_pfn = pfn;
+			pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
+		}
+	} else if (pgdat_end_pfn == end_pfn) {
+		/*
+		 * If the section is biggest section in the pgdat, it need
+		 * shrink pgdat->node_spanned_pages.
+		 * In this case, we find second biggest valid mem_section for
+		 * shrinking zone.
+		 */
+		pfn = find_biggest_section_pfn(pgdat_start_pfn, start_pfn);
+		if (pfn)
+			pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
+	}
+
+	/*
+	 * If the section is not biggest or smallest mem_section in the pgdat,
+	 * it only creates a hole in the pgdat. So in this case, we need not
+	 * change the pgdat.
+	 * But perhaps, the pgdat has only hole data. Thus it check the pgdat
+	 * has only hole or not.
+	 */
+	pfn = pgdat_start_pfn;
+	for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
+		ms = __pfn_to_section(pfn);
+
+		if (unlikely(!valid_section(ms)))
+			continue;
+
+		 /* If the section is current section, it continues the loop */
+		if (start_pfn == pfn)
+			continue;
+
+		/* If we find valid section, we have nothing to do */
+		return;
+	}
+
+	/* The pgdat has no valid section */
+	pgdat->node_start_pfn = 0;
+	pgdat->node_spanned_pages = 0;
+}
+
+static void __remove_zone(struct zone *zone, unsigned long start_pfn)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nr_pages = PAGES_PER_SECTION;
+	int zone_type;
+	unsigned long flags;
+
+	zone_type = zone - pgdat->node_zones;
+
+	pgdat_resize_lock(zone->zone_pgdat, &flags);
+	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
+	pgdat_resize_unlock(zone->zone_pgdat, &flags);
+}
+
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
 	unsigned long flags;
 	struct pglist_data *pgdat = zone->zone_pgdat;
+	unsigned long start_pfn;
+	int scn_nr;
 	int ret = -EINVAL;
 
 	if (!valid_section(ms))
@@ -313,6 +490,10 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
 	if (ret)
 		return ret;
 
+	scn_nr = __section_nr(ms);
+	start_pfn = section_nr_to_pfn(scn_nr);
+	__remove_zone(zone, start_pfn);
+
 	pgdat_resize_lock(pgdat, &flags);
 	sparse_remove_one_section(zone, ms);
 	pgdat_resize_unlock(pgdat, &flags);
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 13/19] memory-hotplug: check page type in get_page_bootmem
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

There is a possibility that get_page_bootmem() is called to the same page many
times. So when get_page_bootmem is called to the same page, the function only
increments page->_count.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 mm/memory_hotplug.c |   15 +++++++++++----
 1 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5f9f8c7..710e593 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -95,10 +95,17 @@ static void release_memory_resource(struct resource *res)
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
-	page->lru.next = (struct list_head *) type;
-	SetPagePrivate(page);
-	set_page_private(page, info);
-	atomic_inc(&page->_count);
+	unsigned long page_type;
+
+	page_type = (unsigned long) page->lru.next;
+	if (type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	    type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE){
+		page->lru.next = (struct list_head *) type;
+		SetPagePrivate(page);
+		set_page_private(page, info);
+		atomic_inc(&page->_count);
+	} else
+		atomic_inc(&page->_count);
 }
 
 /* reference to __meminit __free_pages_bootmem is valid
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH V6 14/19] memory-hotplug: move register_page_bootmem_info_node and put_page_bootmem for sparse-vmemmap
From: wency @ 2012-08-03  7:49 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi, linux-s390,
	linux-sh, linux-ia64, cmetcalf
  Cc: len.brown, Wen Congyang, isimatu.yasuaki, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, akpm, liuj97
In-Reply-To: <1343980161-14254-1-git-send-email-wency@cn.fujitsu.com>

From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

For implementing register_page_bootmem_info_node of sparse-vmemmap,
register_page_bootmem_info_node and put_page_bootmem are moved to
memory_hotplug.c

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 include/linux/memory_hotplug.h |    9 ---------
 mm/memory_hotplug.c            |    8 ++++++--
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index cdbbd79..1133e63 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -162,17 +162,8 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
-{
-}
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
 extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
-#endif
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 710e593..3a264a5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -91,7 +91,6 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
@@ -127,6 +126,7 @@ void __ref put_page_bootmem(struct page *page)
 
 }
 
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
@@ -163,6 +163,11 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
+#else
+static inline void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+}
+#endif
 
 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -198,7 +203,6 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 		register_page_bootmem_info_section(pfn);
 
 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 			   unsigned long end_pfn)
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox