* Re: [PATCH 2/2] powerpc/e500/qemu-e500: enable coreint
From: Kumar Gala @ 2013-02-15 20:14 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc-dev
In-Reply-To: <1358819804-28665-3-git-send-email-scottwood@freescale.com>
On Jan 21, 2013, at 7:56 PM, Scott Wood wrote:
> The MPIC code will disable coreint if it detects an insufficient
> MPIC version.
>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> ---
> arch/powerpc/platforms/85xx/qemu_e500.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
applied to next
- k
^ permalink raw reply
* [PATCH] i2c: Remove unneeded xxx_set_drvdata(..., NULL) calls
From: Doug Anderson @ 2013-02-15 23:18 UTC (permalink / raw)
To: Wolfram Sang
Cc: Tony Lindgren, Linus Walleij, Thierry Reding, Sekhar Nori,
linux-kernel, linux-i2c, Guan Xuetao, Kevin Hilman, Sonic Zhang,
linux-arm-kernel, Deepak Sikri, Havard Skinnemoen, Marek Vasut,
Pawel Moll, Stephen Warren, Sascha Hauer, Uwe Kleine-König,
Rob Herring, uclinux-dist-devel, Jean Delvare, Lars-Peter Clausen,
Ben Dooks (embedded platforms), Barry Song, linux-omap,
Mika Westerberg, Oskar Schirmer, Fabio Estevam,
davinci-linux-open-source, Shawn Guo, Jim Cromie,
Greg Kroah-Hartman, Tomoya MORINAGA, Doug Anderson, Kyungmin Park,
Viresh Kumar, Karol Lewandowski, Jiri Kosina, STEricsson,
Joe Perches, Andrew Morton, Alessandro Rubini, linuxppc-dev,
Alexander Stein
In-Reply-To: <1360953682-25066-1-git-send-email-dianders@chromium.org>
There is simply no reason to be manually setting the private driver
data to NULL in the remove/fail to probe cases. This is just extra
cruft code that can be removed.
A few notes:
* Nothing relies on drvdata being set to NULL.
* The __device_release_driver() function eventually calls
dev_set_drvdata(dev, NULL) anyway, so there's no need to do it
twice.
* I verified that there were no cases where xxx_get_drvdata() was
being called in these drivers and checking for / relying on the NULL
return value.
This could be cleaned up kernel-wide but for now just take the baby
step and remove from the i2c subsystem.
Reported-by: Wolfram Sang <wsa@the-dreams.de>
Reported-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
---
drivers/i2c/busses/i2c-au1550.c | 1 -
drivers/i2c/busses/i2c-bfin-twi.c | 2 --
drivers/i2c/busses/i2c-cpm.c | 2 --
drivers/i2c/busses/i2c-davinci.c | 2 --
drivers/i2c/busses/i2c-designware-pcidrv.c | 2 --
drivers/i2c/busses/i2c-designware-platdrv.c | 2 --
drivers/i2c/busses/i2c-eg20t.c | 2 --
drivers/i2c/busses/i2c-highlander.c | 4 ----
drivers/i2c/busses/i2c-i801.c | 1 -
drivers/i2c/busses/i2c-ibm_iic.c | 3 ---
drivers/i2c/busses/i2c-imx.c | 1 -
drivers/i2c/busses/i2c-intel-mid.c | 2 --
drivers/i2c/busses/i2c-iop3xx.c | 2 --
drivers/i2c/busses/i2c-mpc.c | 2 --
drivers/i2c/busses/i2c-mxs.c | 2 --
drivers/i2c/busses/i2c-nomadik.c | 2 --
drivers/i2c/busses/i2c-ocores.c | 1 -
drivers/i2c/busses/i2c-octeon.c | 5 +----
drivers/i2c/busses/i2c-omap.c | 3 ---
drivers/i2c/busses/i2c-pca-platform.c | 1 -
drivers/i2c/busses/i2c-pmcmsp.c | 2 --
drivers/i2c/busses/i2c-pnx.c | 2 --
drivers/i2c/busses/i2c-powermac.c | 1 -
drivers/i2c/busses/i2c-puv3.c | 2 --
drivers/i2c/busses/i2c-pxa-pci.c | 2 --
drivers/i2c/busses/i2c-pxa.c | 2 --
drivers/i2c/busses/i2c-s6000.c | 1 -
drivers/i2c/busses/i2c-sh7760.c | 1 -
drivers/i2c/busses/i2c-stu300.c | 1 -
drivers/i2c/busses/i2c-taos-evm.c | 2 --
drivers/i2c/busses/i2c-versatile.c | 2 --
drivers/i2c/busses/i2c-xiic.c | 2 --
drivers/i2c/busses/i2c-xlr.c | 1 -
drivers/i2c/busses/scx200_acb.c | 1 -
drivers/i2c/muxes/i2c-mux-gpio.c | 1 -
35 files changed, 1 insertion(+), 64 deletions(-)
diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c
index b278298..b5b8923 100644
--- a/drivers/i2c/busses/i2c-au1550.c
+++ b/drivers/i2c/busses/i2c-au1550.c
@@ -376,7 +376,6 @@ static int i2c_au1550_remove(struct platform_device *pdev)
{
struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
- platform_set_drvdata(pdev, NULL);
i2c_del_adapter(&priv->adap);
i2c_au1550_disable(priv);
iounmap(priv->psc_base);
diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index 0cf780f..05080c4 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -724,8 +724,6 @@ static int i2c_bfin_twi_remove(struct platform_device *pdev)
{
struct bfin_twi_iface *iface = platform_get_drvdata(pdev);
- platform_set_drvdata(pdev, NULL);
-
i2c_del_adapter(&(iface->adap));
free_irq(iface->irq, iface);
peripheral_free_list((unsigned short *)pdev->dev.platform_data);
diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index 2e79c10..3823623 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -682,7 +682,6 @@ static int cpm_i2c_probe(struct platform_device *ofdev)
out_shut:
cpm_i2c_shutdown(cpm);
out_free:
- dev_set_drvdata(&ofdev->dev, NULL);
kfree(cpm);
return result;
@@ -696,7 +695,6 @@ static int cpm_i2c_remove(struct platform_device *ofdev)
cpm_i2c_shutdown(cpm);
- dev_set_drvdata(&ofdev->dev, NULL);
kfree(cpm);
return 0;
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index 6a0a553..7d1e590 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -755,7 +755,6 @@ err_mem_ioremap:
clk_put(dev->clk);
dev->clk = NULL;
err_free_mem:
- platform_set_drvdata(pdev, NULL);
put_device(&pdev->dev);
kfree(dev);
err_release_region:
@@ -771,7 +770,6 @@ static int davinci_i2c_remove(struct platform_device *pdev)
i2c_davinci_cpufreq_deregister(dev);
- platform_set_drvdata(pdev, NULL);
i2c_del_adapter(&dev->adapter);
put_device(&pdev->dev);
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 6add851..7c5e383 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -319,7 +319,6 @@ err_free_irq:
free_irq(pdev->irq, dev);
err_iounmap:
iounmap(dev->base);
- pci_set_drvdata(pdev, NULL);
put_device(&pdev->dev);
kfree(dev);
err_release_region:
@@ -336,7 +335,6 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
pm_runtime_forbid(&pdev->dev);
pm_runtime_get_noresume(&pdev->dev);
- pci_set_drvdata(pdev, NULL);
i2c_del_adapter(&dev->adapter);
put_device(&pdev->dev);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index d2a33e9..0ceb6e1 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -207,7 +207,6 @@ err_unuse_clocks:
clk_put(dev->clk);
dev->clk = NULL;
err_free_mem:
- platform_set_drvdata(pdev, NULL);
put_device(&pdev->dev);
kfree(dev);
err_release_region:
@@ -221,7 +220,6 @@ static int dw_i2c_remove(struct platform_device *pdev)
struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
struct resource *mem;
- platform_set_drvdata(pdev, NULL);
pm_runtime_get_sync(&pdev->dev);
i2c_del_adapter(&dev->adapter);
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 5e7886e..0f37529 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -869,8 +869,6 @@ static void pch_i2c_remove(struct pci_dev *pdev)
for (i = 0; i < adap_info->ch_num; i++)
adap_info->pch_data[i].pch_base_address = NULL;
- pci_set_drvdata(pdev, NULL);
-
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c
index 3351cc7..436b0f2 100644
--- a/drivers/i2c/busses/i2c-highlander.c
+++ b/drivers/i2c/busses/i2c-highlander.c
@@ -436,8 +436,6 @@ err_unmap:
err:
kfree(dev);
- platform_set_drvdata(pdev, NULL);
-
return ret;
}
@@ -453,8 +451,6 @@ static int highlander_i2c_remove(struct platform_device *pdev)
iounmap(dev->base);
kfree(dev);
- platform_set_drvdata(pdev, NULL);
-
return 0;
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index b00c29d..38e13cd 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1239,7 +1239,6 @@ static void i801_remove(struct pci_dev *dev)
free_irq(dev->irq, priv);
pci_release_region(dev, SMBBAR);
- pci_set_drvdata(dev, NULL);
kfree(priv);
/*
* do not call pci_disable_device(dev) since it can cause hard hangs on
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index 33a2abb..405a2e2 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -773,7 +773,6 @@ error_cleanup:
if (dev->vaddr)
iounmap(dev->vaddr);
- dev_set_drvdata(&ofdev->dev, NULL);
kfree(dev);
return ret;
}
@@ -785,8 +784,6 @@ static int iic_remove(struct platform_device *ofdev)
{
struct ibm_iic_private *dev = dev_get_drvdata(&ofdev->dev);
- dev_set_drvdata(&ofdev->dev, NULL);
-
i2c_del_adapter(&dev->adap);
if (dev->irq) {
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index a71ece6..82f20c6 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -605,7 +605,6 @@ static int __exit i2c_imx_remove(struct platform_device *pdev)
/* remove adapter */
dev_dbg(&i2c_imx->adapter.dev, "adapter removed\n");
i2c_del_adapter(&i2c_imx->adapter);
- platform_set_drvdata(pdev, NULL);
/* setup chip registers to defaults */
writeb(0, i2c_imx->base + IMX_I2C_IADR);
diff --git a/drivers/i2c/busses/i2c-intel-mid.c b/drivers/i2c/busses/i2c-intel-mid.c
index de3736b..323fa01 100644
--- a/drivers/i2c/busses/i2c-intel-mid.c
+++ b/drivers/i2c/busses/i2c-intel-mid.c
@@ -1069,7 +1069,6 @@ static int intel_mid_i2c_probe(struct pci_dev *dev,
fail3:
free_irq(dev->irq, mrst);
fail2:
- pci_set_drvdata(dev, NULL);
kfree(mrst);
fail1:
iounmap(base);
@@ -1087,7 +1086,6 @@ static void intel_mid_i2c_remove(struct pci_dev *dev)
dev_err(&dev->dev, "Failed to delete i2c adapter");
free_irq(dev->irq, mrst);
- pci_set_drvdata(dev, NULL);
iounmap(mrst->base);
kfree(mrst);
pci_release_region(dev, 0);
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index 2f99613..bc99333 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -415,8 +415,6 @@ iop3xx_i2c_remove(struct platform_device *pdev)
kfree(adapter_data);
kfree(padapter);
- platform_set_drvdata(pdev, NULL);
-
return 0;
}
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index a69459e..5e705ee 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -696,7 +696,6 @@ static int fsl_i2c_probe(struct platform_device *op)
return result;
fail_add:
- dev_set_drvdata(&op->dev, NULL);
free_irq(i2c->irq, i2c);
fail_request:
irq_dispose_mapping(i2c->irq);
@@ -711,7 +710,6 @@ static int fsl_i2c_remove(struct platform_device *op)
struct mpc_i2c *i2c = dev_get_drvdata(&op->dev);
i2c_del_adapter(&i2c->adap);
- dev_set_drvdata(&op->dev, NULL);
if (i2c->irq)
free_irq(i2c->irq, i2c);
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 22d8ad3..120f246 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -697,8 +697,6 @@ static int mxs_i2c_remove(struct platform_device *pdev)
writel(MXS_I2C_CTRL0_SFTRST, i2c->regs + MXS_I2C_CTRL0_SET);
- platform_set_drvdata(pdev, NULL);
-
return 0;
}
diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index 5b1b194..650293f 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -1105,7 +1105,6 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id)
err_irq:
iounmap(dev->virtbase);
err_no_ioremap:
- amba_set_drvdata(adev, NULL);
kfree(dev);
err_pinctrl:
err_no_mem:
@@ -1130,7 +1129,6 @@ static int nmk_i2c_remove(struct amba_device *adev)
release_mem_region(res->start, resource_size(res));
clk_put(dev->clk);
pm_runtime_disable(&adev->dev);
- amba_set_drvdata(adev, NULL);
kfree(dev);
return 0;
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index a337d08..45150e3 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -452,7 +452,6 @@ static int ocores_i2c_remove(struct platform_device *pdev)
/* remove adapter & data */
i2c_del_adapter(&i2c->adap);
- platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index 484ca77..935585e 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -595,7 +595,7 @@ static int octeon_i2c_probe(struct platform_device *pdev)
result = i2c_add_adapter(&i2c->adap);
if (result < 0) {
dev_err(i2c->dev, "failed to add adapter\n");
- goto fail_add;
+ goto out;
}
dev_info(i2c->dev, "version %s\n", DRV_VERSION);
@@ -603,8 +603,6 @@ static int octeon_i2c_probe(struct platform_device *pdev)
return 0;
-fail_add:
- platform_set_drvdata(pdev, NULL);
out:
return result;
};
@@ -614,7 +612,6 @@ static int octeon_i2c_remove(struct platform_device *pdev)
struct octeon_i2c *i2c = platform_get_drvdata(pdev);
i2c_del_adapter(&i2c->adap);
- platform_set_drvdata(pdev, NULL);
return 0;
};
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 3ee1886..e02f9e3 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1260,7 +1260,6 @@ err_unuse_clocks:
pm_runtime_put(dev->dev);
pm_runtime_disable(&pdev->dev);
err_free_mem:
- platform_set_drvdata(pdev, NULL);
return r;
}
@@ -1270,8 +1269,6 @@ static int omap_i2c_remove(struct platform_device *pdev)
struct omap_i2c_dev *dev = platform_get_drvdata(pdev);
int ret;
- platform_set_drvdata(pdev, NULL);
-
i2c_del_adapter(&dev->adapter);
ret = pm_runtime_get_sync(&pdev->dev);
if (IS_ERR_VALUE(ret))
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index a30d2f6..aa00df1 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -260,7 +260,6 @@ e_print:
static int i2c_pca_pf_remove(struct platform_device *pdev)
{
struct i2c_pca_pf_data *i2c = platform_get_drvdata(pdev);
- platform_set_drvdata(pdev, NULL);
i2c_del_adapter(&i2c->adap);
diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c
index 083d68c..f6389e2 100644
--- a/drivers/i2c/busses/i2c-pmcmsp.c
+++ b/drivers/i2c/busses/i2c-pmcmsp.c
@@ -349,7 +349,6 @@ static int pmcmsptwi_probe(struct platform_device *pldev)
return 0;
ret_unmap:
- platform_set_drvdata(pldev, NULL);
if (pmcmsptwi_data.irq) {
pmcmsptwi_writel(0,
pmcmsptwi_data.iobase + MSP_TWI_INT_MSK_REG_OFFSET);
@@ -374,7 +373,6 @@ static int pmcmsptwi_remove(struct platform_device *pldev)
i2c_del_adapter(&pmcmsptwi_adapter);
- platform_set_drvdata(pldev, NULL);
if (pmcmsptwi_data.irq) {
pmcmsptwi_writel(0,
pmcmsptwi_data.iobase + MSP_TWI_INT_MSK_REG_OFFSET);
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index ce40970..5f39c6d 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -761,7 +761,6 @@ out_clkget:
out_drvdata:
kfree(alg_data);
err_kzalloc:
- platform_set_drvdata(pdev, NULL);
return ret;
}
@@ -776,7 +775,6 @@ static int i2c_pnx_remove(struct platform_device *pdev)
release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
clk_put(alg_data->clk);
kfree(alg_data);
- platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c
index 0dd5b33..da54e67 100644
--- a/drivers/i2c/busses/i2c-powermac.c
+++ b/drivers/i2c/busses/i2c-powermac.c
@@ -221,7 +221,6 @@ static int i2c_powermac_remove(struct platform_device *dev)
printk(KERN_WARNING
"i2c-powermac.c: Failed to remove bus %s !\n",
adapter->name);
- platform_set_drvdata(dev, NULL);
memset(adapter, 0, sizeof(*adapter));
return 0;
diff --git a/drivers/i2c/busses/i2c-puv3.c b/drivers/i2c/busses/i2c-puv3.c
index d7c512d..261d7db 100644
--- a/drivers/i2c/busses/i2c-puv3.c
+++ b/drivers/i2c/busses/i2c-puv3.c
@@ -223,7 +223,6 @@ static int puv3_i2c_probe(struct platform_device *pdev)
return 0;
fail_add_adapter:
- platform_set_drvdata(pdev, NULL);
kfree(adapter);
fail_nomem:
release_mem_region(mem->start, resource_size(mem));
@@ -245,7 +244,6 @@ static int puv3_i2c_remove(struct platform_device *pdev)
}
put_device(&pdev->dev);
- platform_set_drvdata(pdev, NULL);
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
release_mem_region(mem->start, resource_size(mem));
diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c
index 3d49856..9639be8 100644
--- a/drivers/i2c/busses/i2c-pxa-pci.c
+++ b/drivers/i2c/busses/i2c-pxa-pci.c
@@ -128,7 +128,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
return 0;
err_dev_add:
- pci_set_drvdata(dev, NULL);
kfree(sds);
err_mem:
pci_disable_device(dev);
@@ -141,7 +140,6 @@ static void ce4100_i2c_remove(struct pci_dev *dev)
unsigned int i;
sds = pci_get_drvdata(dev);
- pci_set_drvdata(dev, NULL);
for (i = 0; i < ARRAY_SIZE(sds->pdev); i++)
platform_device_unregister(sds->pdev[i]);
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 1034d93..fec18a4 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -1219,8 +1219,6 @@ static int __exit i2c_pxa_remove(struct platform_device *dev)
{
struct pxa_i2c *i2c = platform_get_drvdata(dev);
- platform_set_drvdata(dev, NULL);
-
i2c_del_adapter(&i2c->adap);
if (!i2c->use_pio)
free_irq(i2c->irq, i2c);
diff --git a/drivers/i2c/busses/i2c-s6000.c b/drivers/i2c/busses/i2c-s6000.c
index 0088364..7c1ca5a 100644
--- a/drivers/i2c/busses/i2c-s6000.c
+++ b/drivers/i2c/busses/i2c-s6000.c
@@ -365,7 +365,6 @@ static int s6i2c_remove(struct platform_device *pdev)
{
struct s6i2c_if *iface = platform_get_drvdata(pdev);
i2c_wr16(iface, S6_I2C_ENABLE, 0);
- platform_set_drvdata(pdev, NULL);
i2c_del_adapter(&iface->adap);
free_irq(iface->irq, iface);
clk_disable(iface->clk);
diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
index 3a2253e..5351a2f 100644
--- a/drivers/i2c/busses/i2c-sh7760.c
+++ b/drivers/i2c/busses/i2c-sh7760.c
@@ -546,7 +546,6 @@ static int sh7760_i2c_remove(struct platform_device *pdev)
release_resource(id->ioarea);
kfree(id->ioarea);
kfree(id);
- platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c
index 60195b5..0a6f941 100644
--- a/drivers/i2c/busses/i2c-stu300.c
+++ b/drivers/i2c/busses/i2c-stu300.c
@@ -975,7 +975,6 @@ stu300_remove(struct platform_device *pdev)
i2c_del_adapter(&dev->adapter);
/* Turn off everything */
stu300_wr8(0x00, dev->virtbase + I2C_CR);
- platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c
index 26c352a..6ffa56e0 100644
--- a/drivers/i2c/busses/i2c-taos-evm.c
+++ b/drivers/i2c/busses/i2c-taos-evm.c
@@ -271,7 +271,6 @@ static int taos_connect(struct serio *serio, struct serio_driver *drv)
exit_close:
serio_close(serio);
exit_kfree:
- serio_set_drvdata(serio, NULL);
kfree(taos);
exit:
return err;
@@ -285,7 +284,6 @@ static void taos_disconnect(struct serio *serio)
i2c_unregister_device(taos->client);
i2c_del_adapter(&taos->adapter);
serio_close(serio);
- serio_set_drvdata(serio, NULL);
kfree(taos);
dev_info(&serio->dev, "Disconnected from TAOS EVM\n");
diff --git a/drivers/i2c/busses/i2c-versatile.c b/drivers/i2c/busses/i2c-versatile.c
index eec20db..f3a8790 100644
--- a/drivers/i2c/busses/i2c-versatile.c
+++ b/drivers/i2c/busses/i2c-versatile.c
@@ -125,8 +125,6 @@ static int i2c_versatile_remove(struct platform_device *dev)
{
struct i2c_versatile *i2c = platform_get_drvdata(dev);
- platform_set_drvdata(dev, NULL);
-
i2c_del_adapter(&i2c->adap);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index f042f6d..332c720 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -784,8 +784,6 @@ static int xiic_i2c_remove(struct platform_device *pdev)
xiic_deinit(i2c);
- platform_set_drvdata(pdev, NULL);
-
free_irq(platform_get_irq(pdev, 0), i2c);
iounmap(i2c->base);
diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c
index 93f029e..7945b05 100644
--- a/drivers/i2c/busses/i2c-xlr.c
+++ b/drivers/i2c/busses/i2c-xlr.c
@@ -256,7 +256,6 @@ static int xlr_i2c_remove(struct platform_device *pdev)
priv = platform_get_drvdata(pdev);
i2c_del_adapter(&priv->adap);
- platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c
index 3862a95..2d1d2c5 100644
--- a/drivers/i2c/busses/scx200_acb.c
+++ b/drivers/i2c/busses/scx200_acb.c
@@ -542,7 +542,6 @@ static int scx200_remove(struct platform_device *pdev)
struct scx200_acb_iface *iface;
iface = platform_get_drvdata(pdev);
- platform_set_drvdata(pdev, NULL);
scx200_cleanup_iface(iface);
return 0;
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index 9f50ef0..abc2e55a 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -250,7 +250,6 @@ static int i2c_mux_gpio_remove(struct platform_device *pdev)
for (i = 0; i < mux->data.n_gpios; i++)
gpio_free(mux->gpio_base + mux->data.gpios[i]);
- platform_set_drvdata(pdev, NULL);
i2c_put_adapter(mux->parent);
return 0;
--
1.8.1.3
^ permalink raw reply related
* Re: [RFC PATCH 2/5] powerpc: Exception hooks for context tracking subsystem
From: Li Zhong @ 2013-02-16 9:41 UTC (permalink / raw)
To: Frederic Weisbecker; +Cc: paulmck, linuxppc-dev, linux-kernel, paulus
In-Reply-To: <CAFTL4hzfXfCGDjwjW=Frcs5+jS_i3U_JHytNOEhEF5X7_Fw6Sg@mail.gmail.com>
On Sun, 2013-02-10 at 15:10 +0100, Frederic Weisbecker wrote:
> 2013/2/1 Li Zhong <zhong@linux.vnet.ibm.com>:
> > This is the exception hooks for context tracking subsystem, including
> > data access, program check, single step, instruction breakpoint, machine check,
> > alignment, fp unavailable, altivec assist, unknown exception, whose handlers
> > might use RCU.
> >
> > This patch corresponds to
> > [PATCH] x86: Exception hooks for userspace RCU extended QS
> > commit 6ba3c97a38803883c2eee489505796cb0a727122
> >
> > Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
>
> Looks good!
>
> I guess we should move exception_enter/exit definition to the generic
> code. They should be the same for all archs after all.
Indeed.
> Also we are
> relying on user_mode(regs) but this may be buggy with some corner
> cases. For example if an exception happens after a call to user_exit()
I guess you mean user_enter() here, or am I confused?
> (on syscall exit) but before we actually resume in userspace, the
> exception will exit in kernel mode from the context tracking POV.
>
> So instead of relying on the regs, which are not in sync with the context
> tracking state, we should use something like:
>
> prev_state = exception_enter();
> ...
> exception_exit(prev_state);
>
> Also preempt_schedule_irq() is concerned as well by this problem. So I
> should convert it to that scheme as well. I'm going to prepare some
> patches.
>
> Feel free to merge this patch in the powerpc tree, I'll do the
> conversion along the way.
Or if your patches get merged earlier than these, I can update my code
according to yours.
Thanks, Zhong
>
> Thanks.
>
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Phileas Fogg @ 2013-02-16 10:53 UTC (permalink / raw)
To: Phileas Fogg; +Cc: linuxppc-dev
In-Reply-To: <1360365046.495584377@f356.mail.ru>
I was able to capture the debug output from the purgatory code and it's very odd.
This is the SHA256 digest calculated by kexec-tools:
root@ps3-linux:~# kexec -l loader.ps3
Warning: append= option is not passed. Using the first kernel root partition
Modified cmdline:
Unable to find /proc/device-tree//chosen/linux,stdout-path, printing from
purgatory is diabled
segment[0].mem:0x131d000 memsz:262144
segment[1].mem:0x135d000 memsz:36864
segment[2].mem:0x7fff000 memsz:4096
sha256_digest: 77 d5 30 a7 67 5f 67 93 f1 e0 ce 84 bd 4e 1b ec 3c 4a 9e 86 5c a1
33 87 9e b1 5f c8 91 ce e8 61
And this is the debug output i'm always getting from the purgatory code:
I'm in purgatory
sha256 digests do not match :(
digest: fd 4f df a8 af 5b e1 6b bc 51 5d b8 ab be 75 fb 76 fd 64 64 26
3e a8 9f 46 ec 91 de 05 4e 72 78
sha256_digest: 00 39 e3 b2 45 0d 20 68 74 c2 4e ee e4 4a cf ec c3 78 4f 1c 65 ff
a8 76 73 68 5d 01 70 0b b6 50
regards
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Phileas Fogg @ 2013-02-16 18:51 UTC (permalink / raw)
To: Phileas Fogg; +Cc: linuxppc-dev
In-Reply-To: <1360365046.495584377@f356.mail.ru>
Phileas Fogg wrote:
>
> Hi,
>
> i'm using OpenWRT petitboot bootloader on my PS3 to boot FreeBSD loader which is a simple PPC32 ELF file.
> I haven't had any issues with it and OpenWRT based on Linux 3.3.8.
> Recently i built an OpenWRT image with Linux 3.7, i have no issues at all with kexec and any Linux kernels starting with 2.6 but
> FreeBSD loader won't boot and just hangs. The same issue with OpenWRT based on Linux 3.6 kernel.
> So, i started to analyze this problem and found out where it hangs.
>
> It seems that the purgatory code from kexec-tools loops endlessly if SHA256 verification of the loaded segments
> fails.
>
> See
> http://git.kernel.org/?p=utils/kernel/kexec/kexec-tools.git;a=blob_plain;f=purgatory/purgatory.c;hb=566ca8a12145196b00ad37939cfd58a97f96ba89
>
> Because the function _verify_sha256_digest fails, the function _purgatory_ loops endlessly.
> This problem occurs only with Linux 3.6 or Linux 3.7 and FreeBSD loader.
> I killed the endless loop and could boot the FreeBSD loader on Linux 3.7 too.
>
> Any idea what could cause this problem ?
>
> Thanks.
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Found another strange problem. I'm not able to boot FreeBSD LiveCD with
OpenWRT + Linux 3.8 (or Linux 3.7), the same CD which boots on
OpenWRT + Linux 3.3.8.
The LiveCD just panics and the PS3 console shuts down. Very odd.
The problem is probably connected with the kexec issue i'm having
and happens only with the recent Linux kernels.
regards
^ permalink raw reply
* Re: [PATCH] i2c: Remove unneeded xxx_set_drvdata(..., NULL) calls
From: Jean Delvare @ 2013-02-16 19:52 UTC (permalink / raw)
To: Doug Anderson
Cc: Wolfram Sang, Tony Lindgren, Linus Walleij, Thierry Reding,
Sekhar Nori, linux-i2c, Guan Xuetao, Kevin Hilman, Sonic Zhang,
linux-arm-kernel, Deepak Sikri, Havard Skinnemoen, Marek Vasut,
Pawel Moll, Stephen Warren, Sascha Hauer, Uwe Kleine-König,
Rob Herring, Joe Perches, Lars-Peter Clausen,
Ben Dooks (embedded platforms), Barry Song, linux-omap,
Mika Westerberg, Oskar Schirmer, Fabio Estevam,
davinci-linux-open-source, Shawn Guo, Jim Cromie,
Greg Kroah-Hartman, Tomoya MORINAGA, linux-kernel, Kyungmin Park,
Viresh Kumar, Karol Lewandowski, Jiri Kosina, STEricsson,
uclinux-dist-devel, Andrew Morton, Alessandro Rubini,
linuxppc-dev, Alexander Stein
In-Reply-To: <1360970315-32116-1-git-send-email-dianders@chromium.org>
On Fri, 15 Feb 2013 15:18:35 -0800, Doug Anderson wrote:
> There is simply no reason to be manually setting the private driver
> data to NULL in the remove/fail to probe cases. This is just extra
> cruft code that can be removed.
>
> A few notes:
> * Nothing relies on drvdata being set to NULL.
> * The __device_release_driver() function eventually calls
> dev_set_drvdata(dev, NULL) anyway, so there's no need to do it
> twice.
I had not noticed this change. Very good news!
> * I verified that there were no cases where xxx_get_drvdata() was
> being called in these drivers and checking for / relying on the NULL
> return value.
>
> This could be cleaned up kernel-wide but for now just take the baby
> step and remove from the i2c subsystem.
>
> Reported-by: Wolfram Sang <wsa@the-dreams.de>
> Reported-by: Stephen Warren <swarren@wwwdotorg.org>
> Signed-off-by: Doug Anderson <dianders@chromium.org>
> ---
> (...)
For i2c-taos-evm:
Reviewed-by: Jean Delvare <khali@linux-fr.org>
Also a note:
> --- a/drivers/i2c/busses/i2c-octeon.c
> +++ b/drivers/i2c/busses/i2c-octeon.c
> @@ -595,7 +595,7 @@ static int octeon_i2c_probe(struct platform_device *pdev)
> result = i2c_add_adapter(&i2c->adap);
> if (result < 0) {
> dev_err(i2c->dev, "failed to add adapter\n");
> - goto fail_add;
> + goto out;
> }
> dev_info(i2c->dev, "version %s\n", DRV_VERSION);
>
> @@ -603,8 +603,6 @@ static int octeon_i2c_probe(struct platform_device *pdev)
>
> return 0;
>
> -fail_add:
> - platform_set_drvdata(pdev, NULL);
> out:
> return result;
> };
There no longer is any point in this error path, all gotos in this
function could be changed to returns (in a separate patch, obviously.)
--
Jean Delvare
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Phileas Fogg @ 2013-02-16 22:14 UTC (permalink / raw)
To: Phileas Fogg; +Cc: linuxppc-dev
In-Reply-To: <511F652F.4090508@mail.ru>
Phileas Fogg wrote:
> I was able to capture the debug output from the purgatory code and it's very odd.
>
> This the SHA256 digest calculated by kexec-tools:
>
> root@ps3-linux:~# kexec -l loader.ps3
> Warning: append= option is not passed. Using the first kernel root partition
> Modified cmdline:
> Unable to find /proc/device-tree//chosen/linux,stdout-path, printing from
> purgatory is diabled
> segment[0].mem:0x131d000 memsz:262144
> segment[1].mem:0x135d000 memsz:36864
> segment[2].mem:0x7fff000 memsz:4096
> sha256_digest: 77 d5 30 a7 67 5f 67 93 f1 e0 ce 84 bd 4e 1b ec 3c 4a 9e 86 5c a1
> 33 87 9e b1 5f c8 91 ce e8 61
>
>
> And this is the debug output i'm always getting from the purgatory code:
>
> I'm in purgatory
> sha256 digests do not match :(
> digest: fd 4f df a8 af 5b e1 6b bc 51 5d b8 ab be 75 fb 76 fd 64 64 26
> 3e a8 9f 46 ec 91 de 05 4e 72 78
> sha256_digest: 00 39 e3 b2 45 0d 20 68 74 c2 4e ee e4 4a cf ec c3 78 4f 1c 65 ff
> a8 76 73 68 5d 01 70 0b b6 50
>
> regards
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
I was able to analyze the problem more and found out that the device tree memory
region gets corrupted. I slightly modified kexec-tools and made it first compute
a checksum of the first segment only where the new kernel is located.
And the checksum was always verified as correct in the purgatory code.
Then i made kexec-tools compute the checksum of the 3rd segment only where a
device tree is stored. And this time the verify function in the purgatory failed
always.
Output from the purgatory code:
--------------------------------
I'm in purgatory
sha256 digests do not match :(
digest: e3 b0 c4 42 98 fc 1c 14 9a fb f4 c8 99 6f b9 24 27 ae 41 e4 64
9b 93 4c a4 95 99 1b 78 52 b8 55
sha256_digest: 57 08 81 e7 62 c3 22 2f d9 1d 94 a5 d0 f7 53 8f fe 69 64 84 4d 71
2d aa e2 07 45 b3 78 79 6e 26
sha256_regions:
start=0x0000000007fff000 len=0x0000000000001000
The sha256_digest is actually the correct SHA256 checksum precomputed by
kexec-tools when the new kernel was given to the old kernel.
I will try to analyze the problem more later.
regards
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Phileas Fogg @ 2013-02-16 23:12 UTC (permalink / raw)
To: Phileas Fogg; +Cc: linuxppc-dev
In-Reply-To: <511F652F.4090508@mail.ru>
I found new clues about the problem.
Normally the device tree memory segment is allocated at the top of the boot
memory region. The boot memory size on the PS3 console is 128MB.
root@ps3-linux:~# kexec -l loader.ps3
segment[0].mem:0x131d000 memsz:262144
segment[1].mem:0x135d000 memsz:36864
segment[2].mem:0x7fff000 memsz:4096
And the device tree is located at address 0x7fff000, it's the last page of the
boot memory.
I changed the kexec-tools and made it store the device tree just after the
purgatory code which is located at address 0x135d000. Like here:
root@ps3-linux:~# kexec -l loader.ps3
segment[0].mem:0x131d000 memsz:262144
segment[1].mem:0x135d000 memsz:36864
segment[2].mem:0x1366000 memsz:4096 <---- new address of device tree segment
And now the sha256 verification is always successful for the FreeBSD loader too.
But still no idea what actually corrupts the device tree segment when it's
located at the top of the boot memory region. And why it happens on Linux 3.7
and Linux 3.8 but not on Linux 3.3.8.
regards
^ permalink raw reply
* RE: [PATCH] powerpc/85xx: dts - add ranges property for SEC
From: Liu Po-B43644 @ 2013-02-17 2:40 UTC (permalink / raw)
To: Kumar Gala, Phillips Kim-R1AAHA; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <3D277CC6-2C2C-46E7-B0F8-FAECED8BC868@kernel.crashing.org>
Hi Kim,
Thank you for the fix.
Best regards,
Liu Po
- 8038
-----Original Message-----
From: Kumar Gala [mailto:galak@kernel.crashing.org]
Sent: Wednesday, February 13, 2013 1:27 AM
To: Phillips Kim-R1AAHA
Cc: Liu Po-B43644; linuxppc-dev@ozlabs.org
Subject: Re: [PATCH] powerpc/85xx: dts - add ranges property for SEC
On Jan 18, 2013, at 2:40 PM, Kim Phillips wrote:
> On Fri, 18 Jan 2013 17:16:13 +0800
> Po Liu <po.liu@freescale.com> wrote:
>
>> This facilitates getting the physical address of the SEC node.
>>
>> Signed-off-by: Liu po <po.liu@freescale.com>
>> ---
> Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
>
> Kim
This was missing a trailing ';', so wondering if it was ever tested?
I fixed when I applied.
applied.
- k
^ permalink raw reply
* Re: [PATCH 2/2] of: use platform_device_add
From: Shawn Guo @ 2013-02-17 3:03 UTC (permalink / raw)
To: Grant Likely
Cc: linux-kernel, Rob Herring, Jason Gunthorpe, Greg Kroah-Hartman,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <1358473200-17886-2-git-send-email-grant.likely@secretlab.ca>
On Fri, Jan 18, 2013 at 01:40:00AM +0000, Grant Likely wrote:
> This allows platform_device_add a chance to call insert_resource on all
> of the resources from OF. At a minimum this fills in proc/iomem and
> presumably makes resource tracking and conflict detection work better.
> However, it has the side effect of moving all OF generated platform
> devices from /sys/devices to /sys/devices/platform/. It /shouldn't/
> break userspace because userspace is not supposed to depend on the full
> path (because userspace always does what it is supposed to, right?).
>
> This may cause breakage if either:
> 1) any two nodes in a given device tree have overlapping & staggered
> regions (ie. 0x80..0xbf and 0xa0..0xdf; where one is not contained
> within the other). In this case one of the devices will fail to
> register and an exception will be needed in platform_device_add() to
> complain but not fail.
Grant,
The patch introduces a regression on imx6q boot. The IOMUXC block on
imx6q is special. It acts not only as a pin controller but also as a system
controller with a bunch of system level registers in there. That's why
we currently have the following two nodes in imx6q device tree with the
same start "reg" address, which work with drivers/mfd/syscon.c and
drivers/pinctrl/pinctrl-imx6q.c respectively.
gpr: iomuxc-gpr@020e0000 {
compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
reg = <0x020e0000 0x38>;
};
iomuxc: iomuxc@020e0000 {
compatible = "fsl,imx6q-iomuxc";
reg = <0x020e0000 0x4000>;
};
With the patch in place, pinctrl-imx6q fails to register like below.
syscon 20e0000.iomuxc: syscon regmap start 0x20e0000 end 0x20e3fff registered
imx6q-pinctrl 20e0000.iomuxc: can't request region for resource [mem 0x020e0000-0x020e3fff]
imx6q-pinctrl: probe of 20e0000.iomuxc failed with error -16
Shawn
> 2) any device calls request_mem_region() on a region larger than
> specified in the device tree. In this case the device node may be
> wrong, or the driver is overreaching. In either case I'd like to know
> about any problems and fix them.
^ permalink raw reply
* Re: [PATCH] arch/powerpc/kernel: using %12.12s instead of %12s for avoiding memory overflow.
From: Chen Gang @ 2013-02-17 4:00 UTC (permalink / raw)
To: benh, paulus; +Cc: linuxppc-dev
In-Reply-To: <5100B53C.3030109@asianux.com>
Hello relative members:
please give a glance to this patch, when you have time.
thanks.
:-)
gchen.
于 2013年01月24日 12:14, Chen Gang 写道:
>
> for tmp_part->header.name:
> it is "Terminating null required only for names < 12 chars".
> so need to limit the %.12s for it in printk
>
> additional info:
>
> %12s limit the width, not for the original string output length
> if name length is more than 12, it still can be fully displayed.
> if name length is less than 12, the ' ' will be filled before name.
>
> %.12s truly limit the original string output length (precision)
>
>
> Signed-off-by: Chen Gang <gang.chen@asianux.com>
> ---
> arch/powerpc/kernel/nvram_64.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
> index bec1e93..57bf6d2 100644
> --- a/arch/powerpc/kernel/nvram_64.c
> +++ b/arch/powerpc/kernel/nvram_64.c
> @@ -202,7 +202,7 @@ static void __init nvram_print_partitions(char * label)
> printk(KERN_WARNING "--------%s---------\n", label);
> printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
> list_for_each_entry(tmp_part, &nvram_partitions, partition) {
> - printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n",
> + printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12.12s\n",
> tmp_part->index, tmp_part->header.signature,
> tmp_part->header.checksum, tmp_part->header.length,
> tmp_part->header.name);
>
--
Chen Gang
Asianux Corporation
^ permalink raw reply
* Re: [PATCH 2/2] of: use platform_device_add
From: Shawn Guo @ 2013-02-17 7:43 UTC (permalink / raw)
To: Grant Likely
Cc: linux-kernel, Rob Herring, Jason Gunthorpe, Greg Kroah-Hartman,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <20130217030331.GA15048@S2101-09.ap.freescale.net>
On Sun, Feb 17, 2013 at 11:03:35AM +0800, Shawn Guo wrote:
> On Fri, Jan 18, 2013 at 01:40:00AM +0000, Grant Likely wrote:
> > This allows platform_device_add a chance to call insert_resource on all
> > of the resources from OF. At a minimum this fills in proc/iomem and
> > presumably makes resource tracking and conflict detection work better.
> > However, it has the side effect of moving all OF generated platform
> > devices from /sys/devices to /sys/devices/platform/. It /shouldn't/
> > break userspace because userspace is not supposed to depend on the full
> > path (because userspace always does what it is supposed to, right?).
> >
> > This may cause breakage if either:
> > 1) any two nodes in a given device tree have overlapping & staggered
> > regions (ie. 0x80..0xbf and 0xa0..0xdf; where one is not contained
> > within the other). In this case one of the devices will fail to
> > register and an exception will be needed in platform_device_add() to
> > complain but not fail.
>
> Grant,
>
> The patch introduce a regression on imx6q boot.
It also breaks all of_amba_device users.
of_amba_device_create() --> amba_device_add() --> request_resource()
and fails.
Shawn
^ permalink raw reply
* Re: [PATCH 2/2] of: use platform_device_add
From: Russell King - ARM Linux @ 2013-02-17 10:19 UTC (permalink / raw)
To: Shawn Guo
Cc: Jason Gunthorpe, linux-kernel, Rob Herring, Greg Kroah-Hartman,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <20130217074317.GB16632@S2101-09.ap.freescale.net>
On Sun, Feb 17, 2013 at 03:43:20PM +0800, Shawn Guo wrote:
> It also breaks all of_amba_device users.
>
> of_amba_device_create() --> amba_device_add() --> request_resource()
> and fails.
Presumably that's because we no longer know what the parent resource
is supposed to be?
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Geert Uytterhoeven @ 2013-02-17 8:53 UTC (permalink / raw)
To: Phileas Fogg; +Cc: linuxppc-dev
In-Reply-To: <51201276.8020104@mail.ru>
Hi Phileas,
On Sun, Feb 17, 2013 at 12:12 AM, Phileas Fogg <phileas-fogg@mail.ru> wrote:
> I found new clues about the problem.
>
> Normally the device tree memory segment is allocated at the top of the boot
> memory region. The boot memory size on the PS3 console is 128MB.
>
>
> root@ps3-linux:~# kexec -l loader.ps3
> segment[0].mem:0x131d000 memsz:262144
> segment[1].mem:0x135d000 memsz:36864
> segment[2].mem:0x7fff000 memsz:4096
>
> And the device tree is located at address 0x7fff000, it's the last page of
> the boot memory.
>
> I changed the kexec-tools and made it store the device tree just after the
> purgatory code which is located at address 0x135d000. Like here:
>
>
> root@ps3-linux:~# kexec -l loader.ps3
> segment[0].mem:0x131d000 memsz:262144
> segment[1].mem:0x135d000 memsz:36864
> segment[2].mem:0x1366000 memsz:4096 <---- new address of device tree
> segment
>
> And now the sha256 verification is always successful for the FreeBSD loader
> too.
> But still no idea what actually corrupts the device tree segment when it's
> located at the top of the boot memory region. And why it happens on Linux
> 3.7 and Linux 3.8 but not on Linux 3.3.8.
Have you looked at the actual data that ends up being written there?
It may give a clue...
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 2/2] of: use platform_device_add
From: Grant Likely @ 2013-02-17 10:49 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Linux Kernel Mailing List, Rob Herring, Jason Gunthorpe,
Greg Kroah-Hartman, Shawn Guo, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20130217101958.GR17833@n2100.arm.linux.org.uk>
On Sun, Feb 17, 2013 at 10:19 AM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Sun, Feb 17, 2013 at 03:43:20PM +0800, Shawn Guo wrote:
> > The patch introduce a regression on imx6q boot. The IOMUXC block on
> > imx6q is special. It acts not only a pin controller but also a system
> > controller with a bunch of system level registers in there. That's why
> > we currently have the following two nodes in imx6q device tree with the
> > same start "reg" address, which work with drivers/mfd/syscon.c and
> > drivers/pinctrl/pinctrl-imx6q.c respectively.
> >
> > gpr: iomuxc-gpr@020e0000 {
> > compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
> > reg = <0x020e0000 0x38>;
> > };
> >
> > iomuxc: iomuxc@020e0000 {
> > compatible = "fsl,imx6q-iomuxc";
> > reg = <0x020e0000 0x4000>;
> > };
> >
> > With the patch in place, pinctrl-imx6q fails to register like below.
> >
> > syscon 20e0000.iomuxc: syscon regmap start 0x20e0000 end 0x20e3fff registered
> > imx6q-pinctrl 20e0000.iomuxc: can't request region for resource [mem 0x020e0000-0x020e3fff]
> > imx6q-pinctrl: probe of 20e0000.iomuxc failed with error -16
Strictly you're not supposed to do that with the device tree. There
shouldn't be two nodes using the same overlapping memory region unless
they are parent/child. That rule has been around for a long time, but
the core never checked for it. What /should/ happen is the two drivers
should be cooperating for the register region and only one device
driver probe sets up both behaviours.
However, neither is it okay to just break the existing device trees.
Best thing to do I think is to deprecate one of the nodes.
>> It also breaks all of_amba_device users.
>>
>> of_amba_device_create() --> amba_device_add() --> request_resource()
>> and fails.
>
> Presumably that's because we no longer know what the parent resource
> is supposed to be?
Hmmm, it looks that way, yes. Currently the OF code is using
iomem_resource as the parent for all amba_device_add() calls
(driver/of/platform.c), but if the parent node gets registered as a
platform device and it has the resources then the parenthood chain
doesn't match up. It isn't immediately obvious to me how to fix this.
I'm going to drop the patch from my tree. I could use some help
figuring out what the correct behaviour really should be here.
g.
--
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.
^ permalink raw reply
* Re: PS3: Strange issue with kexec and FreeBSD loader
From: Phileas Fogg @ 2013-02-17 12:40 UTC (permalink / raw)
To: Geert Uytterhoeven; +Cc: linuxppc-dev
In-Reply-To: <CAMuHMdUTQWpQH=UaHc=iZPiJrry7_VqOjtm7bX+jW6r3vOge8A@mail.gmail.com>
Geert Uytterhoeven wrote:
> Hi Phileas,
>
> On Sun, Feb 17, 2013 at 12:12 AM, Phileas Fogg <phileas-fogg@mail.ru> wrote:
>> I found new clues about the problem.
>>
>> Normally the device tree memory segment is allocated at the top of the boot
>> memory region. The boot memory size on the PS3 console is 128MB.
>>
>>
>> root@ps3-linux:~# kexec -l loader.ps3
>> segment[0].mem:0x131d000 memsz:262144
>> segment[1].mem:0x135d000 memsz:36864
>> segment[2].mem:0x7fff000 memsz:4096
>>
>> And the device tree is located at address 0x7fff000, it's the last page of
>> the boot memory.
>>
>> I changed the kexec-tools and made it store the device tree just after the
>> purgatory code which is located at address 0x135d000. Like here:
>>
>>
>> root@ps3-linux:~# kexec -l loader.ps3
>> segment[0].mem:0x131d000 memsz:262144
>> segment[1].mem:0x135d000 memsz:36864
>> segment[2].mem:0x1366000 memsz:4096 <---- new address of device tree
>> segment
>>
>> And now the sha256 verification is always successful for the FreeBSD loader
>> too.
>> But still no idea what actually corrupts the device tree segment when it's
>> located at the top of the boot memory region. And why it happens on Linux
>> 3.7 and Linux 3.8 but not on Linux 3.3.8.
>
> Have you looked at the actual data that ends up being written there?
> It may give a clue...
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
> -- Linus Torvalds
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
i was able to dump the device tree data from the purgatory code and compared the
original DT which i dumped from kexec-tools and the one from purgatory.
About 20 bytes at the end of the string table of the device tree were corrupted.
Large part of the new data are 0s.
regards
^ permalink raw reply
* Re: [PATCH 2/2] of: use platform_device_add
From: Fabio Estevam @ 2013-02-17 13:18 UTC (permalink / raw)
To: Shawn Guo
Cc: Jason Gunthorpe, linux-kernel, Rob Herring, Greg Kroah-Hartman,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <20130217074317.GB16632@S2101-09.ap.freescale.net>
On Sun, Feb 17, 2013 at 4:43 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> On Sun, Feb 17, 2013 at 11:03:35AM +0800, Shawn Guo wrote:
>> On Fri, Jan 18, 2013 at 01:40:00AM +0000, Grant Likely wrote:
>> > This allows platform_device_add a chance to call insert_resource on all
>> > of the resources from OF. At a minimum this fills in proc/iomem and
>> > presumably makes resource tracking and conflict detection work better.
>> > However, it has the side effect of moving all OF generated platform
>> > devices from /sys/devices to /sys/devices/platform/. It /shouldn't/
>> > break userspace because userspace is not supposed to depend on the full
>> > path (because userspace always does what it is supposed to, right?).
>> >
>> > This may cause breakage if either:
>> > 1) any two nodes in a given device tree have overlapping & staggered
>> > regions (ie. 0x80..0xbf and 0xa0..0xdf; where one is not contained
>> > within the other). In this case one of the devices will fail to
>> > register and an exception will be needed in platform_device_add() to
>> > complain but not fail.
>>
>> Grant,
>>
>> The patch introduce a regression on imx6q boot.
>
> It also breaks all of_amba_device users.
>
> of_amba_device_create() --> amba_device_add() --> request_resource()
> and fails.
Yes, correct: amba-pl011 does not register anymore after this patch,
which causes the serial console to be not functional.
^ permalink raw reply
* Re: [PATCH] i2c: Remove unneeded xxx_set_drvdata(..., NULL) calls
From: Peter Korsgaard @ 2013-02-17 15:12 UTC (permalink / raw)
To: Doug Anderson
Cc: Wolfram Sang, Tony Lindgren, Linus Walleij, Thierry Reding,
Sekhar Nori, linux-i2c, Guan Xuetao, Kevin Hilman, Sonic Zhang,
linux-arm-kernel, Deepak Sikri, Havard Skinnemoen, Marek Vasut,
Pawel Moll, Stephen Warren, Sascha Hauer, Uwe Kleine-König,
Rob Herring, uclinux-dist-devel, Jean Delvare, Lars-Peter Clausen,
Ben Dooks (embedded platforms), Barry Song, linux-omap,
Mika Westerberg, Oskar Schirmer, Fabio Estevam,
davinci-linux-open-source, Shawn Guo, Jim Cromie,
Greg Kroah-Hartman, Tomoya MORINAGA, linux-kernel, Kyungmin Park,
Viresh Kumar, Karol Lewandowski, Jiri Kosina, STEricsson,
Joe Perches, Andrew Morton, Alessandro Rubini, linuxppc-dev,
Alexander Stein
In-Reply-To: <1360970315-32116-1-git-send-email-dianders@chromium.org>
>>>>> "Doug" == Doug Anderson <dianders@chromium.org> writes:
Doug> There is simply no reason to be manually setting the private driver
Doug> data to NULL in the remove/fail to probe cases. This is just extra
Doug> cruft code that can be removed.
Doug> A few notes:
Doug> * Nothing relies on drvdata being set to NULL.
Doug> * The __device_release_driver() function eventually calls
Doug> dev_set_drvdata(dev, NULL) anyway, so there's no need to do it
Doug> twice.
Doug> * I verified that there were no cases where xxx_get_drvdata() was
Doug> being called in these drivers and checking for / relying on the NULL
Doug> return value.
Doug> This could be cleaned up kernel-wide but for now just take the baby
Doug> step and remove from the i2c subsystem.
Doug> Reported-by: Wolfram Sang <wsa@the-dreams.de>
Doug> Reported-by: Stephen Warren <swarren@wwwdotorg.org>
Doug> Signed-off-by: Doug Anderson <dianders@chromium.org>
For i2c-ocores.c + i2c-mux-gpio.c:
Acked-by: Peter Korsgaard <jacmet@sunsite.dk>
--
Bye, Peter Korsgaard
^ permalink raw reply
* RE: [PATCH][UPSTEAM] powerpc/mpic: add irq_set_wake support
From: Wang Dongsheng-B40534 @ 2013-02-18 2:52 UTC (permalink / raw)
To: Kumar Gala, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1359601823-9861-1-git-send-email-dongsheng.wang@freescale.com>
Hi Kumar,
Could you ack this patch?
Thanks.
> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Thursday, January 31, 2013 11:10 AM
> To: linuxppc-dev@lists.ozlabs.org
> Cc: Wang Dongsheng-B40534
> Subject: [PATCH][UPSTEAM] powerpc/mpic: add irq_set_wake support
>
> Add irq_set_wake support. Just add IRQF_NO_SUSPEND to desc->action->flag.
> So the wake up interrupt will not be disable in suspend_device_irqs.
>
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> ---
> arch/powerpc/sysdev/mpic.c | 15 +++++++++++++++
> 1 files changed, 15 insertions(+), 0 deletions(-)
>
> diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
> index 9c6e535..2ed0220 100644
> --- a/arch/powerpc/sysdev/mpic.c
> +++ b/arch/powerpc/sysdev/mpic.c
> @@ -920,6 +920,18 @@ int mpic_set_irq_type(struct irq_data *d, unsigned
> int flow_type)
> return IRQ_SET_MASK_OK_NOCOPY;
> }
>
> +static int mpic_irq_set_wake(struct irq_data *d, unsigned int on)
> +{
> + struct irq_desc *desc = container_of(d, struct irq_desc, irq_data);
> +
> + if (on)
> + desc->action->flags |= IRQF_NO_SUSPEND;
> + else
> + desc->action->flags &= ~IRQF_NO_SUSPEND;
> +
> + return 0;
> +}
> +
> void mpic_set_vector(unsigned int virq, unsigned int vector)
> {
> struct mpic *mpic = mpic_from_irq(virq);
> @@ -957,6 +969,7 @@ static struct irq_chip mpic_irq_chip = {
> .irq_unmask = mpic_unmask_irq,
> .irq_eoi = mpic_end_irq,
> .irq_set_type = mpic_set_irq_type,
> + .irq_set_wake = mpic_irq_set_wake,
> };
>
> #ifdef CONFIG_SMP
> @@ -971,6 +984,7 @@ static struct irq_chip mpic_tm_chip = {
> .irq_mask = mpic_mask_tm,
> .irq_unmask = mpic_unmask_tm,
> .irq_eoi = mpic_end_irq,
> + .irq_set_wake = mpic_irq_set_wake,
> };
>
> #ifdef CONFIG_MPIC_U3_HT_IRQS
> @@ -981,6 +995,7 @@ static struct irq_chip mpic_irq_ht_chip = {
> .irq_unmask = mpic_unmask_ht_irq,
> .irq_eoi = mpic_end_ht_irq,
> .irq_set_type = mpic_set_irq_type,
> + .irq_set_wake = mpic_irq_set_wake,
> };
> #endif /* CONFIG_MPIC_U3_HT_IRQS */
>
> --
> 1.7.5.1
^ permalink raw reply
* Re: [PATCH 2/4] powerpc kvm: added multiple TCEs requests support
From: Alexey Kardashevskiy @ 2013-02-18 8:14 UTC (permalink / raw)
To: Paul Mackerras
Cc: kvm, Alexander Graf, kvm-ppc, linux-kernel, linuxppc-dev,
David Gibson
In-Reply-To: <20130215032458.GB25015@drongo>
On 15/02/13 14:24, Paul Mackerras wrote:
> On Mon, Feb 11, 2013 at 11:12:41PM +1100, aik@ozlabs.ru wrote:
>
>> +static long emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt,
>> + unsigned long ioba, unsigned long tce)
>> +{
>> + unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
>> + struct page *page;
>> + u64 *tbl;
>> +
>> + /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
>> + /* liobn, stt, stt->window_size); */
>> + if (ioba >= stt->window_size) {
>> + pr_err("%s failed on ioba=%lx\n", __func__, ioba);
>
> Doesn't this give the guest a way to spam the host logs? And in fact
> printk in real mode is potentially problematic. I would just leave
> out this statement.
>
>> + return H_PARAMETER;
>> + }
>> +
>> + page = stt->pages[idx / TCES_PER_PAGE];
>> + tbl = (u64 *)page_address(page);
>
> I would like to see an explanation of why we are confident that
> page_address() will work correctly in real mode, across all the
> combinations of config options that we can have for a ppc64 book3s
> kernel.
It was there before this patch, I just moved it so I would think it has
been explained before :)
There is no combination on PPC to get WANT_PAGE_VIRTUAL enabled.
CONFIG_HIGHMEM is supported for PPC32 only so HASHED_PAGE_VIRTUAL is not
enabled on PPC64 either.
So this definition is supposed to work on PPC64:
#define page_address(page) lowmem_page_address(page)
where lowmem_page_address() is arithmetic operation on a page struct address:
static __always_inline void *lowmem_page_address(const struct page *page)
{
return __va(PFN_PHYS(page_to_pfn(page)));
}
PPC32 will use page_address() from mm/highmem.c, I need some lesson about
memory layout in 32bit but for now I cannot see how it can possibly fail here.
>> +
>> + /* FIXME: Need to validate the TCE itself */
>> + /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
>> + tbl[idx % TCES_PER_PAGE] = tce;
>> +
>> + return H_SUCCESS;
>> +}
>> +
>> +/*
>> + * Real mode handlers
>> */
>> long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
>> unsigned long ioba, unsigned long tce)
>> {
>> - struct kvm *kvm = vcpu->kvm;
>> struct kvmppc_spapr_tce_table *stt;
>>
>> - /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
>> - /* liobn, ioba, tce); */
>> + stt = find_tce_table(vcpu, liobn);
>> + /* Didn't find the liobn, put it to userspace */
>> + if (!stt)
>> + return H_TOO_HARD;
>> +
>> + /* Emulated IO */
>> + return emulated_h_put_tce(stt, ioba, tce);
>> +}
>> +
>> +long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>> + unsigned long liobn, unsigned long ioba,
>> + unsigned long tce_list, unsigned long npages)
>> +{
>> + struct kvmppc_spapr_tce_table *stt;
>> + long i, ret = 0;
>> + unsigned long *tces;
>> +
>> + stt = find_tce_table(vcpu, liobn);
>> + /* Didn't find the liobn, put it to userspace */
>> + if (!stt)
>> + return H_TOO_HARD;
>>
>> - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
>> - if (stt->liobn == liobn) {
>> - unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
>> - struct page *page;
>> - u64 *tbl;
>> + tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL);
>> + if (!tces)
>> + return H_TOO_HARD;
>>
>> - /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
>> - /* liobn, stt, stt->window_size); */
>> - if (ioba >= stt->window_size)
>> - return H_PARAMETER;
>> + /* Emulated IO */
>> + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
>> + ret = emulated_h_put_tce(stt, ioba, tces[i]);
>
> So, tces is a pointer to somewhere inside a real page. Did we check
> somewhere that tces[npages-1] is in the same page as tces[0]? If so,
> I missed it. If we didn't, then we probably should check and do
> something about it.
>
>>
>> - page = stt->pages[idx / TCES_PER_PAGE];
>> - tbl = (u64 *)page_address(page);
>> + return ret;
>> +}
>>
>> - /* FIXME: Need to validate the TCE itself */
>> - /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
>> - tbl[idx % TCES_PER_PAGE] = tce;
>> - return H_SUCCESS;
>> - }
>> - }
>> +long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
>> + unsigned long liobn, unsigned long ioba,
>> + unsigned long tce_value, unsigned long npages)
>> +{
>> + struct kvmppc_spapr_tce_table *stt;
>> + long i, ret = 0;
>> +
>> + stt = find_tce_table(vcpu, liobn);
>> + /* Didn't find the liobn, put it to userspace */
>> + if (!stt)
>> + return H_TOO_HARD;
>>
>> - /* Didn't find the liobn, punt it to userspace */
>> - return H_TOO_HARD;
>> + /* Emulated IO */
>> + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
>> + ret = emulated_h_put_tce(stt, ioba, tce_value);
>> +
>> + return ret;
>> +}
>> +
>> +/*
>> + * Virtual mode handlers
>> + */
>> +extern long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu,
>> + unsigned long liobn, unsigned long ioba,
>> + unsigned long tce)
>> +{
>> + /* At the moment emulated IO is handled the same way */
>> + return kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
>> +}
>> +
>> +extern long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>> + unsigned long liobn, unsigned long ioba,
>> + unsigned long tce_list, unsigned long npages)
>> +{
>> + struct kvmppc_spapr_tce_table *stt;
>> + unsigned long *tces;
>> + long ret = 0, i;
>> +
>> + stt = find_tce_table(vcpu, liobn);
>> + /* Didn't find the liobn, put it to userspace */
>> + if (!stt)
>> + return H_TOO_HARD;
>> +
>> + tces = (void *) get_virt_address(vcpu, tce_list, false, NULL, NULL);
>> + if (!tces)
>> + return H_TOO_HARD;
>> +
>> + /* Emulated IO */
>> + for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
>> + ret = emulated_h_put_tce(stt, ioba, tces[i]);
>
> Same comment here about tces[i] overflowing a page boundary.
>
>> +
>> + return ret;
>> +}
>> +
>> +extern long kvmppc_virtmode_h_stuff_tce(struct kvm_vcpu *vcpu,
>> + unsigned long liobn, unsigned long ioba,
>> + unsigned long tce_value, unsigned long npages)
>> +{
>> + /* At the moment emulated IO is handled the same way */
>> + return kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
>> }
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 71d0c90..13c8436 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -515,6 +515,29 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>> kvmppc_get_gpr(vcpu, 5),
>> kvmppc_get_gpr(vcpu, 6));
>> break;
>> + case H_PUT_TCE:
>> + ret = kvmppc_virtmode_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
>> + kvmppc_get_gpr(vcpu, 5),
>> + kvmppc_get_gpr(vcpu, 6));
>> + if (ret == H_TOO_HARD)
>> + return RESUME_HOST;
>> + break;
>> + case H_PUT_TCE_INDIRECT:
>> + ret = kvmppc_virtmode_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
>> + kvmppc_get_gpr(vcpu, 5),
>> + kvmppc_get_gpr(vcpu, 6),
>> + kvmppc_get_gpr(vcpu, 7));
>> + if (ret == H_TOO_HARD)
>> + return RESUME_HOST;
>> + break;
>> + case H_STUFF_TCE:
>> + ret = kvmppc_virtmode_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
>> + kvmppc_get_gpr(vcpu, 5),
>> + kvmppc_get_gpr(vcpu, 6),
>> + kvmppc_get_gpr(vcpu, 7));
>> + if (ret == H_TOO_HARD)
>> + return RESUME_HOST;
>> + break;
>> default:
>> return RESUME_HOST;
>> }
> [snip]
>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>> index 70739a0..95614c7 100644
>> --- a/arch/powerpc/kvm/powerpc.c
>> +++ b/arch/powerpc/kvm/powerpc.c
>> @@ -383,6 +383,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>> r = 1;
>> break;
>> #endif
>> + case KVM_CAP_PPC_MULTITCE:
>> + r = 1;
>> + break;
>> default:
>> r = 0;
>> break;
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index e6e5d4b..26e2b271 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -635,6 +635,7 @@ struct kvm_ppc_smmu_info {
>> #define KVM_CAP_IRQFD_RESAMPLE 82
>> #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
>> #define KVM_CAP_PPC_HTAB_FD 84
>> +#define KVM_CAP_PPC_MULTITCE 87
>
> The capability should be described in
> Documentation/virtual/kvm/api.txt.
Is this description sufficient?
===
4.79 KVM_CAP_PPC_MULTITCE
Architectures: ppc
Parameters: none
Returns: 0 on success; -1 on error
This capability enables the guest to put/remove multiple TCE entries
per hypercall which significantly accelerates DMA operations for PPC KVM
guests.
When this capability is enabled, H_PUT_TCE_INDIRECT and H_STUFF_TCE are
expected to occur rather than H_PUT_TCE which supports only one TCE entry
per call.
===
--
Alexey
^ permalink raw reply
* Re: [PATCH v5 00/45] CPU hotplug: stop_machine()-free CPU hotplug
From: Vincent Guittot @ 2013-02-18 10:24 UTC (permalink / raw)
To: Srivatsa S. Bhat
Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
linux-arch, Russell King - ARM Linux, xiaoguangrong, wangyun,
paulmck, nikunj, linux-pm, Rusty Russell, rostedt, rjw, namhyung,
tglx, linux-arm-kernel, netdev, oleg, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <511E8F3C.2010406@linux.vnet.ibm.com>
On 15 February 2013 20:40, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> Hi Vincent,
>
> On 02/15/2013 06:58 PM, Vincent Guittot wrote:
>> Hi Srivatsa,
>>
>> I have run some tests with you branch (thanks Paul for the git tree)
>> and you will find results below.
>>
>
> Thank you very much for testing this patchset!
>
>> The tests condition are:
>> - 5 CPUs system in 2 clusters
>> - The test plugs/unplugs CPU2 and it increases the system load each 20
>> plug/unplug sequence with either more cyclictests threads
>> - The test is done with all CPUs online and with only CPU0 and CPU2
>>
>> The main conclusion is that there is no differences with and without
>> your patches with my stress tests. I'm not sure that it was the
>> expected results but the cpu_down is already quite low : 4-5ms in
>> average
>>
>
> Atleast my patchset doesn't perform _worse_ than mainline, with respect
> to cpu_down duration :-)
Yes, exactly, and it has passed more than 400 consecutive plug/unplug cycles on
an ARM platform
>
> So, here is the analysis:
> Stop-machine() doesn't really slow down CPU-down operation, if the rest
> of the CPUs are mostly running in userspace all the time. Because, the
> CPUs running userspace workloads cooperate very eagerly with the stop-machine
> dance - they receive the resched IPI, and allow the per-cpu cpu-stopper
> thread to monopolize the CPU, almost immediately.
>
> The scenario where stop-machine() takes longer to take effect is when
> most of the online CPUs are running in kernelspace, because, then the
> probability that they call preempt_disable() frequently (and hence inhibit
> stop-machine) is higher. That's why, in my tests, I ran genload from LTP
> which generated a lot of system-time (system-time in 'top' indicates activity
> in kernelspace). Hence my patchset showed significant improvement over
> mainline in my tests.
>
ok, I hadn't noticed this important point for the test
> However, your test is very useful too, if we measure a different parameter:
> the latency impact on the workloads running on the system (cyclic test).
> One other important aim of this patchset is to make hotplug as less intrusive
> as possible, for other workloads running on the system. So if you measure
> the cyclictest numbers, I would expect my patchset to show better numbers
> than mainline, when you do cpu-hotplug in parallel (same test that you did).
> Mainline would run stop-machine and hence interrupt the cyclic test tasks
> too often. My patchset wouldn't do that, and hence cyclic test should
> ideally show better numbers.
In fact, I haven't looked at the results, as I was more interested in
the load that was generated
>
> I'd really appreciate if you could try that out and let me know how it
> goes.. :-) Thank you very much!
ok, I'm going to try to run a test series
Vincent
>
> Regards,
> Srivatsa S. Bhat
>
>>
>>
>> On 12 February 2013 04:58, Srivatsa S. Bhat
>> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>>> On 02/12/2013 12:38 AM, Paul E. McKenney wrote:
>>>> On Mon, Feb 11, 2013 at 05:53:41PM +0530, Srivatsa S. Bhat wrote:
>>>>> On 02/11/2013 05:28 PM, Vincent Guittot wrote:
>>>>>> On 8 February 2013 19:09, Srivatsa S. Bhat
>>>>>> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>>>>
>>>> [ . . . ]
>>>>
>>>>>>> Adding Vincent to CC, who had previously evaluated the performance and
>>>>>>> latency implications of CPU hotplug on ARM platforms, IIRC.
>>>>>>>
>>>>>>
>>>>>> Hi Srivatsa,
>>>>>>
>>>>>> I can try to run some of our stress tests on your patches.
>>>>>
>>>>> Great!
>>>>>
>>>>>> Have you
>>>>>> got a git tree that i can pull ?
>>>>>>
>>>>>
>>>>> Unfortunately, no, none at the moment.. :-(
>>>>
>>>> You do need to create an externally visible git tree.
>>>
>>> Ok, I'll do that soon.
>>>
>>>> In the meantime,
>>>> I have added your series at rcu/bhat.2013.01.21a on -rcu:
>>>>
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
>>>>
>>>> This should appear soon on a kernel.org mirror near you. ;-)
>>>>
>>>
>>> Thank you very much, Paul! :-)
>>>
>>> Regards,
>>> Srivatsa S. Bhat
>>>
>
>
^ permalink raw reply
* [RFC PATCH 08/17] powerpc: print both base and actual page size on hash failure
From: Aneesh Kumar K.V @ 2013-02-18 10:28 UTC (permalink / raw)
To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1361183295-6958-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mmu-hash64.h | 3 ++-
arch/powerpc/mm/hash_utils_64.c | 12 +++++++-----
arch/powerpc/mm/hugetlbpage-hash64.c | 2 +-
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 6290e26..6ec65b6 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -321,7 +321,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned int shift, unsigned int mmu_psize);
extern void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
- int ssize, int psize, unsigned long pte);
+ int ssize, int psize, int lpsize,
+ unsigned long pte);
extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long pstart, unsigned long prot,
int psize, int ssize);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 48edb46..df48ba5 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -917,14 +917,14 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
- int ssize, int psize, unsigned long pte)
+ int ssize, int psize, int lpsize, unsigned long pte)
{
if (!printk_ratelimit())
return;
pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
ea, access, current->comm);
- pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
- trap, vsid, ssize, psize, pte);
+ pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
+ trap, vsid, ssize, psize, lpsize, pte);
}
/* Result code is:
@@ -1097,7 +1097,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize, psize,
- pte_val(*ptep));
+ psize, pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
@@ -1175,7 +1175,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize, pte_val(*ptep));
+ mm->context.user_psize,
+ mm->context.user_psize,
+ pte_val(*ptep));
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cecad34..af98ee8 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -129,7 +129,7 @@ repeat:
if (unlikely(slot == -2)) {
*ptep = __pte(old_pte);
hash_failure_debug(ea, access, vsid, trap, ssize,
- mmu_psize, old_pte);
+ mmu_psize, mmu_psize, old_pte);
return -1;
}
--
1.7.10
^ permalink raw reply related
* [RFC PATCH 10/17] powerpc/mm: Fix hpte_decode to use the correct decoding for page sizes
From: Aneesh Kumar K.V @ 2013-02-18 10:28 UTC (permalink / raw)
To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1361183295-6958-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
As per the ISA doc, we encode the base and actual page size in the LP bits of
the PTE. The number of bits used to encode the page sizes depends on the actual
page size. The ISA doc lists this as
PTE LP actual page size
rrrr rrrz ≥8KB
rrrr rrzz ≥16KB
rrrr rzzz ≥32KB
rrrr zzzz ≥64KB
rrrz zzzz ≥128KB
rrzz zzzz ≥256KB
rzzz zzzz ≥512KB
zzzz zzzz ≥1MB
ISA doc also says
"The values of the “z” bits used to specify each size, along with all possible
values of “r” bits in the LP field, must result in LP values distinct from
other LP values for other sizes."
Based on the above, update hpte_decode to use the correct decoding for LP bits.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/mm/hash_native_64.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 4cf361f..d36ddef 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -449,19 +449,14 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
unsigned long avpn, pteg, vpi;
- unsigned long hpte_r = hpte->r;
unsigned long hpte_v = hpte->v;
unsigned long vsid, seg_off;
- int i, size, a_size = MMU_PAGE_4K, shift, penc;
+ int size, a_size = MMU_PAGE_4K, shift, mask;
+ unsigned int lp = (hpte->r >> LP_SHIFT) & LP_BITS;
if (!(hpte_v & HPTE_V_LARGE))
size = MMU_PAGE_4K;
else {
- for (i = 0; i < LP_BITS; i++) {
- if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
- break;
- }
- penc = LP_MASK(i+1) >> LP_SHIFT;
for (size = 0; size < MMU_PAGE_COUNT; size++) {
/* 4K pages are not represented by LP */
@@ -471,12 +466,23 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* valid entries have a shift value */
if (!mmu_psize_defs[size].shift)
continue;
- for (a_size = 0; a_size < MMU_PAGE_COUNT; a_size++)
- if (penc == mmu_psize_defs[size].penc[a_size])
+
+ for (a_size = 0; a_size < MMU_PAGE_COUNT; a_size++) {
+ /* valid entries have a shift value */
+ if (!mmu_psize_defs[a_size].shift)
+ continue;
+
+ shift = mmu_psize_defs[a_size].shift - 11;
+ if (shift > 9)
+ shift = 9;
+ mask = (1 << shift) - 1;
+ if ((lp & mask) ==
+ mmu_psize_defs[size].penc[a_size]) {
goto out;
+ }
+ }
}
}
-
out:
/* This works for all page sizes, and for 256M and 1T segments */
*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
--
1.7.10
^ permalink raw reply related
* [RFC PATCH 12/17] powerpc/THP: Implement transparent huge pages for ppc64
From: Aneesh Kumar K.V @ 2013-02-18 10:28 UTC (permalink / raw)
To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1361183295-6958-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We now have pmd entries covering a 16MB range. To implement THP on powerpc,
we double the size of PMD. The second half is used to deposit the pgtable (PTE page).
We also use the deposited PTE page for tracking the HPTE information. The information
includes [ secondary group | 3 bit hidx | valid ]. We use one byte per HPTE entry.
With 16MB huge page and 64K HPTE we need 256 entries and with 4K HPTE we need
4096 entries. Both will fit in a 4K PTE page.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/page.h | 2 +-
arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 +-
arch/powerpc/include/asm/pgtable-ppc64.h | 6 +-
arch/powerpc/include/asm/pgtable.h | 247 +++++++++++++++++++
arch/powerpc/mm/init_64.c | 14 ++
arch/powerpc/mm/pgtable.c | 340 ++++++++++++++++++++++++++
arch/powerpc/platforms/Kconfig.cputype | 1 +
7 files changed, 610 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 38e7ff6..b927447 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -40,7 +40,7 @@
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int HPAGE_SHIFT;
#else
-#define HPAGE_SHIFT PAGE_SHIFT
+#define HPAGE_SHIFT PMD_SHIFT
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 3c529b4..5c5541a 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -33,7 +33,8 @@
#define PGDIR_MASK (~(PGDIR_SIZE-1))
/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS 0x1ff
+/* PMDs point to PTE table fragments which are 4K aligned. */
+#define PMD_MASKED_BITS 0xfff
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS 0x1ff
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 658ba7c..0da8840 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -149,8 +149,12 @@
|| (pmd_val(pmd) & PMD_BAD_BITS))
#define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
+/*
+ * FIXME PMD_MASKED_BITS should include all of PMD_HUGE_PROTBITS
+ * should only be called for non huge pages.
+ */
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
+extern struct page *pmd_page(pmd_t pmd);
#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index fc57855..4e49c34 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -23,6 +23,253 @@ struct mm_struct;
*/
#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* A large part matches with pte bits */
+#define PMD_HUGE_PROTBITS 0x7ff
+#define PMD_HUGE_PRESENT 0x001 /* software: pte contains a translation */
+#define PMD_HUGE_USER 0x002 /* matches one of the PP bits */
+#define PMD_HUGE_FILE 0x002 /* (!present only) software: pte holds file offset */
+#define PMD_HUGE_EXEC 0x004 /* No execute on POWER4 and newer (we invert) */
+#define PMD_HUGE_SPLITTING 0x008
+#define PMD_HUGE_HASHPTE 0x010
+#define PMD_ISHUGE 0x020
+#define PMD_HUGE_DIRTY 0x080 /* C: page changed */
+#define PMD_HUGE_ACCESSED 0x100 /* R: page referenced */
+#define PMD_HUGE_RW 0x200 /* software: user write access allowed */
+#define PMD_HUGE_BUSY 0x800 /* software: PTE & hash are busy */
+#define PMD_HUGE_HPTEFLAGS (PMD_HUGE_BUSY | PMD_HUGE_HASHPTE)
+/*
+ * We keep both the pmd and pte rpn shift same, eventhough we use only
+ * lower 12 bits for huge page flags at pmd level
+ */
+#define PMD_HUGE_RPN_SHIFT PTE_RPN_SHIFT
+#define HUGE_PAGE_SIZE (ASM_CONST(1) << 24)
+#define HUGE_PAGE_MASK (~(HUGE_PAGE_SIZE - 1))
+
+#ifndef __ASSEMBLY__
+extern void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp);
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ /*
+ * Only called for huge page pmd
+ */
+// unsigned long val = pmd_val(pmd) & ~PMD_HUGE_PROTBITS;
+ return pmd_val(pmd) >> PMD_HUGE_RPN_SHIFT;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /* Do nothing, mk_pmd() does this part. */
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_RW;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+ (PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
+ (PMD_ISHUGE|PMD_HUGE_SPLITTING);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_ISHUGE;
+}
+
+#define has_transparent_hugepage() 1
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_RW;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_RW;
+ return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_SPLITTING;
+ return pmd;
+}
+
+extern pgprot_t pmd_pgprot(pmd_t entry);
+
+/*
+ * Set the dirty and/or accessed bits atomically in a linux hugepage PMD, this
+ * function doesn't need to flush the hash entry
+ */
+static inline void __pmdp_set_access_flags(pmd_t *pmdp, pmd_t entry)
+{
+ unsigned long bits = pmd_val(entry) & (PMD_HUGE_DIRTY |
+ PMD_HUGE_ACCESSED |
+ PMD_HUGE_RW | PMD_HUGE_EXEC);
+#ifdef PTE_ATOMIC_UPDATES
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%4\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ or %0,%3,%0\n\
+ stdcx. %0,0,%4\n\
+ bne- 1b"
+ :"=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ :"r" (bits), "r" (pmdp), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ :"cc");
+#else
+ unsigned long old = pmd_val(*pmdp);
+ *pmdp = __pmd(old | bits);
+#endif
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~PMD_HUGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+
+static inline unsigned long pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmdp, unsigned long clr)
+{
+#ifdef PTE_ATOMIC_UPDATES
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3 # pmd_hugepage_update\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ : "cc" );
+#else
+ unsigned long old = pmd_val(*pmdp);
+ *pmdp = __pmd(old & ~clr);
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64 /* FIXME!! do we support anything else ? */
+ /*
+ * FIXME!! How do we find all the hash values
+ */
+ if (old & PMD_HUGE_HASHPTE)
+ hpte_need_hugepage_flush(mm, addr, pmdp);
+#endif
+ return old;
+}
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old;
+
+ if ((pmd_val(*pmdp) & (PMD_HUGE_ACCESSED | PMD_HUGE_HASHPTE)) == 0)
+ return 0;
+ old = pmd_hugepage_update(mm, addr, pmdp, PMD_HUGE_ACCESSED);
+ return ((old & PMD_HUGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
+ return __pmd(old);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+
+ if ((pmd_val(*pmdp) & PMD_HUGE_RW) == 0)
+ return;
+
+ pmd_hugepage_update(mm, addr, pmdp, PMD_HUGE_RW);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#ifndef __ASSEMBLY__
#include <asm/tlbflush.h>
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b378438..398a700 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -88,7 +88,12 @@ static void pgd_ctor(void *addr)
static void pmd_ctor(void *addr)
{
+/* FIXME may be we can take size as arg ? */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ memset(addr, 0, PMD_TABLE_SIZE * 2);
+#else
memset(addr, 0, PMD_TABLE_SIZE);
+#endif
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
@@ -138,7 +143,16 @@ void __pgtable_cache_add(unsigned int index, unsigned long table_size,
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /*
+ * we store the pgtable details in the second half of PMD
+ */
+ if (PGT_CACHE(PMD_INDEX_SIZE))
+ pr_err("PMD Page cache already initialized with different size\n");
+ __pgtable_cache_add(PMD_INDEX_SIZE, PMD_TABLE_SIZE * 2, pmd_ctor);
+#else
pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
+#endif
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
panic("Couldn't allocate pgtable caches");
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 214130a..e173b5e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -31,6 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/machdep.h>
#include "mmu_decl.h"
@@ -240,3 +241,342 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static pmd_t set_hugepage_access_flags_filter(pmd_t pmd,
+ struct vm_area_struct *vma,
+ int dirty)
+{
+ return pmd;
+}
+
+/*
+ * This is called when relaxing access to a huge page. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp, pmd_t entry, int dirty)
+{
+ int changed;
+ entry = set_hugepage_access_flags_filter(entry, vma, dirty);
+ changed = !pmd_same(*(pmdp), entry);
+ if (changed) {
+ __pmdp_set_access_flags(pmdp, entry);
+#if 0 /* FIXME!! We are not supporting SW TLB systems */
+ flush_tlb_hugepage_nohash(vma, address);
+#endif
+ }
+ return changed;
+}
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We mark the pmd splitting and invalidate all the hpte
+ * entries for this huge page.
+ */
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ unsigned long old, tmp;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifdef PTE_ATOMIC_UPDATES
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ ori %1,%0,%4 \n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "i" (PMD_HUGE_SPLITTING), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ : "cc" );
+#else
+ old = pmd_val(*pmdp);
+ *pmdp = __pmd(old | PMD_HUGE_SPLITTING);
+#endif
+ /*
+ * If we didn't had the splitting flag set, go and flush the
+ * HPTE entries and serialize against gup fast.
+ */
+ if (!(old & PMD_HUGE_SPLITTING)) {
+#ifdef CONFIG_PPC_STD_MMU_64
+ /* We need to flush the hpte */
+ if (old & PMD_HUGE_HASHPTE)
+ hpte_need_hugepage_flush(vma->vm_mm, address, pmdp);
+#endif
+ /* need tlb flush only to serialize against gup-fast */
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ }
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+/*
+ * FIXME!! pmd_page need to be validated, we may get a different value than expected
+ */
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ unsigned long *pgtable_slot;
+ assert_spin_locked(&mm->page_table_lock);
+ /*
+ * we store the pgtable in the second half of PMD
+ */
+ pgtable_slot = pmdp + PTRS_PER_PMD;
+ *pgtable_slot = (unsigned long )pgtable;
+}
+
+/* FIXME!! May be all this should be in pgtable_64.c ? */
+#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_t pgtable;
+ unsigned long *pgtable_slot;
+
+ assert_spin_locked(&mm->page_table_lock);
+ pgtable_slot = pmdp + PTRS_PER_PMD;
+ pgtable = (pgtable_t) *pgtable_slot;
+
+ /* FIXME!
+ * Make sure we are invalidating all the entries. So that
+ * we fault and create new entries later
+ */
+ /* zero out the table before returning */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ return pgtable;
+}
+
+/*
+ * Since we are looking at latest ppc64, we don't need to worry about
+ * i/d cache coherency on exec fault
+ */
+static pmd_t set_pmd_filter(pmd_t pmd, unsigned long addr)
+{
+ pmd = __pmd(pmd_val(pmd) & ~PMD_HUGE_HPTEFLAGS);
+ return pmd;
+}
+
+/*
+ * We can make it less convoluted than __set_pte_at, because
+ * we can ignore lot of hardware here, because this is only for
+ * MPSS
+ */
+static inline void __set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd, int percpu)
+{
+ /*
+ * There is nothing in hash page table now, so nothing to
+ * invalidate, set_pte_at is used for adding new entry.
+ * For updating we should use update_hugepage_pmd()
+ */
+ *pmdp = pmd;
+}
+
+/*
+ * set a new huge pmd
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ /*
+ * Note: mm->context.id might not yet have been assigned as
+ * this context might not have been activated yet when this
+ * is called.
+ * FIXME!! catch a pmd update here. Those should actually go via
+ * pmd_hugepage_update.
+ */
+ pmd = set_pmd_filter(pmd, addr);
+
+ __set_pmd_at(mm, addr, pmdp, pmd, 0);
+
+}
+
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ /* FIXME!! validate it more closely */
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, PMD_HUGE_PRESENT);
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+}
+
+/*
+ * A linux huge page PMD was changed and the corresponding hash table entry
+ * needs to be flushed. FIXME!! there is no batching support yet.
+ *
+ * The linux huge page PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
+ * With 16MB huge page and 64K HPTE we need 256 entries and with 4K HPTE we need
+ * 4096 entries. Both will fit in a 4K pgtable_t.
+ */
+void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+ int ssize, i;
+ unsigned long s_addr;
+ unsigned int psize, valid;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, vpn, vsid, hash, shift, slot;
+
+ /*
+ * Flush all the hptes mapping this huge page
+ */
+ s_addr = addr & HUGE_PAGE_MASK;
+ /*
+ * The hpte hindex are stored in the pgtable whose address is in the
+ * second half of the PMD
+ */
+ hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
+
+ /* get the base page size */
+ psize = get_slice_psize(mm, s_addr);
+ shift = mmu_psize_defs[psize].shift;
+
+ for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_slot_array[i] & 0x1;
+ if (!valid)
+ continue;
+ hidx = hpte_slot_array[i] >> 1;
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+// DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+ ppc_md.hpte_invalidate(slot, vpn, psize, ssize, 0);
+
+ /* mark the slot array invalid ?? pte variant doesn't do this*/
+// hpte_slot_array[i] = 0x0;
+ }
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+ unsigned long pmd_prot = 0;
+ unsigned long prot = pgprot_val(pgprot);
+
+ if (prot & _PAGE_PRESENT)
+ pmd_prot |= PMD_HUGE_PRESENT;
+ if (prot & _PAGE_USER)
+ pmd_prot |= PMD_HUGE_USER;
+ if (prot & _PAGE_FILE)
+ pmd_prot |= PMD_HUGE_FILE;
+ if (prot & _PAGE_EXEC)
+ pmd_prot |= PMD_HUGE_EXEC;
+
+// WARN_ON(prot & _PAGE_GUARDED);
+// WARN_ON(prot & _PAGE_COHERENT);
+// WARN_ON(prot & _PAGE_NO_CACHE);
+// WARN_ON(prot & _PAGE_WRITETHRU);
+
+ if (prot & _PAGE_DIRTY)
+ pmd_prot |= PMD_HUGE_DIRTY;
+ if (prot & _PAGE_ACCESSED)
+ pmd_prot |= PMD_HUGE_ACCESSED;
+ if (prot & _PAGE_RW)
+ pmd_prot |= PMD_HUGE_RW;
+
+// WARN_ON(prot & _PAGE_BUSY);
+ /*
+ * FIXME!! we need to do some sanity check. But the
+ * values map easily.
+ */
+ pmd_val(pmd) |= pmd_prot;
+ return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = pfn << PMD_HUGE_RPN_SHIFT;
+ /*
+ * pgtable_t is always 4K aligned, even in case where we use the
+ * pmd_t to store a large page which is 16MB aligned
+ */
+ pmd_val(pmd) |= PMD_ISHUGE;
+ pmd = pmd_set_protbits(pmd, pgprot);
+ return pmd;
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+ return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ /* FIXME!! why are this bits cleared ? */
+ pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
+ PMD_HUGE_RW |
+ PMD_HUGE_EXEC);
+ pmd = pmd_set_protbits(pmd, newprot);
+ return pmd;
+}
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ /* FIXME!! fill in later looking at update_mmu_cache */
+}
+
+/*
+ * For huge page we have pfn in the pmd, we use PMD_HUGE_RPN_SHIFT bits for flags
+ * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
+ */
+struct page *pmd_page(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ return pfn_to_page(pmd_pfn(pmd));
+#endif
+ return virt_to_page(pmd_page_vaddr(pmd));
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 72afd28..90ee19b 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
config PPC_BOOK3E_64
bool "Embedded processors"
--
1.7.10
^ permalink raw reply related
* [RFC PATCH 14/17] powerpc: support for zerout withdraw.
From: Aneesh Kumar K.V @ 2013-02-18 10:28 UTC (permalink / raw)
To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1361183295-6958-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Needs changes to other archs. This needs to be fixed further.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/pgtable.h | 3 ++-
arch/powerpc/mm/pgtable.c | 11 ++++++++---
mm/huge_memory.c | 18 ++++++++++++------
3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 4e49c34..3dfbec9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -262,7 +262,8 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma,
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
+ pmd_t *pmdp, int tozero);
#define __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 841271f..fa5e108 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -355,7 +355,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
/* FIXME!! May be all this should be in pgtable_64.c ? */
#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp, int tozero)
{
pgtable_t pgtable;
unsigned long *pgtable_slot;
@@ -368,8 +368,13 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
* Make sure we are invalidating all the entries. So that
* we fault and create new entries later
*/
- /* zero out the table before returning */
- memset(pgtable, 0, PTE_FRAG_SIZE);
+ /* FIXME!! this is not correct. zero out the table before returning
+ * because we are using this for other things.
+ * zap_huge_pmd
+ */
+ if (tozero)
+ /* Not needed, because we deposit a zeroed table? */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
return pgtable;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5beb2e2..3777a5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -781,7 +781,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
entry = pmd_wrprotect(entry);
entry = pmd_mkhuge(entry);
set_pmd_at(mm, haddr, pmd, entry);
- pgtable_trans_huge_deposit(mm, pgtable);
+ pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm->nr_ptes++;
return true;
}
@@ -996,7 +996,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
- pgtable = pgtable_trans_huge_withdraw(mm);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd, 1);
pmd_populate(mm, &_pmd, pgtable);
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1091,7 +1091,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON(!PageHead(page));
- pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd, 1);
pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
@@ -1373,7 +1373,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page;
pgtable_t pgtable;
pmd_t orig_pmd;
- pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
+ /*
+ * Withdraw the pgtable without zeroing it out, because
+ * the following pmdp_get_and_clear() will look at the
+ * pgtable contents on some architectures, such as
+ * ppc64.
+ */
+ pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd, 0);
orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (is_huge_zero_pmd(orig_pmd)) {
@@ -1705,7 +1711,7 @@ static int __split_huge_page_map(struct page *page,
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG);
if (pmd) {
- pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd, 1);
pmd_populate(mm, &_pmd, pgtable);
haddr = address;
@@ -2699,7 +2705,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
- pgtable = pgtable_trans_huge_withdraw(mm);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd, 1);
pmd_populate(mm, &_pmd, pgtable);
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
--
1.7.10
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox