LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3] gianfar: add support for wake-on-packet
From: Zhao Chenhui @ 2011-12-27 11:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: netdev, devicetree-discuss, afleming

On certain chip like MPC8536 and P1022, system can be waked up from
sleep by user-defined packet and Magic Patcket.(The eTSEC cannot
supports both types of wake-up event simultaneously.) This patch
implements wake-up on user-defined patcket including ARP request
packet and unicast patcket to this station.

When entering suspend state, the gianfar driver sets receive queue
filer table to filter all of packets except ARP request packet and
unicast patcket to this station. The driver temporarily uses the last
receive queue to receive the user defined packet.

In suspend state, the receive part of eTSEC keeps working. When
receiving a user defined packet, it generates an interrupt to
wake up the system.

The rule of the filer table is as below.
  if (arp request to local ip address)
      accept it to the last queue
  elif (unicast packet to local mac address)
      accept it to the last queue
  else
      reject it
  endif
Note: The local ip/mac address is the ethernet primary IP/MAC address of
the station. Do not support multiple IP/MAC addresses.

Here is an example of enabling and testing wake up on user defined packet.
  ifconfig eth0 10.193.20.169
  ethtool -s eth0 wol a
  echo standby > /sys/power/state or echo mem > /sys/power/state

Ping from PC host to wake up the station:
  ping 10.193.20.169

Signed-off-by: Dave Liu <daveliu@freescale.com>
Signed-off-by: Jin Qing <b24347@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
Acked-by: Andy Fleming <afleming@freescale.com>
---
This patch depends on my previous patches related to power management.

Changes for v3:
 - Add "CONFIG_FSL_PMC"

 .../devicetree/bindings/net/fsl-tsec-phy.txt       |    3 +
 drivers/net/ethernet/freescale/gianfar.c           |  363 +++++++++++++++++---
 drivers/net/ethernet/freescale/gianfar.h           |   33 ++-
 drivers/net/ethernet/freescale/gianfar_ethtool.c   |   52 ++-
 4 files changed, 373 insertions(+), 78 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index 2c6be03..543e36c 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -56,6 +56,9 @@ Properties:
     hardware.
   - fsl,magic-packet : If present, indicates that the hardware supports
     waking up via magic packet.
+  - fsl,wake-on-filer : If present, indicates that the hardware supports
+    waking up via arp request to local ip address or unicast packet to
+    local mac address.
   - bd-stash : If present, indicates that the hardware supports stashing
     buffer descriptors in the L2.
   - rx-stash-len : Denotes the number of bytes of a received buffer to stash
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 83199fd..d0a46d8 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -85,6 +85,8 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/in.h>
+#include <linux/inetdevice.h>
+#include <sysdev/fsl_soc.h>
 #include <linux/net_tstamp.h>
 
 #include <asm/io.h>
@@ -147,6 +149,17 @@ static void gfar_clear_exact_match(struct net_device *dev);
 static void gfar_set_mac_for_addr(struct net_device *dev, int num,
 				  const u8 *addr);
 static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void gfar_schedule_cleanup(struct gfar_priv_grp *gfargrp);
+
+#ifdef CONFIG_PM
+static void gfar_halt_rx(struct net_device *dev);
+static void gfar_rx_start(struct net_device *dev);
+static void gfar_enable_filer(struct net_device *dev);
+static void gfar_disable_filer(struct net_device *dev);
+static void gfar_config_filer_table(struct net_device *dev);
+static void gfar_restore_filer_table(struct net_device *dev);
+static int gfar_get_ip(struct net_device *dev);
+#endif
 
 MODULE_AUTHOR("Freescale Semiconductor, Inc");
 MODULE_DESCRIPTION("Gianfar Ethernet Driver");
@@ -751,7 +764,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 			FSL_GIANFAR_DEV_HAS_PADDING |
 			FSL_GIANFAR_DEV_HAS_CSUM |
 			FSL_GIANFAR_DEV_HAS_VLAN |
-			FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
 			FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
 			FSL_GIANFAR_DEV_HAS_TIMER;
 
@@ -763,9 +775,21 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	else
 		priv->interface = PHY_INTERFACE_MODE_MII;
 
-	if (of_get_property(np, "fsl,magic-packet", NULL))
+	/* Init Wake-on-LAN */
+	priv->wol_opts = 0;
+	priv->wol_supported = 0;
+#ifdef CONFIG_FSL_PMC
+	if (of_get_property(np, "fsl,magic-packet", NULL)) {
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
+		priv->wol_supported |= GIANFAR_WOL_MAGIC;
+	}
 
+	if (of_get_property(np, "fsl,wake-on-filer", NULL)) {
+		priv->device_flags |= FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER;
+		priv->wol_supported |= GIANFAR_WOL_ARP;
+		priv->wol_supported |= GIANFAR_WOL_UCAST;
+	}
+#endif
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
 
 	/* Find the TBI PHY.  If it's not there, we don't support SGMII */
@@ -1168,8 +1192,10 @@ static int gfar_probe(struct platform_device *ofdev)
 		goto register_fail;
 	}
 
-	device_init_wakeup(&dev->dev,
-		priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+	if (priv->wol_supported) {
+		device_set_wakeup_capable(&ofdev->dev, true);
+		device_set_wakeup_enable(&ofdev->dev, false);
+	}
 
 	/* fill out IRQ number and name fields */
 	len_devname = strlen(dev->name);
@@ -1260,6 +1286,143 @@ static int gfar_remove(struct platform_device *ofdev)
 }
 
 #ifdef CONFIG_PM
+static void gfar_enable_filer(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 temp;
+
+	lock_rx_qs(priv);
+
+	temp = gfar_read(&regs->rctrl);
+	temp &= ~(RCTRL_FSQEN | RCTRL_PRSDEP_MASK);
+	temp |= RCTRL_FILREN | RCTRL_PRSDEP_L2L3;
+	gfar_write(&regs->rctrl, temp);
+
+	unlock_rx_qs(priv);
+}
+
+static void gfar_disable_filer(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 temp;
+
+	lock_rx_qs(priv);
+
+	temp = gfar_read(&regs->rctrl);
+	temp &= ~RCTRL_FILREN;
+	gfar_write(&regs->rctrl, temp);
+
+	unlock_rx_qs(priv);
+}
+
+static int gfar_get_ip(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct in_device *in_dev;
+	int ret = -ENOENT;
+
+	in_dev = in_dev_get(dev);
+	if (!in_dev)
+		return ret;
+
+	/* Get the primary IP address */
+	for_primary_ifa(in_dev) {
+		priv->ip_addr = ifa->ifa_address;
+		ret = 0;
+		break;
+	} endfor_ifa(in_dev);
+
+	in_dev_put(in_dev);
+	return ret;
+}
+
+static void gfar_restore_filer_table(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	u32 rqfcr, rqfpr;
+	int i;
+
+	lock_rx_qs(priv);
+
+	for (i = 0; i <= MAX_FILER_IDX; i++) {
+		rqfcr = priv->ftp_rqfcr[i];
+		rqfpr = priv->ftp_rqfpr[i];
+		gfar_write_filer(priv, i, rqfcr, rqfpr);
+	}
+
+	unlock_rx_qs(priv);
+}
+
+static void gfar_config_filer_table(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	u32 dest_mac_addr;
+	u32 rqfcr, rqfpr;
+	u8  rqfcr_queue = priv->num_rx_queues - 1;
+	unsigned int index;
+
+	if (gfar_get_ip(dev)) {
+		netif_err(priv, wol, dev, "WOL: get the ip address error\n");
+		return;
+	}
+
+	lock_rx_qs(priv);
+
+	/* init filer table */
+	rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH;
+	rqfpr = 0x0;
+	for (index = 0; index <= MAX_FILER_IDX; index++)
+		gfar_write_filer(priv, index, rqfcr, rqfpr);
+
+	index = 0;
+	if (priv->wol_opts & GIANFAR_WOL_ARP) {
+		/* ARP request filer, filling the packet to the last queue */
+		rqfcr = (rqfcr_queue << 10) | RQFCR_AND |
+					RQFCR_CMP_EXACT | RQFCR_PID_MASK;
+		rqfpr = RQFPR_ARQ;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+
+		rqfcr = (rqfcr_queue << 10) | RQFCR_AND |
+					RQFCR_CMP_EXACT | RQFCR_PID_PARSE;
+		rqfpr = RQFPR_ARQ;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+
+		/*
+		 * DEST_IP address in ARP packet,
+		 * filling it to the last queue.
+		 */
+		rqfcr = (rqfcr_queue << 10) | RQFCR_AND |
+					RQFCR_CMP_EXACT | RQFCR_PID_MASK;
+		rqfpr = FPR_FILER_MASK;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+
+		rqfcr = (rqfcr_queue << 10) | RQFCR_GPI |
+					RQFCR_CMP_EXACT | RQFCR_PID_DIA;
+		rqfpr = priv->ip_addr;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+	}
+
+	if (priv->wol_opts & GIANFAR_WOL_UCAST) {
+		/* Unicast packet, filling it to the last queue */
+		dest_mac_addr = (dev->dev_addr[0] << 16) |
+				(dev->dev_addr[1] << 8) | dev->dev_addr[2];
+		rqfcr = (rqfcr_queue << 10) | RQFCR_AND |
+					RQFCR_CMP_EXACT | RQFCR_PID_DAH;
+		rqfpr = dest_mac_addr;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+
+		dest_mac_addr = (dev->dev_addr[3] << 16) |
+				(dev->dev_addr[4] << 8) | dev->dev_addr[5];
+		rqfcr = (rqfcr_queue << 10) | RQFCR_GPI |
+					RQFCR_CMP_EXACT | RQFCR_PID_DAL;
+		rqfpr = dest_mac_addr;
+		gfar_write_filer(priv, index++, rqfcr, rqfpr);
+	}
+
+	unlock_rx_qs(priv);
+}
 
 static int gfar_suspend(struct device *dev)
 {
@@ -1269,48 +1432,43 @@ static int gfar_suspend(struct device *dev)
 	unsigned long flags;
 	u32 tempval;
 
-	int magic_packet = priv->wol_en &&
-		(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
-
 	netif_device_detach(ndev);
 
-	if (netif_running(ndev)) {
-
-		local_irq_save(flags);
-		lock_tx_qs(priv);
-		lock_rx_qs(priv);
-
-		gfar_halt_nodisable(ndev);
-
-		/* Disable Tx, and Rx if wake-on-LAN is disabled. */
-		tempval = gfar_read(&regs->maccfg1);
-
-		tempval &= ~MACCFG1_TX_EN;
+	if (!netif_running(ndev))
+		return 0;
 
-		if (!magic_packet)
-			tempval &= ~MACCFG1_RX_EN;
+	local_irq_save(flags);
+	lock_tx_qs(priv);
+	lock_rx_qs(priv);
 
-		gfar_write(&regs->maccfg1, tempval);
+	gfar_halt(ndev);
 
-		unlock_rx_qs(priv);
-		unlock_tx_qs(priv);
-		local_irq_restore(flags);
+	unlock_rx_qs(priv);
+	unlock_tx_qs(priv);
+	local_irq_restore(flags);
 
-		disable_napi(priv);
+	disable_napi(priv);
 
-		if (magic_packet) {
-			/* Enable interrupt on Magic Packet */
-			gfar_write(&regs->imask, IMASK_MAG);
+	if (!priv->wol_opts && priv->phydev) {
+		phy_stop(priv->phydev);
+		return 0;
+	}
 
-			/* Enable Magic Packet mode */
-			tempval = gfar_read(&regs->maccfg2);
-			tempval |= MACCFG2_MPEN;
-			gfar_write(&regs->maccfg2, tempval);
-		} else {
-			phy_stop(priv->phydev);
-		}
+	mpc85xx_pmc_set_wake(priv->ofdev, 1);
+	if (priv->wol_opts & GIANFAR_WOL_MAGIC) {
+		/* Enable Magic Packet mode */
+		tempval = gfar_read(&regs->maccfg2);
+		tempval |= MACCFG2_MPEN;
+		gfar_write(&regs->maccfg2, tempval);
 	}
 
+	if (priv->wol_opts & (GIANFAR_WOL_ARP | GIANFAR_WOL_UCAST)) {
+		mpc85xx_pmc_set_lossless_ethernet(1);
+		gfar_disable_filer(ndev);
+		gfar_config_filer_table(ndev);
+		gfar_enable_filer(ndev);
+	}
+	gfar_rx_start(ndev);
 	return 0;
 }
 
@@ -1320,39 +1478,49 @@ static int gfar_resume(struct device *dev)
 	struct net_device *ndev = priv->ndev;
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	unsigned long flags;
-	u32 tempval;
-	int magic_packet = priv->wol_en &&
-		(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+	u32 tempval, i;
 
 	if (!netif_running(ndev)) {
 		netif_device_attach(ndev);
 		return 0;
 	}
 
-	if (!magic_packet && priv->phydev)
+	if (!priv->wol_opts && priv->phydev) {
 		phy_start(priv->phydev);
+		goto out;
+	}
+
+	mpc85xx_pmc_set_wake(priv->ofdev, 0);
 
-	/* Disable Magic Packet mode, in case something
-	 * else woke us up.
-	 */
 	local_irq_save(flags);
-	lock_tx_qs(priv);
 	lock_rx_qs(priv);
-
-	tempval = gfar_read(&regs->maccfg2);
-	tempval &= ~MACCFG2_MPEN;
-	gfar_write(&regs->maccfg2, tempval);
-
-	gfar_start(ndev);
-
+	gfar_halt_rx(ndev);
 	unlock_rx_qs(priv);
-	unlock_tx_qs(priv);
 	local_irq_restore(flags);
 
-	netif_device_attach(ndev);
+	if (priv->wol_opts & (GIANFAR_WOL_ARP | GIANFAR_WOL_UCAST)) {
+		mpc85xx_pmc_set_lossless_ethernet(0);
+		gfar_disable_filer(ndev);
+		gfar_restore_filer_table(ndev);
+	}
+
+	if (priv->wol_opts & GIANFAR_WOL_MAGIC) {
+		/* Disable Magic Packet mode */
+		tempval = gfar_read(&regs->maccfg2);
+		tempval &= ~MACCFG2_MPEN;
+		gfar_write(&regs->maccfg2, tempval);
+	}
 
+out:
+	gfar_start(ndev);
+	netif_device_attach(ndev);
 	enable_napi(priv);
 
+	if (priv->wol_opts & (GIANFAR_WOL_ARP | GIANFAR_WOL_UCAST)) {
+		/* send requests to process the received packets */
+		for (i = 0; i < priv->num_grps; i++)
+			gfar_schedule_cleanup(&priv->gfargrp[i]);
+	}
 	return 0;
 }
 
@@ -1602,6 +1770,48 @@ static int __gfar_is_rx_idle(struct gfar_private *priv)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+/* Halt the receive queues */
+static void gfar_halt_rx(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	int i = 0;
+
+	for (i = 0; i < priv->num_grps; i++) {
+		regs = priv->gfargrp[i].regs;
+		/* Mask all interrupts */
+		gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+
+		/* Clear all interrupts */
+		gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
+	}
+
+	regs = priv->gfargrp[0].regs;
+	/* Stop the DMA, and wait for it to stop */
+	tempval = gfar_read(&regs->dmactrl);
+	if ((tempval & DMACTRL_GRS) != DMACTRL_GRS) {
+		int ret;
+
+		tempval |= DMACTRL_GRS;
+		gfar_write(&regs->dmactrl, tempval);
+
+		do {
+			ret = spin_event_timeout(((gfar_read(&regs->ievent) &
+				IEVENT_GRSC) == IEVENT_GRSC), 1000000, 0);
+			if (!ret && !(gfar_read(&regs->ievent) & IEVENT_GRSC))
+				ret = __gfar_is_rx_idle(priv);
+		} while (!ret);
+	}
+
+	/* Disable Rx in MACCFG1  */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval &= ~MACCFG1_RX_EN;
+	gfar_write(&regs->maccfg1, tempval);
+}
+#endif
+
 /* Halt the receive and transmit queues */
 static void gfar_halt_nodisable(struct net_device *dev)
 {
@@ -1808,6 +2018,40 @@ void gfar_start(struct net_device *dev)
 	dev->trans_start = jiffies; /* prevent tx timeout */
 }
 
+#ifdef CONFIG_PM
+void gfar_rx_start(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	int i = 0;
+
+	/* Enable Rx in MACCFG1 */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval |= MACCFG1_RX_EN;
+	gfar_write(&regs->maccfg1, tempval);
+
+	/* Initialize DMACTRL to have WWR and WOP */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval |= DMACTRL_INIT_SETTINGS;
+	gfar_write(&regs->dmactrl, tempval);
+
+	/* Make sure we aren't stopped */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval &= ~DMACTRL_GRS;
+	gfar_write(&regs->dmactrl, tempval);
+
+	for (i = 0; i < priv->num_grps; i++) {
+		regs = priv->gfargrp[i].regs;
+		/* Clear RHLT, so that the DMA starts polling now */
+		gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
+
+		/* Unmask the interrupts we look for */
+		gfar_write(&regs->imask, IMASK_DEFAULT);
+	}
+}
+#endif
+
 void gfar_configure_coalescing(struct gfar_private *priv,
 	unsigned long tx_mask, unsigned long rx_mask)
 {
@@ -1970,7 +2214,7 @@ static int gfar_enet_open(struct net_device *dev)
 
 	netif_tx_start_all_queues(dev);
 
-	device_set_wakeup_enable(&dev->dev, priv->wol_en);
+	device_set_wakeup_enable(&priv->ofdev->dev, priv->wol_opts);
 
 	return err;
 }
@@ -2657,6 +2901,17 @@ static inline void count_errors(unsigned short status, struct net_device *dev)
 
 irqreturn_t gfar_receive(int irq, void *grp_id)
 {
+	struct gfar_priv_grp *gfargrp = grp_id;
+	struct gfar __iomem *regs = gfargrp->regs;
+	u32 ievent;
+
+	ievent = gfar_read(&regs->ievent);
+
+	if ((ievent & IEVENT_FGPI) == IEVENT_FGPI) {
+		gfar_write(&regs->ievent, ievent & IEVENT_RX_MASK);
+		return IRQ_HANDLED;
+	}
+
 	gfar_schedule_cleanup((struct gfar_priv_grp *)grp_id);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 9aa4377..1abebc4 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -232,6 +232,13 @@ extern const char gfar_driver_version[];
 #define RQUEUE_EN7		0x00000001
 #define RQUEUE_EN_ALL		0x000000FF
 
+/* Wake-On-Lan options */
+#define GIANFAR_WOL_UCAST	0x00000001	/* Unicast wakeup */
+#define GIANFAR_WOL_MCAST	0x00000002	/* Multicast wakeup */
+#define GIANFAR_WOL_BCAST	0x00000004	/* Broadcase wakeup */
+#define GIANFAR_WOL_ARP		0x00000008	/* ARP request wakeup */
+#define GIANFAR_WOL_MAGIC	0x00000010	/* Magic packet wakeup */
+
 /* Init to do tx snooping for buffers and descriptors */
 #define DMACTRL_INIT_SETTINGS   0x000000c3
 #define DMACTRL_GRS             0x00000010
@@ -277,11 +284,15 @@ extern const char gfar_driver_version[];
 #define RCTRL_PAL_MASK		0x001f0000
 #define RCTRL_VLEX		0x00002000
 #define RCTRL_FILREN		0x00001000
+#define RCTRL_FSQEN		0x00000800
 #define RCTRL_GHTX		0x00000400
 #define RCTRL_IPCSEN		0x00000200
 #define RCTRL_TUCSEN		0x00000100
 #define RCTRL_PRSDEP_MASK	0x000000c0
 #define RCTRL_PRSDEP_INIT	0x000000c0
+#define RCTRL_PRSDEP_L2		0x00000040
+#define RCTRL_PRSDEP_L2L3	0x00000080
+#define RCTRL_PRSDEP_L2L3L4	0x000000c0
 #define RCTRL_PRSFM		0x00000020
 #define RCTRL_PROM		0x00000008
 #define RCTRL_EMEN		0x00000002
@@ -327,18 +338,20 @@ extern const char gfar_driver_version[];
 #define IEVENT_MAG		0x00000800
 #define IEVENT_GRSC		0x00000100
 #define IEVENT_RXF0		0x00000080
+#define IEVENT_FGPI		0x00000010
 #define IEVENT_FIR		0x00000008
 #define IEVENT_FIQ		0x00000004
 #define IEVENT_DPE		0x00000002
 #define IEVENT_PERR		0x00000001
-#define IEVENT_RX_MASK          (IEVENT_RXB0 | IEVENT_RXF0 | IEVENT_BSY)
+#define IEVENT_RX_MASK          (IEVENT_RXB0 | IEVENT_RXF0 | \
+					IEVENT_FGPI | IEVENT_BSY)
 #define IEVENT_TX_MASK          (IEVENT_TXB | IEVENT_TXF)
 #define IEVENT_RTX_MASK         (IEVENT_RX_MASK | IEVENT_TX_MASK)
 #define IEVENT_ERR_MASK         \
-(IEVENT_RXC | IEVENT_BSY | IEVENT_EBERR | IEVENT_MSRO | \
- IEVENT_BABT | IEVENT_TXC | IEVENT_TXE | IEVENT_LC \
- | IEVENT_CRL | IEVENT_XFUN | IEVENT_DPE | IEVENT_PERR \
- | IEVENT_MAG | IEVENT_BABR)
+	(IEVENT_RXC | IEVENT_BSY | IEVENT_EBERR | IEVENT_MSRO | \
+	 IEVENT_BABT | IEVENT_TXC | IEVENT_TXE | IEVENT_LC | \
+	 IEVENT_CRL | IEVENT_XFUN | IEVENT_FIR | IEVENT_FIQ | \
+	 IEVENT_DPE | IEVENT_PERR | IEVENT_MAG | IEVENT_BABR)
 
 #define IMASK_INIT_CLEAR	0x00000000
 #define IMASK_BABR              0x80000000
@@ -359,14 +372,15 @@ extern const char gfar_driver_version[];
 #define IMASK_MAG		0x00000800
 #define IMASK_GRSC              0x00000100
 #define IMASK_RXFEN0		0x00000080
+#define IMASK_FGPI		0x00000010
 #define IMASK_FIR		0x00000008
 #define IMASK_FIQ		0x00000004
 #define IMASK_DPE		0x00000002
 #define IMASK_PERR		0x00000001
 #define IMASK_DEFAULT  (IMASK_TXEEN | IMASK_TXFEN | IMASK_TXBEN | \
 		IMASK_RXFEN0 | IMASK_BSY | IMASK_EBERR | IMASK_BABR | \
-		IMASK_XFUN | IMASK_RXC | IMASK_BABT | IMASK_DPE \
-		| IMASK_PERR)
+		IMASK_XFUN | IMASK_RXC | IMASK_BABT | IMASK_FGPI | \
+		IMASK_FIR | IMASK_FIQ | IMASK_DPE | IMASK_PERR | IMASK_MAG)
 #define IMASK_RTX_DISABLED ((~(IMASK_RXFEN0 | IMASK_TXFEN | IMASK_BSY)) \
 			   & IMASK_DEFAULT)
 
@@ -883,6 +897,7 @@ struct gfar {
 #define FSL_GIANFAR_DEV_HAS_BD_STASHING		0x00000200
 #define FSL_GIANFAR_DEV_HAS_BUF_STASHING	0x00000400
 #define FSL_GIANFAR_DEV_HAS_TIMER		0x00000800
+#define FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER	0x00001000
 
 #if (MAXGROUPS == 2)
 #define DEFAULT_MAPPING 	0xAA
@@ -1115,6 +1130,10 @@ struct gfar_private {
 
 	struct work_struct reset_task;
 
+	u32 ip_addr;
+	u32 wol_opts;
+	u32 wol_supported;
+
 	/* Network Statistics */
 	struct gfar_extra_stats extra_stats;
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 212736b..296e762 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -29,6 +29,7 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -577,32 +578,49 @@ static void gfar_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) {
-		wol->supported = WAKE_MAGIC;
-		wol->wolopts = priv->wol_en ? WAKE_MAGIC : 0;
-	} else {
-		wol->supported = wol->wolopts = 0;
-	}
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (!priv->wol_supported || !device_can_wakeup(&priv->ofdev->dev))
+		return;
+
+	if (priv->wol_supported & GIANFAR_WOL_MAGIC)
+		wol->supported |= WAKE_MAGIC;
+
+	if (priv->wol_supported & GIANFAR_WOL_ARP)
+		wol->supported |= WAKE_ARP;
+
+	if (priv->wol_supported & GIANFAR_WOL_UCAST)
+		wol->supported |= WAKE_UCAST;
+
+	if (priv->wol_opts & GIANFAR_WOL_MAGIC)
+		wol->wolopts |= WAKE_MAGIC;
+
+	if (priv->wol_opts & GIANFAR_WOL_ARP)
+		wol->wolopts |= WAKE_ARP;
+
+	if (priv->wol_opts & GIANFAR_WOL_UCAST)
+		wol->wolopts |= WAKE_UCAST;
 }
 
 static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	unsigned long flags;
 
-	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
-	    wol->wolopts != 0)
-		return -EINVAL;
-
-	if (wol->wolopts & ~WAKE_MAGIC)
-		return -EINVAL;
+	if (!priv->wol_supported || !device_can_wakeup(&priv->ofdev->dev) ||
+		(wol->wolopts & ~(WAKE_MAGIC | WAKE_ARP | WAKE_UCAST)))
+		return -EOPNOTSUPP;
 
-	device_set_wakeup_enable(&dev->dev, wol->wolopts & WAKE_MAGIC);
+	priv->wol_opts = 0;
 
-	spin_lock_irqsave(&priv->bflock, flags);
-	priv->wol_en =  !!device_may_wakeup(&dev->dev);
-	spin_unlock_irqrestore(&priv->bflock, flags);
+	if (wol->wolopts & WAKE_MAGIC)
+		priv->wol_opts |= GIANFAR_WOL_MAGIC;
+	if (wol->wolopts & WAKE_ARP)
+		priv->wol_opts |= GIANFAR_WOL_ARP;
+	if (wol->wolopts & WAKE_UCAST)
+		priv->wol_opts |= GIANFAR_WOL_UCAST;
 
+	device_set_wakeup_enable(&priv->ofdev->dev, (u32)priv->wol_opts);
 	return 0;
 }
 #endif
-- 
1.6.4.1

^ permalink raw reply related

* [PATCH v3] powerpc/85xx: add support to JOG feature using cpufreq interface
From: Zhao Chenhui @ 2011-12-27 11:25 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Scott Wood, Jerry Huang, Zhao Chenhui

From: Li Yang <leoli@freescale.com>

Some 85xx silicons like MPC8536 and P1022 have a JOG feature, which provides
a dynamic mechanism to lower or raise the CPU core clock at runtime.

This patch adds the support to change CPU frequency using the standard
cpufreq interface. The ratio CORE to CCB can be 1:1(except MPC8536), 3:2,
2:1, 5:2, 3:1, 7:2 and 4:1.

Two CPU cores on P1022 must not in the low power state during the frequency
transition. The driver uses a flag to meet the requirement.

The jog mode frequency transition process on the MPC8536 is similar to
the deep sleep process. The driver need save the CPU state and restore
it after CPU warm reset.

Note:
 * The I/O peripherals such as PCIe and eTSEC may lose packets during
   the jog mode frequency transition.
 * The driver doesn't support MPC8536 Rev 1.0 due to a JOG erratum.
   Subsequent revisions of MPC8536 have corrected the erratum.

Signed-off-by: Dave Liu <daveliu@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Jerry Huang <Chang-Ming.Huang@freescale.com>
Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
CC: Scott Wood <scottwood@freescale.com>
---
This patch depends on my previous patches related to power management.

Changes for v3:
 - Use different set_pll() functions for P1022 and MPC8536.
 - Fix a race issue for p1022.
 - Add "mpc85xx_enter_jog".

 arch/powerpc/platforms/85xx/Makefile      |    1 +
 arch/powerpc/platforms/85xx/cpufreq-jog.c |  404 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/85xx/sleep.S       |    1 +
 arch/powerpc/platforms/Kconfig            |    8 +
 arch/powerpc/sysdev/fsl_soc.h             |    1 +
 5 files changed, 415 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/platforms/85xx/cpufreq-jog.c

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index f9fcbf4..eeaa1f4 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_SMP) += smp.o
 ifneq ($(CONFIG_PPC_E500MC),y)
 obj-$(CONFIG_SUSPEND)	+= sleep.o
 endif
+obj-$(CONFIG_MPC85xx_CPUFREQ) += cpufreq-jog.o
 
 obj-y += common.o
 
diff --git a/arch/powerpc/platforms/85xx/cpufreq-jog.c b/arch/powerpc/platforms/85xx/cpufreq-jog.c
new file mode 100644
index 0000000..d1418d9
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/cpufreq-jog.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
+ * Author: Dave Liu <daveliu@freescale.com>
+ * Modifier: Chenhui Zhao <chenhui.zhao@freescale.com>
+ *
+ * The cpufreq driver is for Freescale 85xx processor,
+ * based on arch/powerpc/platforms/cell/cbe_cpufreq.c
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *	Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/of_platform.h>
+#include <linux/suspend.h>
+
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+#include <sysdev/fsl_soc.h>
+
+static DEFINE_MUTEX(mpc85xx_switch_mutex);
+static void __iomem *guts;
+
+static u32 sysfreq;
+static int in_jog_process;
+static struct cpufreq_frequency_table *mpc85xx_freqs;
+static int (*set_pll)(unsigned int cpu, unsigned int pll);
+
+static struct cpufreq_frequency_table mpc8536_freqs_table[] = {
+	{3,	0},
+	{4,	0},
+	{5,	0},
+	{6,	0},
+	{7,	0},
+	{8,	0},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+static struct cpufreq_frequency_table p1022_freqs_table[] = {
+	{2,	0},
+	{3,	0},
+	{4,	0},
+	{5,	0},
+	{6,	0},
+	{7,	0},
+	{8,	0},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+#define FREQ_533MHz	533340000
+#define FREQ_800MHz	800000000
+
+#define CORE_RATIO_BITS		8
+#define CORE_RATIO_MASK		0x3f
+#define CORE0_RATIO_SHIFT	16
+
+#define PORPLLSR	0x0
+
+#define PMJCR		0x7c
+#define PMJCR_CORE0_SPD_MASK	0x00001000
+#define PMJCR_CORE_SPD_MASK	0x00002000
+
+#define POWMGTCSR	0x80
+#define POWMGTCSR_LOSSLESS_MASK	0x00400000
+#define POWMGTCSR_JOG_MASK	0x00200000
+#define POWMGTCSR_CORE0_IRQ_MSK	0x80000000
+#define POWMGTCSR_CORE0_CI_MSK	0x40000000
+#define POWMGTCSR_CORE0_DOZING	0x00000008
+#define POWMGTCSR_CORE0_NAPPING	0x00000004
+
+#define POWMGTCSR_CORE_INT_MSK	0x00000800
+#define POWMGTCSR_CORE_CINT_MSK	0x00000400
+#define POWMGTCSR_CORE_UDE_MSK	0x00000200
+#define POWMGTCSR_CORE_MCP_MSK	0x00000100
+#define P1022_POWMGTCSR_MSK	(POWMGTCSR_CORE_INT_MSK | \
+				 POWMGTCSR_CORE_CINT_MSK | \
+				 POWMGTCSR_CORE_UDE_MSK | \
+				 POWMGTCSR_CORE_MCP_MSK)
+
+static void keep_waking_up(void *dummy)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	mb();
+
+	in_jog_process = 1;
+	mb();
+
+	while (in_jog_process != 0)
+		mb();
+
+	local_irq_restore(flags);
+}
+
+/*
+ * hardware specific functions
+ */
+static int get_pll(int hw_cpu)
+{
+	int ret, shift;
+	u32 cur_pll = in_be32(guts + PORPLLSR);
+
+	shift = hw_cpu * CORE_RATIO_BITS + CORE0_RATIO_SHIFT;
+	ret = (cur_pll >> shift) & CORE_RATIO_MASK;
+	return ret;
+}
+
+static int mpc8536_set_pll(unsigned int cpu, unsigned int pll)
+{
+	u32 corefreq, val, mask;
+	unsigned int cur_pll = get_pll(0);
+	int ret = 0;
+	unsigned long flags;
+
+	if (pll == cur_pll)
+		return 0;
+
+	val = (pll & CORE_RATIO_MASK) << CORE0_RATIO_SHIFT;
+
+	corefreq = sysfreq * pll / 2;
+	/*
+	 * Set the COREx_SPD bit if the requested core frequency
+	 * is larger than the threshold frequency.
+	 */
+	if (corefreq > FREQ_800MHz)
+			val |= PMJCR_CORE_SPD_MASK;
+
+	mask = (CORE_RATIO_MASK << CORE0_RATIO_SHIFT) | PMJCR_CORE_SPD_MASK;
+	clrsetbits_be32(guts + PMJCR, mask, val);
+
+	/* readback to sync write */
+	val = in_be32(guts + PMJCR);
+
+	local_irq_save(flags);
+	mpc85xx_enter_jog(get_immrbase(), POWMGTCSR_JOG_MASK);
+	local_irq_restore(flags);
+
+	/* verify */
+	cur_pll =  get_pll(0);
+	if (cur_pll != pll) {
+		pr_err("%s: Error. The current PLL of core 0 is %d instead of %d.\n",
+				__func__, cur_pll, pll);
+		ret = -EFAULT;
+	}
+	return ret;
+}
+
+static int p1022_set_pll(unsigned int cpu, unsigned int pll)
+{
+	int index, hw_cpu = get_hard_smp_processor_id(cpu);
+	int shift;
+	u32 corefreq, val, mask = 0;
+	unsigned int cur_pll = get_pll(hw_cpu);
+	unsigned long flags;
+	int ret = 0;
+
+	if (pll == cur_pll)
+		return 0;
+
+	shift = hw_cpu * CORE_RATIO_BITS + CORE0_RATIO_SHIFT;
+	val = (pll & CORE_RATIO_MASK) << shift;
+
+	corefreq = sysfreq * pll / 2;
+	/*
+	 * Set the COREx_SPD bit if the requested core frequency
+	 * is larger than the threshold frequency.
+	 */
+	if (corefreq > FREQ_533MHz)
+		val |= PMJCR_CORE0_SPD_MASK << hw_cpu;
+
+	mask = (CORE_RATIO_MASK << shift) | (PMJCR_CORE0_SPD_MASK << hw_cpu);
+	clrsetbits_be32(guts + PMJCR, mask, val);
+
+	/* readback to sync write */
+	val = in_be32(guts + PMJCR);
+
+	/*
+	 * A Jog request can not be asserted when any core is in a low
+	 * power state on P1022. Before executing a jog request, any
+	 * core which is in a low power state must be waked by a
+	 * interrupt, and keep waking up until the sequence is
+	 * finished.
+	 */
+	for_each_present_cpu(index) {
+		if (!cpu_online(index))
+			return -EFAULT;
+	}
+
+	in_jog_process = -1;
+	mb();
+	smp_call_function(keep_waking_up, NULL, 0);
+
+	local_irq_save(flags);
+	mb();
+	/* Wait for the other core to wake. */
+	while (in_jog_process != 1)
+		mb();
+
+	out_be32(guts + POWMGTCSR, POWMGTCSR_JOG_MASK | P1022_POWMGTCSR_MSK);
+
+	if (!spin_event_timeout(((in_be32(guts + POWMGTCSR) &
+	    POWMGTCSR_JOG_MASK) == 0), 10000, 10)) {
+		pr_err("%s: Fail to switch the core frequency.\n", __func__);
+		ret = -EFAULT;
+	}
+
+	clrbits32(guts + POWMGTCSR, P1022_POWMGTCSR_MSK);
+	in_jog_process = 0;
+	mb();
+
+	local_irq_restore(flags);
+
+	/* verify */
+	cur_pll =  get_pll(hw_cpu);
+	if (cur_pll != pll) {
+		pr_err("%s: Error. The current PLL of core %d is %d instead of %d.\n",
+				__func__, hw_cpu, cur_pll, pll);
+		ret = -EFAULT;
+	}
+	return ret;
+}
+
+/*
+ * cpufreq functions
+ */
+static int mpc85xx_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i, cur_pll;
+	int hw_cpu = get_hard_smp_processor_id(policy->cpu);
+
+	if (!cpu_present(policy->cpu))
+		return -ENODEV;
+
+	/* the latency of a transition, the unit is ns */
+	policy->cpuinfo.transition_latency = 2000;
+
+	cur_pll = get_pll(hw_cpu);
+
+	/* initialize frequency table */
+	pr_debug("core%d frequency table:\n", hw_cpu);
+	for (i = 0; mpc85xx_freqs[i].frequency != CPUFREQ_TABLE_END; i++) {
+		/* The frequency unit is kHz. */
+		mpc85xx_freqs[i].frequency =
+				(sysfreq * mpc85xx_freqs[i].index / 2) / 1000;
+		pr_debug("%d: %dkHz\n", i, mpc85xx_freqs[i].frequency);
+
+		if (mpc85xx_freqs[i].index == cur_pll)
+			policy->cur = mpc85xx_freqs[i].frequency;
+	}
+	pr_debug("current pll is at %d, and core freq is%d\n",
+					cur_pll, policy->cur);
+
+	cpufreq_frequency_table_get_attr(mpc85xx_freqs, policy->cpu);
+
+	/*
+	 * This ensures that policy->cpuinfo_min
+	 * and policy->cpuinfo_max are set correctly.
+	 */
+	return cpufreq_frequency_table_cpuinfo(policy, mpc85xx_freqs);
+}
+
+static int mpc85xx_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static int mpc85xx_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, mpc85xx_freqs);
+}
+
+static int mpc85xx_cpufreq_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	struct cpufreq_freqs freqs;
+	unsigned int new;
+	int ret = 0;
+
+	if (!set_pll)
+		return -ENODEV;
+
+	cpufreq_frequency_table_target(policy,
+				       mpc85xx_freqs,
+				       target_freq,
+				       relation,
+				       &new);
+
+	freqs.old = policy->cur;
+	freqs.new = mpc85xx_freqs[new].frequency;
+	freqs.cpu = policy->cpu;
+
+	mutex_lock(&mpc85xx_switch_mutex);
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	ret = set_pll(policy->cpu, mpc85xx_freqs[new].index);
+	if (!ret) {
+		pr_info("cpufreq: Setting core%d frequency to %d kHz and " \
+			 "PLL ratio to %d:2\n",
+			 policy->cpu,
+			 mpc85xx_freqs[new].frequency,
+			 mpc85xx_freqs[new].index);
+
+		ppc_proc_freq = freqs.new * 1000ul;
+	}
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	mutex_unlock(&mpc85xx_switch_mutex);
+
+	return ret;
+}
+
+static struct cpufreq_driver mpc85xx_cpufreq_driver = {
+	.verify		= mpc85xx_cpufreq_verify,
+	.target		= mpc85xx_cpufreq_target,
+	.init		= mpc85xx_cpufreq_cpu_init,
+	.exit		= mpc85xx_cpufreq_cpu_exit,
+	.name		= "mpc85xx-JOG",
+	.owner		= THIS_MODULE,
+	.flags		= CPUFREQ_CONST_LOOPS,
+};
+
+static int mpc85xx_job_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+
+	if (of_device_is_compatible(np, "fsl,mpc8536-guts")) {
+		mpc85xx_freqs = mpc8536_freqs_table;
+		set_pll = mpc8536_set_pll;
+	} else if (of_device_is_compatible(np, "fsl,p1022-guts")) {
+		mpc85xx_freqs = p1022_freqs_table;
+		set_pll = p1022_set_pll;
+	}
+
+	sysfreq = fsl_get_sys_freq();
+
+	guts = of_iomap(np, 0);
+	if (guts == NULL)
+		return -ENOMEM;
+
+	pr_info("Freescale MPC85xx CPU frequency switching(JOG) driver\n");
+
+	return cpufreq_register_driver(&mpc85xx_cpufreq_driver);
+}
+
+static int mpc85xx_jog_remove(struct platform_device *ofdev)
+{
+	iounmap(guts);
+	cpufreq_unregister_driver(&mpc85xx_cpufreq_driver);
+
+	return 0;
+}
+
+static struct of_device_id mpc85xx_jog_ids[] = {
+	{ .compatible = "fsl,mpc8536-guts", },
+	{ .compatible = "fsl,p1022-guts", },
+	{}
+};
+
+static struct platform_driver mpc85xx_jog_driver = {
+	.driver = {
+		.name = "mpc85xx_cpufreq_jog",
+		.owner = THIS_MODULE,
+		.of_match_table = mpc85xx_jog_ids,
+	},
+	.probe = mpc85xx_job_probe,
+	.remove = mpc85xx_jog_remove,
+};
+
+static int __init mpc85xx_jog_init(void)
+{
+	return platform_driver_register(&mpc85xx_jog_driver);
+}
+
+static void __exit mpc85xx_jog_exit(void)
+{
+	platform_driver_unregister(&mpc85xx_jog_driver);
+}
+
+module_init(mpc85xx_jog_init);
+module_exit(mpc85xx_jog_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dave Liu <daveliu@freescale.com>");
diff --git a/arch/powerpc/platforms/85xx/sleep.S b/arch/powerpc/platforms/85xx/sleep.S
index 763d2f2..919781d 100644
--- a/arch/powerpc/platforms/85xx/sleep.S
+++ b/arch/powerpc/platforms/85xx/sleep.S
@@ -59,6 +59,7 @@ powmgtreq:
 	 * r5 = JOG or deep sleep request
 	 *      JOG-0x00200000, deep sleep-0x00100000
 	 */
+_GLOBAL(mpc85xx_enter_jog)
 _GLOBAL(mpc85xx_enter_deep_sleep)
 	lis	r6, ccsrbase_low@ha
 	stw	r4, ccsrbase_low@l(r6)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 3fe6d92..1d0c4e0 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -200,6 +200,14 @@ config CPU_FREQ_PMAC64
 	  This adds support for frequency switching on Apple iMac G5,
 	  and some of the more recent desktop G5 machines as well.
 
+config MPC85xx_CPUFREQ
+	bool "Support for Freescale MPC85xx CPU freq"
+	depends on PPC_85xx && PPC32
+	select CPU_FREQ_TABLE
+	help
+	  This adds support for frequency switching on Freescale MPC85xx,
+	  currently including P1022 and MPC8536.
+
 config PPC_PASEMI_CPUFREQ
 	bool "Support for PA Semi PWRficient"
 	depends on PPC_PASEMI
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 29a87ee..8735ab0 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -62,5 +62,6 @@ void fsl_hv_halt(void);
  * code can be compatible with both 32-bit & 36-bit.
  */
 extern void mpc85xx_enter_deep_sleep(u64 ccsrbar, u32 powmgtreq);
+extern void mpc85xx_enter_jog(u64 ccsrbar, u32 powmgtreq);
 #endif
 #endif
-- 
1.6.4.1

^ permalink raw reply related

* Re: Kernel not booting when supplying boot parameter mem
From: tiejun.chen @ 2011-12-27 11:07 UTC (permalink / raw)
  To: Arshad, Farrukh; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <93CD5F41FDBC6042A6B449764F3B35CC050C9F21@EU-MBX-03.mgc.mentorg.com>

Arshad, Farrukh wrote:
> Greetings All,
> 
> I have a basic question. I have 512 MB memory. I want my kernel to use only last 128 MB of memory starting from address 0x10000000. I have configured the kernel CONFIG_PHYSICAL_START=0x10000000 and in kernel boot parameter I have set mem=128M. In this scenario my kernel is not booting and it just stuck after uncompressing it. If I do not provide mem=128M boot parameter my kernel boots fine, but in that case I can not restrict kernel to use only 128M memory. Why supplying mem=128M causing kernel to fail.
> 

Any kernel option else is configured, such as CONFIG_KERNEL_START and
CONFIG_RELOCATABLE?

Or any u-boot argument is reconfigured, such as 'bootm_low', 'bootm_mapsize' and
'bootm_size'.

Tiejun

^ permalink raw reply

* Re: [PATCH] net/ucc_geth: some fix in current kernel
From: Joakim Tjernlund @ 2011-12-27 10:56 UTC (permalink / raw)
  To: Xie Xiaobo; +Cc: netdev, linuxppc-dev
In-Reply-To: <1324979328-32081-1-git-send-email-X.Xie@freescale.com>

>
> * Revert commit "ucc_geth: Fix hangs after switching from full to half duplex"
>   This commit impacted the driver in all link state change more than
>   duplex change.

hmm, so what will happen now when switching from full to half duplex? Will it just hang? If so
that doesn't seem like an improvement.

> * Change some parameters.
>   Increased the BD ring length.

Increasing both TX and RX from 16 to 64 seems a bit much. I got away with
just increasing RX to 32. What memory are the BD ring using, the internal or external RAM?

 Jocke

>
> Signed-off-by: Haiying Wang <Haiying.Wang@freescale.com>
> Signed-off-by: Xie Xiaobo <X.Xie@freescale.com>
> ---
>  drivers/net/ethernet/freescale/ucc_geth.c |   41 ++++------------------------
>  drivers/net/ethernet/freescale/ucc_geth.h |    6 ++--
>  2 files changed, 9 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
> index b5dc027..3e18902 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (C) 2006-2009 Freescale Semicondutor, Inc. All rights reserved.
> + * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
>   *
>   * Author: Shlomi Gridish <gridish@freescale.com>
>   *      Li Yang <leoli@freescale.com>
> @@ -1570,28 +1570,6 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode)
>     return 0;
>  }
>
> -static void ugeth_quiesce(struct ucc_geth_private *ugeth)
> -{
> -   /* Prevent any further xmits, plus detach the device. */
> -   netif_device_detach(ugeth->ndev);
> -
> -   /* Wait for any current xmits to finish. */
> -   netif_tx_disable(ugeth->ndev);
> -
> -   /* Disable the interrupt to avoid NAPI rescheduling. */
> -   disable_irq(ugeth->ug_info->uf_info.irq);
> -
> -   /* Stop NAPI, and possibly wait for its completion. */
> -   napi_disable(&ugeth->napi);
> -}
> -
> -static void ugeth_activate(struct ucc_geth_private *ugeth)
> -{
> -   napi_enable(&ugeth->napi);
> -   enable_irq(ugeth->ug_info->uf_info.irq);
> -   netif_device_attach(ugeth->ndev);
> -}
> -
>  /* Called every time the controller might need to be made
>   * aware of new link state.  The PHY code conveys this
>   * information through variables in the ugeth structure, and this
> @@ -1605,11 +1583,14 @@ static void adjust_link(struct net_device *dev)
>     struct ucc_geth __iomem *ug_regs;
>     struct ucc_fast __iomem *uf_regs;
>     struct phy_device *phydev = ugeth->phydev;
> +   unsigned long flags;
>     int new_state = 0;
>
>     ug_regs = ugeth->ug_regs;
>     uf_regs = ugeth->uccf->uf_regs;
>
> +   spin_lock_irqsave(&ugeth->lock, flags);
> +
>     if (phydev->link) {
>        u32 tempval = in_be32(&ug_regs->maccfg2);
>        u32 upsmr = in_be32(&uf_regs->upsmr);
> @@ -1666,21 +1647,9 @@ static void adjust_link(struct net_device *dev)
>        }
>
>        if (new_state) {
> -         /*
> -          * To change the MAC configuration we need to disable
> -          * the controller. To do so, we have to either grab
> -          * ugeth->lock, which is a bad idea since 'graceful
> -          * stop' commands might take quite a while, or we can
> -          * quiesce driver's activity.
> -          */
> -         ugeth_quiesce(ugeth);
> -         ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
> -
>           out_be32(&ug_regs->maccfg2, tempval);
>           out_be32(&uf_regs->upsmr, upsmr);
>
> -         ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
> -         ugeth_activate(ugeth);
>        }
>     } else if (ugeth->oldlink) {
>           new_state = 1;
> @@ -1691,6 +1660,8 @@ static void adjust_link(struct net_device *dev)
>
>     if (new_state && netif_msg_link(ugeth))
>        phy_print_status(phydev);
> +
> +   spin_unlock_irqrestore(&ugeth->lock, flags);
>  }
>
>  /* Initialize TBI PHY interface for communicating with the
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
> index d12fcad..b0b42b4 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.h
> +++ b/drivers/net/ethernet/freescale/ucc_geth.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (C) Freescale Semicondutor, Inc. 2006-2009. All rights reserved.
> + * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
>   *
>   * Author: Shlomi Gridish <gridish@freescale.com>
>   *
> @@ -875,8 +875,8 @@ struct ucc_geth_hardware_statistics {
>  #define UCC_GETH_SIZE_OF_BD                     QE_SIZEOF_BD
>
>  /* Driver definitions */
> -#define TX_BD_RING_LEN                          0x10
> -#define RX_BD_RING_LEN                          0x10
> +#define TX_BD_RING_LEN                          64
> +#define RX_BD_RING_LEN                          64
>
>  #define TX_RING_MOD_MASK(size)                  (size-1)
>  #define RX_RING_MOD_MASK(size)                  (size-1)
> --
> 1.7.5.1
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* [PATCH] net/ucc_geth: some fix in current kernel
From: Xie Xiaobo @ 2011-12-27  9:48 UTC (permalink / raw)
  To: linuxppc-dev, netdev, leoli; +Cc: Xie Xiaobo

* Revert commit "ucc_geth: Fix hangs after switching from full to half duplex"
  This commit impacted the driver in all link state change more than
  duplex change.
* Change some parameters.
  Increased the BD ring length.

Signed-off-by: Haiying Wang <Haiying.Wang@freescale.com>
Signed-off-by: Xie Xiaobo <X.Xie@freescale.com>
---
 drivers/net/ethernet/freescale/ucc_geth.c |   41 ++++------------------------
 drivers/net/ethernet/freescale/ucc_geth.h |    6 ++--
 2 files changed, 9 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index b5dc027..3e18902 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2009 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Shlomi Gridish <gridish@freescale.com>
  *	   Li Yang <leoli@freescale.com>
@@ -1570,28 +1570,6 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode)
 	return 0;
 }
 
-static void ugeth_quiesce(struct ucc_geth_private *ugeth)
-{
-	/* Prevent any further xmits, plus detach the device. */
-	netif_device_detach(ugeth->ndev);
-
-	/* Wait for any current xmits to finish. */
-	netif_tx_disable(ugeth->ndev);
-
-	/* Disable the interrupt to avoid NAPI rescheduling. */
-	disable_irq(ugeth->ug_info->uf_info.irq);
-
-	/* Stop NAPI, and possibly wait for its completion. */
-	napi_disable(&ugeth->napi);
-}
-
-static void ugeth_activate(struct ucc_geth_private *ugeth)
-{
-	napi_enable(&ugeth->napi);
-	enable_irq(ugeth->ug_info->uf_info.irq);
-	netif_device_attach(ugeth->ndev);
-}
-
 /* Called every time the controller might need to be made
  * aware of new link state.  The PHY code conveys this
  * information through variables in the ugeth structure, and this
@@ -1605,11 +1583,14 @@ static void adjust_link(struct net_device *dev)
 	struct ucc_geth __iomem *ug_regs;
 	struct ucc_fast __iomem *uf_regs;
 	struct phy_device *phydev = ugeth->phydev;
+	unsigned long flags;
 	int new_state = 0;
 
 	ug_regs = ugeth->ug_regs;
 	uf_regs = ugeth->uccf->uf_regs;
 
+	spin_lock_irqsave(&ugeth->lock, flags);
+
 	if (phydev->link) {
 		u32 tempval = in_be32(&ug_regs->maccfg2);
 		u32 upsmr = in_be32(&uf_regs->upsmr);
@@ -1666,21 +1647,9 @@ static void adjust_link(struct net_device *dev)
 		}
 
 		if (new_state) {
-			/*
-			 * To change the MAC configuration we need to disable
-			 * the controller. To do so, we have to either grab
-			 * ugeth->lock, which is a bad idea since 'graceful
-			 * stop' commands might take quite a while, or we can
-			 * quiesce driver's activity.
-			 */
-			ugeth_quiesce(ugeth);
-			ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
-
 			out_be32(&ug_regs->maccfg2, tempval);
 			out_be32(&uf_regs->upsmr, upsmr);
 
-			ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
-			ugeth_activate(ugeth);
 		}
 	} else if (ugeth->oldlink) {
 			new_state = 1;
@@ -1691,6 +1660,8 @@ static void adjust_link(struct net_device *dev)
 
 	if (new_state && netif_msg_link(ugeth))
 		phy_print_status(phydev);
+
+	spin_unlock_irqrestore(&ugeth->lock, flags);
 }
 
 /* Initialize TBI PHY interface for communicating with the
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index d12fcad..b0b42b4 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) Freescale Semicondutor, Inc. 2006-2009. All rights reserved.
+ * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Shlomi Gridish <gridish@freescale.com>
  *
@@ -875,8 +875,8 @@ struct ucc_geth_hardware_statistics {
 #define UCC_GETH_SIZE_OF_BD                     QE_SIZEOF_BD
 
 /* Driver definitions */
-#define TX_BD_RING_LEN                          0x10
-#define RX_BD_RING_LEN                          0x10
+#define TX_BD_RING_LEN                          64
+#define RX_BD_RING_LEN                          64
 
 #define TX_RING_MOD_MASK(size)                  (size-1)
 #define RX_RING_MOD_MASK(size)                  (size-1)
-- 
1.7.5.1

^ permalink raw reply related

* RE: [PATCH 00/14] DMA-mapping framework redesign preparation
From: Marek Szyprowski @ 2011-12-27  8:25 UTC (permalink / raw)
  To: 'Matthew Wilcox'
  Cc: linux-mips, linux-ia64, linux-sh, linux-mm, sparclinux,
	linux-arch, 'Stephen Rothwell', 'Jonathan Corbet',
	x86, 'Arnd Bergmann', microblaze-uclinux, linaro-mm-sig,
	Andrzej Pietrasiewicz, 'Thomas Gleixner',
	linux-arm-kernel, discuss, linux-kernel, 'Kyungmin Park',
	linux-alpha, 'Andrew Morton', linuxppc-dev
In-Reply-To: <20111223163516.GO20129@parisc-linux.org>

Hello,

On Friday, December 23, 2011 5:35 PM Matthew Wilcox wrote:

> On Fri, Dec 23, 2011 at 01:27:19PM +0100, Marek Szyprowski wrote:
> > The first issue we identified is the fact that on some platform (again,
> > mainly ARM) there are several functions for allocating DMA buffers:
> > dma_alloc_coherent, dma_alloc_writecombine and dma_alloc_noncoherent
> 
> Is this write-combining from the point of view of the device (ie iommu),
> or from the point of view of the CPU, or both?

It is about write-combining from the CPU point of view. Right now there are
no devices with such advanced memory interface to do write combining on the
DMA side, but I believe that they might appear at some point in the future 
as well.

> > The next step in dma mapping framework update is the introduction of
> > dma_mmap/dma_mmap_attrs() function. There are a number of drivers
> > (mainly V4L2 and ALSA) that only exports the DMA buffers to user space.
> > Creating a userspace mapping with correct page attributes is not an easy
> > task for the driver. Also the DMA-mapping framework is the only place
> > where the complete information about the allocated pages is available,
> > especially if the implementation uses IOMMU controller to provide a
> > contiguous buffer in DMA address space which is scattered in physical
> > memory space.
> 
> Surely we only need a helper which drivrs can call from their mmap routine
> to solve this?

On ARM architecture it is already implemented this way and a bunch of drivers
use dma_mmap_coherent/dma_mmap_writecombine calls. We would like to standardize
these calls across all architectures.

> > Usually these drivers don't touch the buffer data at all, so the mapping
> > in kernel virtual address space is not needed. We can introduce
> > DMA_ATTRIB_NO_KERNEL_MAPPING attribute which lets kernel to skip/ignore
> > creation of kernel virtual mapping. This way we can save previous
> > vmalloc area and simply some mapping operation on a few architectures.
> 
> I really think this wants to be a separate function.  dma_alloc_coherent
> is for allocating memory to be shared between the kernel and a driver;
> we already have dma_map_sg for mapping userspace I/O as an alternative
> interface.  This feels like it's something different again rather than
> an option to dma_alloc_coherent.

That is just a starting point for the discussion. 

I thought about this API a bit and came to conclusion that there is no much
difference between a dma_alloc_coherent which creates a mapping in kernel
virtual space and the one that does not. It is just a hint from the driver
that it will not use that mapping at all. Of course this attribute makes sense
only together with adding a dma_mmap_attrs() call, because otherwise drivers
won't be able to get access to the buffer data.

On coherent architectures where dma_alloc_coherent is just a simple wrapper
around alloc_pages_exact() such attribute can be simply ignored without any
impact on the drivers (that's the main idea behind dma attributes!).
However such hint will help a lot on non-coherent architectures where 
additional work need to be done to provide a cohenent mapping in kernel 
address space. It also saves some precious kernel resources like vmalloc
address range.

Best regards
-- 
Marek Szyprowski
Samsung Poland R&D Center

^ permalink raw reply

* [PATCH]  dmaengine: async_xor, fix zero address issue when xor highmem page
From: b29237 @ 2011-12-27  6:30 UTC (permalink / raw)
  To: vinod.koul, dan.j.williams, linuxppc-dev, linux-kernel, r58472
  Cc: Forrest shi

From: Forrest shi <b29237@freescale.com>

	we may do_sync_xor high mem pages, in this case, page_address will
        return zero address which cause a failure.

	this patch uses kmap_atomic before xor the pages and kunmap_atomic
        after it.

	Signed-off-by: b29237@freescale.com <xuelin.shi@freescale.com>
---
 crypto/async_tx/async_xor.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index bc28337..5b416d1 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/dma-mapping.h>
 #include <linux/raid/xor.h>
 #include <linux/async_tx.h>
@@ -126,7 +127,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	    int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
 	int i;
-	int xor_src_cnt = 0;
+	int xor_src_cnt = 0, kmap_cnt=0;
 	int src_off = 0;
 	void *dest_buf;
 	void **srcs;
@@ -138,11 +139,13 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
 
 	/* convert to buffer pointers */
 	for (i = 0; i < src_cnt; i++)
-		if (src_list[i])
-			srcs[xor_src_cnt++] = page_address(src_list[i]) + offset;
+		if (src_list[i]) {
+			srcs[xor_src_cnt++] = kmap_atomic(src_list[i], KM_USER1) + offset;
+		}
+	kmap_cnt = xor_src_cnt;
 	src_cnt = xor_src_cnt;
 	/* set destination address */
-	dest_buf = page_address(dest) + offset;
+	dest_buf = kmap_atomic(dest, KM_USER0) + offset;
 
 	if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
 		memset(dest_buf, 0, len);
@@ -157,6 +160,11 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
 		src_off += xor_src_cnt;
 	}
 
+	kunmap_atomic(dest_buf, KM_USER0);
+	for (i = 0; i < kmap_cnt; i++) 
+		if (src_list[i])
+			kunmap_atomic(srcs[i], KM_USER1);
+
 	async_tx_sync_epilog(submit);
 }
 
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH] KVM: Move gfn_to_memslot() to kvm_host.h
From: Avi Kivity @ 2011-12-26 13:22 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, Alexander Graf, kvm-ppc, kvm
In-Reply-To: <20111220092102.GA5626@bloggs.ozlabs.ibm.com>

On 12/20/2011 11:21 AM, Paul Mackerras wrote:
> This moves gfn_to_memslot(), and the functions it calls, that is,
> search_memslots() and __gfn_to_memslot(), from kvm_main.c to kvm_host.h
> so that gfn_to_memslot() can be called from non-modular code even
> when KVM is a module.  On powerpc, the Book3S HV style of KVM has
> code that is called from real mode which needs to call gfn_to_memslot()
> and thus needs this.  (Module code is allocated in the vmalloc region,
> which can't be accessed in real mode.)
>
> With this, we can remove builtin_gfn_to_memslot() from book3s_hv_rm_mmu.c
> and thus eliminate a little bit of duplication.

Those functions are too big to be inlined IMO.  How about moving them to
another C file, and making it builtin for ppc?

The only issue is what to call it. virt/kvm/builtin-for-ppc seems silly.

Or we could move the implementation into a header file, with an extra __
prefix, and have the C stubs call those inlines, so we have exactly on
instantiation.  Your real mode code can then call the inlines.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply

* RE: [PATCH 3/3] powerpc/44x: Add support PCI-E for APM821xx SoC and Bluestone board
From: Vinh Huu Tuong Nguyen @ 2011-12-26  8:00 UTC (permalink / raw)
  To: Josh Boyer
  Cc: Ayman El-Khashab, Dave Kleikamp, Lucas De Marchi, Rob Herring,
	Paul Gortmaker, Paul Mackerras, Anton Blanchard, Jiri Kosina,
	linuxppc-dev, linux-kernel
In-Reply-To: <CA+5PVA4cjiUvn4YavkFs6Jx87YpO8yHZZbm4pcPJQn1r1cggUA@mail.gmail.com>

-----Original Message-----
From: Josh Boyer [mailto:jwboyer@gmail.com]
Sent: Thursday, December 22, 2011 3:20 AM
To: Vinh Huu Tuong Nguyen
Cc: Benjamin Herrenschmidt; Paul Mackerras; Matt Porter; Kumar Gala; Paul
Gortmaker; Anton Blanchard; Dave Kleikamp; Grant Likely; Tony Breeds; Rob
Herring; Jiri Kosina; Lucas De Marchi; Ayman El-Khashab;
linuxppc-dev@lists.ozlabs.org; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 3/3] powerpc/44x: Add support PCI-E for APM821xx SoC
and Bluestone board

On Tue, Dec 20, 2011 at 11:47 PM, Vinh Huu Tuong Nguyen
<vhtnguyen@apm.com> wrote:
>> +
>> + =A0 =A0 =A0 mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 mfdcri(SDR0, port->sdr_base + PESDRn_RCSSE=
T) |
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPY=
N));
>> +
>> + =A0 =A0 =A0 /* Poll for PHY reset */
>> + =A0 =A0 =A0 timeout =3D 0;
>> + =A0 =A0 =A0 while ((!(mfdcri(SDR0, PESDR0_460EX_RSTSTA +
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (port->index * 0x55)) & 0x=
1)) &&
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(timeout < PCIE_PHY_RESET_TIMEOUT)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 udelay(10);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 timeout++;
>> + =A0 =A0 =A0 }
>> +
>> + =A0 =A0 =A0 if (timeout < PCIE_PHY_RESET_TIMEOUT) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 mtdcri(SDR0, port->sdr_base + PESDRn_RCSSE=
T,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (mfdcri(SDR0, port->sdr_ba=
se + PESDRn_RCSSET) &
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ~(PESDRx_RCSSET_RSTGU | PE=
SDRx_RCSSET_RSTDL)) |
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 PESDRx_RCSSET_RSTPYN);
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 port->has_ibpre =3D 1;
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
>> + =A0 =A0 =A0 } else {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 printk(KERN_INFO "PCIE: Can't reset PHY\n"=
);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return -1;
>> + =A0 =A0 =A0 }
>
> If we can't reset the PHY, does this whole function essentially fail?
> Do the devices not get renumbered, etc? =A0If so, you probably want to
> make that KERN_ERR.
> [Vinh Nguyen] if we can't reset the PHY, maybe the device can't work
> properly. I will update codes to return the error in case PHY can't
reset.

OK.

>> @@ -1751,9 +1856,9 @@ static void __init
> ppc4xx_configure_pciex_PIMs(struct ppc4xx_pciex_port *port,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 * if it works
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 */
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0out_le32(mbase + PECFG_PIM0LAL, 0x0000000=
0);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 out_le32(mbase + PECFG_PIM0LAH, 0x00000000=
);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 out_le32(mbase + PECFG_PIM0LAH, 0x00000008=
); /* Moving
> on HB */
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0out_le32(mbase + PECFG_PIM1LAL, 0x0000000=
0);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 out_le32(mbase + PECFG_PIM1LAH, 0x00000000=
);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 out_le32(mbase + PECFG_PIM1LAH, 0x0000000c=
); /* Moving
> on HB */
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0out_le32(mbase + PECFG_PIM01SAH, 0xffff00=
00);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0out_le32(mbase + PECFG_PIM01SAL, 0x000000=
00);
>
> Why are these values changed, and are those changes only needed on
> APM821xx?
> [Vinh Nguyen] In system memory map of 460EX chip, we have an "alias DDR
> SDRAM" area that is Local Memory Alias for HB(high bandwidth), so we
tried
> to initialize it for speedup. For APM821xx, although we didn't mention
> about this area, this configuration still works well. So we keep it.

This function isn't just called for 460EX or APM821xx SoCs.  It's
called on any 4xx SoC with PCIe configured.
Is this change going to work on them all?  If not, it needs to be
conditionalized.
[Vinh Nguyen] I will update my codes as your comments. Please see next
submitted                                     codes.
>> diff --git a/arch/powerpc/sysdev/ppc4xx_pci.h
> b/arch/powerpc/sysdev/ppc4xx_pci.h
>> index 32ce763..faf3017 100644
>> --- a/arch/powerpc/sysdev/ppc4xx_pci.h
>> +++ b/arch/powerpc/sysdev/ppc4xx_pci.h
>> @@ -441,6 +441,7 @@
>> =A0/*
>> =A0* Config space register offsets
>> =A0*/
>> +#define PECFG_ECDEVCTL =A0 =A0 =A0 =A0 0x060
>> =A0#define PECFG_ECRTCTL =A0 =A0 =A0 =A0 =A00x074
>>
>> =A0#define PECFG_BAR0LMPA =A0 =A0 =A0 =A0 0x210
>> @@ -448,6 +449,7 @@
>> =A0#define PECFG_BAR1MPA =A0 =A0 =A0 =A0 =A00x218
>> =A0#define PECFG_BAR2LMPA =A0 =A0 =A0 =A0 0x220
>> =A0#define PECFG_BAR2HMPA =A0 =A0 =A0 =A0 0x224
>> +#define PECFG_ECDEVCAPPA =A0 =A0 =A0 0x25c
>>
>> =A0#define PECFG_PIMEN =A0 =A0 =A0 =A0 =A0 =A00x33c
>> =A0#define PECFG_PIM0LAL =A0 =A0 =A0 =A0 =A00x340
>> @@ -494,5 +496,7 @@ enum
>> =A0 =A0 =A0 =A0LNKW_X8 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =3D 0x8
>> =A0};
>>
>> +/* Timout for reset phy */
>> +#define PCIE_PHY_RESET_TIMEOUT 10
>
> Is this value applicable to all the 44x devices with PCI-e?
> [Vinh Nguyen] At this time, we only checked this on APM821xx chips.

Then it should probably be conditionalized somehow.  Or put as a
worst-case value that will work for them all.
[Vinh Nguyen] At this time, this definition is called in
apm821xx_pciex_init_port_hw function that is used only for apm821xx chip.
So I think it's ok, but as your recommendation, I'll check with various
boards that I have to get the best value and update it in next submitted
codes.
Best regards,
Vinh Nguyen.

^ permalink raw reply

* [PATCH] fsldma: fix performance degradation by optimizing spinlock use.
From: b29237 @ 2011-12-26  6:01 UTC (permalink / raw)
  To: iws, vinod.koul, dan.j.williams, linuxppc-dev, linux-kernel; +Cc: Forrest shi

From: Forrest shi <b29237@freescale.com>

    dma status check function fsl_tx_status is heavily called in
    a tight loop and the desc lock in fsl_tx_status contended by
    the dma status update function. this caused the dma performance
    degrades much.

    this patch releases the lock in the fsl_tx_status function, and
    introduce the smp_mb() to avoid possible memory inconsistency.

    Signed-off-by: Forrest Shi <xuelin.shi@freescale.com>
---
 drivers/dma/fsldma.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8a78154..008fb5e 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -986,15 +986,11 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	dma_cookie_t last_complete;
 	dma_cookie_t last_used;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
 
	last_complete = chan->completed_cookie;
+	smp_mb();
	last_used = dchan->cookie;
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
 	dma_set_tx_state(txstate, last_complete, last_used, 0);
 	return dma_async_is_complete(cookie, last_complete, last_used);
}
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH 4/5] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit
From: Takuya Yoshikawa @ 2011-12-26  5:05 UTC (permalink / raw)
  To: Paul Mackerras
  Cc: linuxppc-dev, Takuya Yoshikawa, Alexander Graf, kvm-ppc, KVM list
In-Reply-To: <20111225233524.GA5348@sammy.paulus.ozlabs.org>

(2011/12/26 8:35), Paul Mackerras wrote:
> On Fri, Dec 23, 2011 at 02:23:30PM +0100, Alexander Graf wrote:
>
>> So if I read things correctly, this is the only case you're setting
>> pages as dirty. What if you have the following:
>>
>>    guest adds HTAB entry x
>>    guest writes to page mapped by x
>>    guest removes HTAB entry x
>>    host fetches dirty log
>
> In that case the dirtiness is preserved in the setting of the
> KVMPPC_RMAP_CHANGED bit in the rmap entry.  kvm_test_clear_dirty()
> returns 1 if that bit is set (and clears it).  Using the rmap entry
> for this is convenient because (a) we also use it for saving the
> referenced bit when a HTAB entry is removed, and we can transfer both
> R and C over in one operation; (b) we need to be able to save away the
> C bit in real mode, and we already need to get the real-mode address
> of the rmap entry -- if we wanted to save it in a dirty bitmap we'd
> have to do an extra translation to get the real-mode address of the
> dirty bitmap word; (c) to avoid SMP races, if we were asynchronously
> setting bits in the dirty bitmap we'd have to do the double-buffering
> thing that x86 does, which seems more complicated than using the rmap
> entry (which we already have a lock bit for).

 From my x86 dirty logging experience I have some concern about your code:
your code looks slow even when there is no/few dirty pages in the slot.

+	for (i = 0; i < memslot->npages; ++i) {
+		if (kvm_test_clear_dirty(kvm, rmapp))
+			__set_bit_le(i, map);
+		++rmapp;
+	}

The check is being done for each page and this can be very expensive because
the number of pages is not small.

	When we scan the dirty_bitmap 64 pages are checked at once and
	the problem is not so significant.

Though I do not know well what kvm-ppc's dirty logging is aiming at, I guess
reporting cleanliness without noticeable delay to the user-space is important.

	E.g. for VGA most of the cases are clean.  For live migration, the
	chance of seeing complete clean slot is small but almost all cases
	are sparse.

>
>> PS: Always CC kvm@vger for stuff that other might want to review
>> (basically all patches)

(Though I sometimes check kvm-ppc on the archives,)

GET_DIRTY_LOG thing will be welcome.

	Takuya

>
> So why do we have a separate kvm-ppc list then? :)

^ permalink raw reply

* Re: [PATCH 4/5] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit
From: Paul Mackerras @ 2011-12-25 23:35 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, KVM list, kvm-ppc
In-Reply-To: <C59FE93D-AD1B-4382-BE8B-FF2DDD22930E@suse.de>

On Fri, Dec 23, 2011 at 02:23:30PM +0100, Alexander Graf wrote:

> So if I read things correctly, this is the only case you're setting
> pages as dirty. What if you have the following:
> 
>   guest adds HTAB entry x
>   guest writes to page mapped by x
>   guest removes HTAB entry x
>   host fetches dirty log

In that case the dirtiness is preserved in the setting of the
KVMPPC_RMAP_CHANGED bit in the rmap entry.  kvm_test_clear_dirty()
returns 1 if that bit is set (and clears it).  Using the rmap entry
for this is convenient because (a) we also use it for saving the
referenced bit when a HTAB entry is removed, and we can transfer both
R and C over in one operation; (b) we need to be able to save away the
C bit in real mode, and we already need to get the real-mode address
of the rmap entry -- if we wanted to save it in a dirty bitmap we'd
have to do an extra translation to get the real-mode address of the
dirty bitmap word; (c) to avoid SMP races, if we were asynchronously
setting bits in the dirty bitmap we'd have to do the double-buffering
thing that x86 does, which seems more complicated than using the rmap
entry (which we already have a lock bit for).

> PS: Always CC kvm@vger for stuff that other might want to review
> (basically all patches)

So why do we have a separate kvm-ppc list then? :)

Paul.

^ permalink raw reply

* Re: p1020 unstable with 3.2
From: Alexander Graf @ 2011-12-25 10:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Fleming Andy-AFLEMING
In-Reply-To: <1324709633.6632.20.camel@pasglop>


On 24.12.2011, at 07:53, Benjamin Herrenschmidt wrote:

> On Fri, 2011-12-23 at 17:54 +0100, Alexander Graf wrote:
>> Hi guys,
>> 
>> While trying to test my latest patch queue for ppc kvm, I realized
>> that even though the device trees got updated, the p1020 box still is
>> unstable. The trace below is the one I've seen the most. It only
>> occurs during network I/O which happens a lot on that box, since I'm
>> running it using NFS root.
>> 
>> As for configuration, I use kumar's "merge" branch from today and the
>> p1020rdb.dts device tree provided in that tree.
>> 
>> The last known good configuration I'm aware of is 3.0.
>> 
>> Any ideas what's going wrong here?
> 
> Try SLAB instead of SLUB and let me know. It -could- be a bogon in SLUB
> that should be fixed upstream now but I think did hit 3.2

Yup, things seem a lot more stable with SLAB now :).


Alex

^ permalink raw reply

* Re: [PATCH 00/14] DMA-mapping framework redesign preparation
From: Benjamin Herrenschmidt @ 2011-12-24  7:00 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: linux-mips, linux-ia64, linux-sh, linux-mm, sparclinux,
	Marek Szyprowski, linux-arch, Stephen Rothwell, Jonathan Corbet,
	x86, Arnd Bergmann, microblaze-uclinux, linaro-mm-sig,
	Andrzej Pietrasiewicz, Thomas Gleixner, linux-arm-kernel, discuss,
	linux-kernel, Kyungmin Park, linux-alpha, Andrew Morton,
	linuxppc-dev
In-Reply-To: <20111223163516.GO20129@parisc-linux.org>

On Fri, 2011-12-23 at 09:35 -0700, Matthew Wilcox wrote:
> I really think this wants to be a separate function.
> dma_alloc_coherent
> is for allocating memory to be shared between the kernel and a driver;
> we already have dma_map_sg for mapping userspace I/O as an alternative
> interface.  This feels like it's something different again rather than
> an option to dma_alloc_coherent. 

Depends. There can be some interesting issues with some of the ARM stuff
out there (and to a lesser extent older ppc embedded stuff).

For example, some devices really want a physically contiguous chunk, and
are not cache coherent. In that case, you can't keep the linear mapping
around. But you also don't waste your precious kernel virtual space
creating a separate non-cachable mapping for those.

In general, dma mapping attributes as a generic feature make sense,
whether this specific attribute does or not though. And we probably want
space for platform specific attributes, for example, FSL embedded
iommu's have "interesting" features for directing data toward a specific
core cache etc... that we might want to expose using such attributes.

Cheers,
Ben.

^ permalink raw reply

* Re: p1020 unstable with 3.2
From: Benjamin Herrenschmidt @ 2011-12-24  6:53 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Scott Wood, linuxppc-dev, Fleming Andy-AFLEMING
In-Reply-To: <0048E411-D82D-4EA4-B9BA-EF233AC6ED34@suse.de>

On Fri, 2011-12-23 at 17:54 +0100, Alexander Graf wrote:
> Hi guys,
> 
> While trying to test my latest patch queue for ppc kvm, I realized
> that even though the device trees got updated, the p1020 box still is
> unstable. The trace below is the one I've seen the most. It only
> occurs during network I/O which happens a lot on that box, since I'm
> running it using NFS root.
> 
> As for configuration, I use kumar's "merge" branch from today and the
> p1020rdb.dts device tree provided in that tree.
> 
> The last known good configuration I'm aware of is 3.0.
> 
> Any ideas what's going wrong here?

Try SLAB instead of SLUB and let me know. It -could- be a bogon in SLUB
that should be fixed upstream now but I think did hit 3.2

Cheers,
Ben.

> Alex
> 
> ---
> 
> Unable to handle kernel paging request for data at address 0x00000004
> Faulting instruction address: 0xc00eb38c
> Oops: Kernel access of bad area, sig: 11 [#1]
> SMP NR_CPUS=2 P1020 RDB
> Modules linked in:
> NIP: c00eb38c LR: c00eb278 CTR: c0340e48
> REGS: effedc70 TRAP: 0300   Not tainted
> (3.2.0-rc3-00013-gaca3173-dirty)
> MSR: 00021000 <ME,CE>  CR: 28842422  XER: 00000000
> DEAR: 00000004, ESR: 00800000
> TASK = ef4bd900[4816] 'cc1' THREAD: ee4c4000 CPU: 0
> GPR00: 00004080 effedd20 ef4bd900 ef001180 c15e5700 ffffffff c03e7448
> 00100021 
> GPR08: 00100020 00010001 00000000 00000000 28842442 10a3e610 00210d00
> 00200200 
> GPR16: 00100100 00000001 c06d6748 ef002670 00000000 c03e7448 ffffffff
> 00000020 
> GPR24: effec000 ffffffec 00029000 ef001188 00000000 ef002600 c18079e0
> ef001180 
> NIP [c00eb38c] __slab_alloc+0x3d4/0x4f8
> LR [c00eb278] __slab_alloc+0x2c0/0x4f8
> Call Trace:
> [effedd20] [c06d6b78] hashrnd+0x0/0x4 (unreliable)
> [effeddc0] [c00eb680] __kmalloc_track_caller+0x1d0/0x200
> [effedde0] [c03e6064] __alloc_skb+0x74/0x150
> [effede00] [c03e7448] __netdev_alloc_skb+0x28/0x60
> [effede10] [c03408f0] gfar_new_skb+0x50/0x7c
> [effede20] [c0340acc] gfar_clean_rx_ring+0x1b0/0x52c
> [effede90] [c03412d0] gfar_poll+0x488/0x624
> [effedf60] [c03f062c] net_rx_action+0x140/0x1e8
> [effedfa0] [c0061aa0] __do_softirq+0x124/0x210
> [effedff0] [c000e0fc] call_do_softirq+0x14/0x24
> [ee4c5c40] [c000564c] do_softirq+0xb4/0xe0
> [ee4c5c60] [c006170c] irq_exit+0x94/0xb4
> [ee4c5c70] [c000591c] do_IRQ+0xb0/0x1ac
> [ee4c5ca0] [c000fc5c] ret_from_except+0x0/0x18
> --- Exception: 501 at do_lookup+0x118/0x3cc
>     LR = do_lookup+0xec/0x3cc
> [ee4c5db0] [c00ff898] link_path_walk+0x308/0xc78
> [ee4c5e30] [c0103cb0] path_openat+0xc8/0x3ec
> [ee4c5e90] [c01040f4] do_filp_open+0x44/0xb0
> [ee4c5f10] [c00efcf8] do_sys_open+0x198/0x24c
> [ee4c5f40] [c000f604] ret_from_syscall+0x0/0x3c
> --- Exception: c01 at 0xfdb6658
>     LR = 0xfe50be8
> Instruction dump:
> 8004000c 7f880000 409effa8 91240008 9164000c 7c0004ac 80040000
> 2f8a0000 
> 81240018 81640014 5400003c 90040000 <912b0004> 91690000 92040014
> 91e40018 
> Kernel panic - not syncing: Fatal exception in interrupt
> Rebooting in 180 seconds..
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* [PATCH 8/9] arch/powerpc/sysdev/fsl_rmu.c: introduce missing kfree
From: Julia Lawall @ 2011-12-23 17:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: devicetree-discuss, kernel-janitors, linux-kernel, Rob Herring,
	Paul Mackerras, linuxppc-dev

rmu needs to be freed before leaving the function in an error case.

A simplified version of the semantic match that finds the problem is as
follows: (http://coccinelle.lip6.fr)

// <smpl>
@r exists@
local idexpression x;
statement S;
identifier f1;
position p1,p2;
expression *ptr != NULL;
@@

x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...x...+> }
x->f1
...>
(
 return \(0\|<+...x...+>\|ptr\);
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>

---
 arch/powerpc/sysdev/fsl_rmu.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index 02445a5..1548578 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -1081,6 +1081,7 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
 	if (!msg_addr) {
 		pr_err("%s: unable to find 'reg' property of message-unit\n",
 			node->full_name);
+		kfree(rmu);
 		return -ENOMEM;
 	}
 	msg_start = of_read_number(msg_addr, aw);

^ permalink raw reply related

* p1020 unstable with 3.2
From: Alexander Graf @ 2011-12-23 16:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Scott Wood, Fleming Andy-AFLEMING

Hi guys,

While trying to test my latest patch queue for ppc kvm, I realized that =
even though the device trees got updated, the p1020 box still is =
unstable. The trace below is the one I've seen the most. It only occurs =
during network I/O which happens a lot on that box, since I'm running it =
using NFS root.

As for configuration, I use kumar's "merge" branch from today and the =
p1020rdb.dts device tree provided in that tree.

The last known good configuration I'm aware of is 3.0.

Any ideas what's going wrong here?

Alex

---

Unable to handle kernel paging request for data at address 0x00000004
Faulting instruction address: 0xc00eb38c
Oops: Kernel access of bad area, sig: 11 [#1]
SMP NR_CPUS=3D2 P1020 RDB
Modules linked in:
NIP: c00eb38c LR: c00eb278 CTR: c0340e48
REGS: effedc70 TRAP: 0300   Not tainted  =
(3.2.0-rc3-00013-gaca3173-dirty)
MSR: 00021000 <ME,CE>  CR: 28842422  XER: 00000000
DEAR: 00000004, ESR: 00800000
TASK =3D ef4bd900[4816] 'cc1' THREAD: ee4c4000 CPU: 0
GPR00: 00004080 effedd20 ef4bd900 ef001180 c15e5700 ffffffff c03e7448 =
00100021=20
GPR08: 00100020 00010001 00000000 00000000 28842442 10a3e610 00210d00 =
00200200=20
GPR16: 00100100 00000001 c06d6748 ef002670 00000000 c03e7448 ffffffff =
00000020=20
GPR24: effec000 ffffffec 00029000 ef001188 00000000 ef002600 c18079e0 =
ef001180=20
NIP [c00eb38c] __slab_alloc+0x3d4/0x4f8
LR [c00eb278] __slab_alloc+0x2c0/0x4f8
Call Trace:
[effedd20] [c06d6b78] hashrnd+0x0/0x4 (unreliable)
[effeddc0] [c00eb680] __kmalloc_track_caller+0x1d0/0x200
[effedde0] [c03e6064] __alloc_skb+0x74/0x150
[effede00] [c03e7448] __netdev_alloc_skb+0x28/0x60
[effede10] [c03408f0] gfar_new_skb+0x50/0x7c
[effede20] [c0340acc] gfar_clean_rx_ring+0x1b0/0x52c
[effede90] [c03412d0] gfar_poll+0x488/0x624
[effedf60] [c03f062c] net_rx_action+0x140/0x1e8
[effedfa0] [c0061aa0] __do_softirq+0x124/0x210
[effedff0] [c000e0fc] call_do_softirq+0x14/0x24
[ee4c5c40] [c000564c] do_softirq+0xb4/0xe0
[ee4c5c60] [c006170c] irq_exit+0x94/0xb4
[ee4c5c70] [c000591c] do_IRQ+0xb0/0x1ac
[ee4c5ca0] [c000fc5c] ret_from_except+0x0/0x18
--- Exception: 501 at do_lookup+0x118/0x3cc
    LR =3D do_lookup+0xec/0x3cc
[ee4c5db0] [c00ff898] link_path_walk+0x308/0xc78
[ee4c5e30] [c0103cb0] path_openat+0xc8/0x3ec
[ee4c5e90] [c01040f4] do_filp_open+0x44/0xb0
[ee4c5f10] [c00efcf8] do_sys_open+0x198/0x24c
[ee4c5f40] [c000f604] ret_from_syscall+0x0/0x3c
--- Exception: c01 at 0xfdb6658
    LR =3D 0xfe50be8
Instruction dump:
8004000c 7f880000 409effa8 91240008 9164000c 7c0004ac 80040000 2f8a0000=20=

81240018 81640014 5400003c 90040000 <912b0004> 91690000 92040014 =
91e40018=20
Kernel panic - not syncing: Fatal exception in interrupt
Rebooting in 180 seconds..=

^ permalink raw reply

* Re: [PATCH 00/14] DMA-mapping framework redesign preparation
From: Matthew Wilcox @ 2011-12-23 16:35 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: linux-mips, linux-ia64, linux-sh, linux-mm, sparclinux,
	linux-arch, Stephen Rothwell, Jonathan Corbet, x86, Arnd Bergmann,
	microblaze-uclinux, linaro-mm-sig, Andrzej Pietrasiewicz,
	Thomas Gleixner, linux-arm-kernel, discuss, linux-kernel,
	Kyungmin Park, linux-alpha, Andrew Morton, linuxppc-dev
In-Reply-To: <1324643253-3024-1-git-send-email-m.szyprowski@samsung.com>

On Fri, Dec 23, 2011 at 01:27:19PM +0100, Marek Szyprowski wrote:
> The first issue we identified is the fact that on some platform (again,
> mainly ARM) there are several functions for allocating DMA buffers:
> dma_alloc_coherent, dma_alloc_writecombine and dma_alloc_noncoherent

Is this write-combining from the point of view of the device (ie iommu),
or from the point of view of the CPU, or both?

> The next step in dma mapping framework update is the introduction of
> dma_mmap/dma_mmap_attrs() function. There are a number of drivers
> (mainly V4L2 and ALSA) that only exports the DMA buffers to user space.
> Creating a userspace mapping with correct page attributes is not an easy
> task for the driver. Also the DMA-mapping framework is the only place
> where the complete information about the allocated pages is available,
> especially if the implementation uses IOMMU controller to provide a
> contiguous buffer in DMA address space which is scattered in physical
> memory space.

Surely we only need a helper which drivrs can call from their mmap routine to solve this?

> Usually these drivers don't touch the buffer data at all, so the mapping
> in kernel virtual address space is not needed. We can introduce
> DMA_ATTRIB_NO_KERNEL_MAPPING attribute which lets kernel to skip/ignore
> creation of kernel virtual mapping. This way we can save previous
> vmalloc area and simply some mapping operation on a few architectures.

I really think this wants to be a separate function.  dma_alloc_coherent
is for allocating memory to be shared between the kernel and a driver;
we already have dma_map_sg for mapping userspace I/O as an alternative
interface.  This feels like it's something different again rather than
an option to dma_alloc_coherent.

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply

* Kernel not booting when supplying boot parameter mem
From: Arshad, Farrukh @ 2011-12-23 15:07 UTC (permalink / raw)
  To: linuxppc-dev@lists.ozlabs.org

[-- Attachment #1: Type: text/plain, Size: 691 bytes --]

Greetings All,

I have a basic question. I have 512 MB memory. I want my kernel to use only last 128 MB of memory starting from address 0x10000000. I have configured the kernel CONFIG_PHYSICAL_START=0x10000000 and in kernel boot parameter I have set mem=128M. In this scenario my kernel is not booting and it just stuck after uncompressing it. If I do not provide mem=128M boot parameter my kernel boots fine, but in that case I can not restrict kernel to use only 128M memory. Why supplying mem=128M causing kernel to fail.

Best Regards

Farrukh Arshad
Sr. Software Development Engineer
Mentor Graphics Pakistan
Ph:   +92 - 423 - 609 - 92 - 09
Cell: +92 - 303 - 444 - 77 - 05

[-- Attachment #2: Type: text/html, Size: 3301 bytes --]

^ permalink raw reply

* Re: [PATCH 0/5] Make use of hardware reference and change bits in HPT
From: Alexander Graf @ 2011-12-23 13:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, kvm-ppc
In-Reply-To: <20111215120018.GA20629@bloggs.ozlabs.ibm.com>


On 15.12.2011, at 13:00, Paul Mackerras wrote:

> This series of patches builds on top of my previous series and
> modifies the Book3S HV memory management code to use the hardware
> reference and change bits in the guest hashed page table.  This makes
> kvm_age_hva() more efficient, lets us implement the dirty page
> tracking properly (which in turn means that things like VGA emulation
> in qemu can work), and also means that we can supply hardware
> reference and change information to the guest -- not that Linux guests
> currently use that information, but possibly they will want it in
> future, and there is an interface defined in PAPR for it.

I applied Patches 1-4 to kvm-ppc-next.


Alex

^ permalink raw reply

* Re: [PATCH] arch/powerpc/kvm/e500: Additional module.h => export.h fixup
From: Alexander Graf @ 2011-12-23 13:35 UTC (permalink / raw)
  To: Kyle Moffett
  Cc: KVM list, Marcelo Tosatti, linux-kernel@vger.kernel.org List,
	kvm-ppc, Paul Mackerras, Avi Kivity, Scott Wood, linuxppc-dev
In-Reply-To: <1324570863-11447-1-git-send-email-Kyle.D.Moffett@boeing.com>


On 22.12.2011, at 17:21, Kyle Moffett wrote:

> This file, like many others, needs to include <linux/export.h>.
> 
> Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>

Thanks, I already applied a patch from Scott that addresses the same issue.


Alex

^ permalink raw reply

* Re: [PATCH] KVM: Move gfn_to_memslot() to kvm_host.h
From: Alexander Graf @ 2011-12-23 13:33 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, KVM list, kvm-ppc, Avi Kivity
In-Reply-To: <20111220092102.GA5626@bloggs.ozlabs.ibm.com>


On 20.12.2011, at 10:21, Paul Mackerras wrote:

> This moves gfn_to_memslot(), and the functions it calls, that is,
> search_memslots() and __gfn_to_memslot(), from kvm_main.c to kvm_host.h
> so that gfn_to_memslot() can be called from non-modular code even
> when KVM is a module.  On powerpc, the Book3S HV style of KVM has
> code that is called from real mode which needs to call gfn_to_memslot()
> and thus needs this.  (Module code is allocated in the vmalloc region,
> which can't be accessed in real mode.)
> 
> With this, we can remove builtin_gfn_to_memslot() from book3s_hv_rm_mmu.c
> and thus eliminate a little bit of duplication.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>

Avi, please ack.


Alex

^ permalink raw reply

* Re: [PATCH 5/5] KVM: PPC: Book3s HV: Implement H_CLEAR_REF and H_CLEAR_MOD hcalls
From: Alexander Graf @ 2011-12-23 13:26 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, KVM list, kvm-ppc
In-Reply-To: <20111215120417.GF20629@bloggs.ozlabs.ibm.com>


On 15.12.2011, at 13:04, Paul Mackerras wrote:

> This adds implementations for the H_CLEAR_REF (test and clear =
reference
> bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls.  These
> hypercalls are not used by Linux guests at this stage, and these
> implementations are only compile tested.

Do we need them then? Are they mandatory in PAPR? I don't feel all that =
great having unused / untested code accessible from the guest.

Alex

^ permalink raw reply

* Re: [PATCH 4/5] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit
From: Alexander Graf @ 2011-12-23 13:23 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, KVM list, kvm-ppc
In-Reply-To: <20111215120322.GE20629@bloggs.ozlabs.ibm.com>


On 15.12.2011, at 13:03, Paul Mackerras wrote:

> This changes the implementation of kvm_vm_ioctl_get_dirty_log() for
> Book3s HV guests to use the hardware C (changed) bits in the guest
> hashed page table.  Since this makes the implementation quite =
different
> from the Book3s PR case, this moves the existing implementation from
> book3s.c to book3s_pr.c and creates a new implementation in =
book3s_hv.c.
> That implementation calls kvmppc_hv_get_dirty_log() to do the actual
> work by calling kvm_test_clear_dirty on each page.  It iterates over
> the HPTEs, clearing the C bit if set, and returns 1 if any C bit was
> set (including the saved C bit in the rmap entry).
>=20
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/include/asm/kvm_book3s.h |    2 +
> arch/powerpc/kvm/book3s.c             |   39 ------------------
> arch/powerpc/kvm/book3s_64_mmu_hv.c   |   69 =
+++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/book3s_hv.c          |   37 +++++++++++++++++
> arch/powerpc/kvm/book3s_pr.c          |   39 ++++++++++++++++++
> 5 files changed, 147 insertions(+), 39 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h =
b/arch/powerpc/include/asm/kvm_book3s.h
> index 6ececb4..aa795cc 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -158,6 +158,8 @@ extern long kvmppc_virtmode_h_enter(struct =
kvm_vcpu *vcpu, unsigned long flags,
> 			long pte_index, unsigned long pteh, unsigned =
long ptel);
> extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
> 			long pte_index, unsigned long pteh, unsigned =
long ptel);
> +extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
> +			struct kvm_memory_slot *memslot);
>=20
> extern void kvmppc_entry_trampoline(void);
> extern void kvmppc_hv_entry_trampoline(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 6bf7e05..7d54f4e 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -477,45 +477,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu =
*vcpu,
> 	return 0;
> }
>=20
> -/*
> - * Get (and clear) the dirty memory log for a memory slot.
> - */
> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> -				      struct kvm_dirty_log *log)
> -{
> -	struct kvm_memory_slot *memslot;
> -	struct kvm_vcpu *vcpu;
> -	ulong ga, ga_end;
> -	int is_dirty =3D 0;
> -	int r;
> -	unsigned long n;
> -
> -	mutex_lock(&kvm->slots_lock);
> -
> -	r =3D kvm_get_dirty_log(kvm, log, &is_dirty);
> -	if (r)
> -		goto out;
> -
> -	/* If nothing is dirty, don't bother messing with page tables. =
*/
> -	if (is_dirty) {
> -		memslot =3D id_to_memslot(kvm->memslots, log->slot);
> -
> -		ga =3D memslot->base_gfn << PAGE_SHIFT;
> -		ga_end =3D ga + (memslot->npages << PAGE_SHIFT);
> -
> -		kvm_for_each_vcpu(n, vcpu, kvm)
> -			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
> -
> -		n =3D kvm_dirty_bitmap_bytes(memslot);
> -		memset(memslot->dirty_bitmap, 0, n);
> -	}
> -
> -	r =3D 0;
> -out:
> -	mutex_unlock(&kvm->slots_lock);
> -	return r;
> -}
> -
> void kvmppc_decrementer_func(unsigned long data)
> {
> 	struct kvm_vcpu *vcpu =3D (struct kvm_vcpu *)data;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c =
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 926e2b9..783cd35 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -870,6 +870,75 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned =
long hva, pte_t pte)
> 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
> }
>=20
> +static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long =
*rmapp)
> +{
> +	struct revmap_entry *rev =3D kvm->arch.revmap;
> +	unsigned long head, i, j;
> +	unsigned long *hptep;
> +	int ret =3D 0;
> +
> + retry:
> +	lock_rmap(rmapp);
> +	if (*rmapp & KVMPPC_RMAP_CHANGED) {
> +		*rmapp &=3D ~KVMPPC_RMAP_CHANGED;
> +		ret =3D 1;
> +	}
> +	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
> +		unlock_rmap(rmapp);
> +		return ret;
> +	}
> +
> +	i =3D head =3D *rmapp & KVMPPC_RMAP_INDEX;
> +	do {
> +		hptep =3D (unsigned long *) (kvm->arch.hpt_virt + (i << =
4));
> +		j =3D rev[i].forw;
> +
> +		if (!(hptep[1] & HPTE_R_C))
> +			continue;
> +
> +		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
> +			/* unlock rmap before spinning on the HPTE lock =
*/
> +			unlock_rmap(rmapp);
> +			while (hptep[0] & HPTE_V_HVLOCK)
> +				cpu_relax();
> +			goto retry;
> +		}
> +
> +		/* Now check and modify the HPTE */
> +		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) =
{
> +			/* need to make it temporarily absent to clear C =
*/
> +			hptep[0] |=3D HPTE_V_ABSENT;
> +			kvmppc_invalidate_hpte(kvm, hptep, i);
> +			hptep[1] &=3D ~HPTE_R_C;
> +			eieio();
> +			hptep[0] =3D (hptep[0] & ~HPTE_V_ABSENT) | =
HPTE_V_VALID;
> +			rev[i].guest_rpte |=3D HPTE_R_C;
> +			ret =3D 1;
> +		}
> +		hptep[0] &=3D ~HPTE_V_HVLOCK;
> +	} while ((i =3D j) !=3D head);
> +
> +	unlock_rmap(rmapp);
> +	return ret;
> +}
> +
> +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot =
*memslot)
> +{
> +	unsigned long i;
> +	unsigned long *rmapp, *map;
> +
> +	preempt_disable();
> +	rmapp =3D memslot->rmap;
> +	map =3D memslot->dirty_bitmap;
> +	for (i =3D 0; i < memslot->npages; ++i) {
> +		if (kvm_test_clear_dirty(kvm, rmapp))
> +			__set_bit_le(i, map);

So if I read things correctly, this is the only case you're setting =
pages as dirty. What if you have the following:

  guest adds HTAB entry x
  guest writes to page mapped by x
  guest removes HTAB entry x
  host fetches dirty log

You can replace "removes" by "is overwritten by another mapping" if you =
like.


Alex

PS: Always CC kvm@vger for stuff that other might want to review =
(basically all patches)

^ permalink raw reply

* [PATCH 14/14] common: DMA-mapping: add NON-CONSISTENT attribute
From: Marek Szyprowski @ 2011-12-23 12:27 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mips, linux-ia64, linux-sh, linux-mm, sparclinux,
	Marek Szyprowski, linux-arch, Stephen Rothwell, Jonathan Corbet,
	x86, Arnd Bergmann, microblaze-uclinux, linaro-mm-sig,
	Andrzej Pietrasiewicz, Thomas Gleixner, linux-arm-kernel, discuss,
	Kyungmin Park, linux-alpha, Andrew Morton, linuxppc-dev
In-Reply-To: <1324643253-3024-1-git-send-email-m.szyprowski@samsung.com>

DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
consistent or non-consistent memory as it sees fit.  By using this API,
you are guaranteeing to the platform that you have all the correct and
necessary sync points for this memory in the driver should it choose to
return non-consistent memory.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 Documentation/DMA-attributes.txt |    9 +++++++++
 include/linux/dma-attrs.h        |    1 +
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 811a5d4..9120de2 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -41,3 +41,12 @@ buffered to improve performance.
 Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE,
 those that do not will simply ignore the attribute and exhibit default
 behavior.
+
+DMA_ATTR_NON_CONSISTENT
+-----------------------
+
+DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
+consistent or non-consistent memory as it sees fit.  By using this API,
+you are guaranteeing to the platform that you have all the correct and
+necessary sync points for this memory in the driver should it choose to
+return non-consistent memory.
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index ada61e1..547ab56 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -14,6 +14,7 @@ enum dma_attr {
 	DMA_ATTR_WRITE_BARRIER,
 	DMA_ATTR_WEAK_ORDERING,
 	DMA_ATTR_WRITE_COMBINE,
+	DMA_ATTR_NON_CONSISTENT,
 	DMA_ATTR_MAX,
 };
 
-- 
1.7.1.569.g6f426

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox