linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/Review] libata driver for Apple "macio" pata
@ 2008-08-01  9:08 Benjamin Herrenschmidt
  2008-08-01  9:59 ` Tejun Heo
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2008-08-01  9:08 UTC (permalink / raw)
  To: list linux-ide; +Cc: Jeff Garzik, Tejun Heo, Alan Cox

Hi !

This isn't a complete driver yet, ie. not candidate for merging, there's
quite a few things to do such as power management, hot swap media bay
support, etc...

However, I've a first cut at the core transfer timing and DMA logic, and
I would appreciate some review / comments on those parts, since it's
using a custom DMA engine, and thus the integration with libata isn't
totally trivial.

Thanks in advance !

Index: linux-work/drivers/ata/pata_macio.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/drivers/ata/pata_macio.c	2008-08-01 17:20:44.000000000 +1000
@@ -0,0 +1,974 @@
+/*
+ * Libata based driver for Apple "macio" family of PATA controllers
+ *
+ * Copyright 2008 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *
+ * Some bits and pieces from drivers/ide/ppc/pmac.c
+ *
+ * TODO:
+ *
+ * - Implement detach/driver removal
+ * - Add proper media-bay hotplug support (involves reworking the
+ *   media-bay code)
+ * - Fix OHare broken DMA
+ * - Power management
+ */
+
+#define DEBUG
+#define DEBUG_DMA
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_host.h>
+#include <linux/ata.h>
+#include <linux/libata.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+
+#include <asm/macio.h>
+#include <asm/io.h>
+#include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+
+#include "pata_macio.h"
+
+#ifdef DEBUG_DMA
+#define dev_dbgdma(dev, format, arg...)		\
+	dev_printk(KERN_DEBUG , dev , format , ## arg)
+#else
+#define dev_dbgdma(dev, format, arg...)		\
+	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
+#endif
+
+#define DRV_NAME	"pata_macio"
+#define DRV_VERSION	"0.1"
+
+/* Previous variants of this driver used to calculate timings
+ * for various variants of the chip and use tables for others.
+ *
+ * Not only was this confusing, but in addition, it isn't clear
+ * whether our calculation code was correct. It didn't entirely
+ * match the darwin code and whatever documentation I could find
+ * on these cells
+ *
+ * I decided to entirely rely on a table instead for this version
+ * of the driver. Also, because I don't really care about derated
+ * modes and really old HW other than making it work, I'm not going
+ * to calculate / snoop timing values for something else than the
+ * standard modes.
+ */
+struct pata_macio_timing {
+	int	mode;
+	u32	reg1;	/* Bits to set in first timing reg */
+	u32	reg2;	/* Bits to set in second timing reg */
+};
+
+static const struct pata_macio_timing pata_macio_ohare_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00074000,	0, },
+	{ XFER_MW_DMA_1,	0x00221000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_heathrow_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00074000,	0, },
+	{ XFER_MW_DMA_1,	0x00221000,	0, },
+	{ XFER_MW_DMA_2,	0x00211000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kl33_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00084000,	0, },
+	{ XFER_MW_DMA_1,	0x00021800,	0, },
+	{ XFER_MW_DMA_2,	0x00011800,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kl66_timings[] = {
+	{ XFER_PIO_0,		0x0000038c,	0, },
+	{ XFER_PIO_1,		0x0000020a,	0, },
+	{ XFER_PIO_2,		0x00000127,	0, },
+	{ XFER_PIO_3,		0x000000c6,	0, },
+	{ XFER_PIO_4,		0x00000065,	0, },
+	{ XFER_MW_DMA_0,	0x00084000,	0, },
+	{ XFER_MW_DMA_1,	0x00029800,	0, },
+	{ XFER_MW_DMA_2,	0x00019400,	0, },
+	{ XFER_UDMA_0	,	0x19100000,	0, },
+	{ XFER_UDMA_1	,	0x14d00000,	0, },
+	{ XFER_UDMA_2	,	0x10900000,	0, },
+	{ XFER_UDMA_3	,	0x0c700000,	0, },
+	{ XFER_UDMA_4	,	0x0c500000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kauai_timings[] = {
+	{ XFER_PIO_0,		0x08000a92,	0, },
+	{ XFER_PIO_1,		0x0800060f,	0, },
+	{ XFER_PIO_2,		0x0800038b,	0, },
+	{ XFER_PIO_3,		0x05000249,	0, },
+	{ XFER_PIO_4,		0x04000148,	0, },
+	{ XFER_MW_DMA_0,	0x00618000,	0, },
+	{ XFER_MW_DMA_1,	0x00209000,	0, },
+	{ XFER_MW_DMA_2,	0x00148000,	0, },
+	{ XFER_UDMA_0	,	         0,	0x000070c1, },
+	{ XFER_UDMA_1	,	         0,	0x00005d81, },
+	{ XFER_UDMA_2	,	         0,	0x00004a61, },
+	{ XFER_UDMA_3	,	         0,	0x00003a51, },
+	{ XFER_UDMA_4	,	         0,	0x00002a31, },
+	{ XFER_UDMA_5	,	         0,	0x00002921, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_shasta_timings[] = {
+	{ XFER_PIO_0,		0x0a000c97,	0, },
+	{ XFER_PIO_1,		0x07000712,	0, },
+	{ XFER_PIO_2,		0x040003cd,	0, },
+	{ XFER_PIO_3,		0x0500028b,	0, },
+	{ XFER_PIO_4,		0x0400010a,	0, },
+	{ XFER_MW_DMA_0,	0x00820800,	0, },
+	{ XFER_MW_DMA_1,	0x0028b000,	0, },
+	{ XFER_MW_DMA_2,	0x001ca000,	0, },
+	{ XFER_UDMA_0	,	         0,	0x00035901, },
+	{ XFER_UDMA_1	,	         0,	0x000348b1, },
+	{ XFER_UDMA_2	,	         0,	0x00033881, },
+	{ XFER_UDMA_3	,	         0,	0x00033861, },
+	{ XFER_UDMA_4	,	         0,	0x00033841, },
+	{ XFER_UDMA_5	,	         0,	0x00033031, },
+	{ XFER_UDMA_6	,	         0,	0x00033021, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing *pata_macio_find_timing(
+					    struct pata_macio_priv *priv,
+					    int mode)
+{
+	int i = 0;
+
+	while (priv->timings[i].mode > 0) {
+		if (priv->timings[i].mode == mode)
+			return &priv->timings[i];
+		i++;
+	}
+	return NULL;
+}
+
+
+static void pata_macio_apply_timings(struct ata_port *ap, unsigned int device)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	void __iomem *rbase = ap->ioaddr.cmd_addr;
+
+	if (device != 0 && device != 1)
+		return;
+
+	if (priv->kind == controller_sh_ata6 ||
+	    priv->kind == controller_un_ata6 ||
+	    priv->kind == controller_k2_ata6) {
+		writel(priv->treg[device][0], rbase + IDE_KAUAI_PIO_CONFIG);
+		writel(priv->treg[device][1], rbase + IDE_KAUAI_ULTRA_CONFIG);
+	} else
+		writel(priv->treg[device][0], rbase + IDE_TIMING_CONFIG);
+}
+
+static void pata_macio_dev_select(struct ata_port *ap, unsigned int device)
+{
+	ata_sff_dev_select(ap, device);
+
+	/* Apply timings */
+	pata_macio_apply_timings(ap, device);
+}
+
+static void pata_macio_set_timings(struct ata_port *ap,
+				   struct ata_device *adev)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	const struct pata_macio_timing *t;
+
+	dev_dbg(priv->dev, "Set timings: DEV=%d, PIO=0x%x (%s), DMA=0x%x (%s)\n",
+		adev->devno,
+		adev->pio_mode, ata_mode_string(ata_xfer_mode2mask(adev->pio_mode)),
+		adev->dma_mode, ata_mode_string(ata_xfer_mode2mask(adev->dma_mode)));
+
+	if (adev->devno != 0 && adev->devno != 1)
+		return;
+
+	/* First clear timings */
+	priv->treg[adev->devno][0] = priv->treg[adev->devno][1] = 0;
+
+	/* Now get the PIO timings */
+	t = pata_macio_find_timing(priv, adev->pio_mode);
+	if (t == NULL) {
+		dev_warn(priv->dev, "Invalid PIO timing requested: 0x%x\n", adev->pio_mode);
+		t = pata_macio_find_timing(priv, XFER_PIO_0);
+	}
+	BUG_ON(t == NULL);
+
+	/* PIO timings only ever use the first treg */
+	priv->treg[adev->devno][0] |= t->reg1;
+
+	/* Now get DMA timings */
+	t = pata_macio_find_timing(priv, adev->dma_mode);
+	if (t == NULL || (t->reg1 == 0 && t->reg2 == 0)) {
+		dev_dbg(priv->dev, "DMA timing not set yet, using MW_DMA_0\n");
+		t = pata_macio_find_timing(priv, XFER_MW_DMA_0);
+	}
+	BUG_ON(t == NULL);
+
+	/* DMA timings can use both tregs */
+	priv->treg[adev->devno][0] |= t->reg1;
+	priv->treg[adev->devno][1] |= t->reg2;
+
+	dev_dbg(priv->dev, " -> %08x %08x\n",
+		priv->treg[adev->devno][0],
+		priv->treg[adev->devno][1]);
+
+	/* Apply to hardware */
+	pata_macio_apply_timings(ap, adev->devno);
+}
+
+/*
+ * Blast some well known "safe" values to the timing registers at init or
+ * wakeup from sleep time, before we do real calculation
+ */
+static void pata_macio_default_timings(struct pata_macio_priv *priv)
+{
+	unsigned int value, value2 = 0;
+
+	switch(priv->kind) {
+		case controller_sh_ata6:
+			value = 0x0a820c97;
+			value2 = 0x00033031;
+			break;
+		case controller_un_ata6:
+		case controller_k2_ata6:
+			value = 0x08618a92;
+			value2 = 0x00002921;
+			break;
+		case controller_kl_ata4:
+			value = 0x0008438c;
+			break;
+		case controller_kl_ata3:
+			value = 0x00084526;
+			break;
+		case controller_heathrow:
+		case controller_ohare:
+		default:
+			value = 0x00074526;
+			break;
+	}
+	priv->treg[0][0] = priv->treg[1][0] = value;
+	priv->treg[0][1] = priv->treg[1][1] = value2;
+}
+
+static unsigned long pata_macio_mode_filter(struct ata_device *adev,
+					    unsigned long xfer_mask)
+{
+	struct pata_macio_priv *priv = adev->link->ap->private_data;
+
+	if (priv->dma_regs == NULL)
+		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
+	return xfer_mask;
+}
+
+static int pata_macio_cable_detect(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	return priv->cable_80 ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
+}
+
+static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
+{
+	unsigned int write = (qc->tf.flags & ATA_TFLAG_WRITE);
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	struct scatterlist *sg;
+	struct dbdma_cmd *table;
+	unsigned int si, pi;
+
+	dev_dbgdma(priv->dev, "%s: qc %p flags %lx, write %d dev %d\n",
+		   __func__, qc, qc->flags, write, qc->dev->devno);
+
+	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+		return;
+
+	table = (struct dbdma_cmd *) priv->dma_table_cpu;
+
+	pi = 0;
+	for_each_sg(qc->sg, sg, qc->n_elem, si) {
+		u32 addr, sg_len, len;
+
+		/* determine if physical DMA addr spans 64K boundary.
+		 * Note h/w doesn't support 64-bit, so we unconditionally
+		 * truncate dma_addr_t to u32.
+		 */
+		addr = (u32) sg_dma_address(sg);
+		sg_len = sg_dma_len(sg);
+
+		while (sg_len) {
+			/* table overflow should never happen */
+			BUG_ON (pi++ >= MAX_DCMDS);
+
+			len = (sg_len < 0xfe00) ? sg_len : 0xfe00;
+			st_le16(&table->command, write ? OUTPUT_MORE: INPUT_MORE);
+			st_le16(&table->req_count, len);
+			st_le32(&table->phy_addr, addr);
+			table->cmd_dep = 0;
+			table->xfer_status = 0;
+			table->res_count = 0;
+			addr += len;
+			sg_len -= len;
+			++table;
+		}
+	}
+
+	/* Convert the last command to an input/output */
+	if (pi) {
+		table--;
+		st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
+		table++;
+	}
+
+	/* Add the stop command to the end of the list */
+	memset(table, 0, sizeof(struct dbdma_cmd));
+	st_le16(&table->command, DBDMA_STOP);
+
+	dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
+}
+
+static void pata_macio_bmdma_setup (struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	int dev = qc->dev->devno;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	/* Make sure DMA commands updates are visible */
+	writel(priv->dma_table_dma, &priv->dma_regs->cmdptr);
+
+	/* On KeyLargo 66Mhz cell, we need to add 60ns to wrDataSetup on
+	 * UDMA reads
+	 */
+	if (priv->kind == controller_kl_ata4 &&
+	    (priv->treg[dev][0] & TR_66_UDMA_EN)) {
+		void __iomem *rbase = ap->ioaddr.cmd_addr;
+		u32 reg = priv->treg[dev][0];
+
+		if (!(qc->tf.flags & ATA_TFLAG_WRITE))
+			reg += 0x00800000;
+		writel(reg, rbase + IDE_TIMING_CONFIG);
+	}
+
+	/* issue r/w command */
+	ap->ops->sff_exec_command(ap, &qc->tf);
+}
+
+static void pata_macio_bmdma_start (struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	writel((RUN << 16) | RUN, &priv->dma_regs->control);
+	/* Make sure it gets to the controller right now */
+	(void)readl(&priv->dma_regs->control);
+}
+
+static void pata_macio_bmdma_stop (struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	/* Stop the DMA engine and wait for it to full halt */
+	writel (((RUN|WAKE|DEAD) << 16), &priv->dma_regs->control);
+	while (readl(&priv->dma_regs->status) & RUN)
+		udelay(1);
+}
+
+static u8 pata_macio_bmdma_status (struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	u32 dstat, rstat = ATA_DMA_INTR;
+	unsigned long timeout = 0;
+
+	dstat = readl(&priv->dma_regs->status);
+
+	dev_dbgdma(priv->dev, "%s: dstat=%x\n", __func__, dstat);
+
+	/* We have to things to deal with here:
+	 *
+	 * - The dbdma won't stop if the command was started
+	 * but completed with an error without transferring all
+	 * datas. This happens when bad blocks are met during
+	 * a multi-block transfer.
+	 *
+	 * - The dbdma fifo hasn't yet finished flushing to
+	 * to system memory when the disk interrupt occurs.
+	 *
+	 */
+
+	/* First check for errors */
+	if ((dstat & (RUN|DEAD)) != RUN)
+		rstat |= ATA_DMA_ERR;
+
+	/* If ACTIVE is cleared, the STOP command have passed and
+	 * transfer is complete. If not, we to flush the channel.
+	 */
+	if ((dstat & ACTIVE) == 0)
+		return rstat;
+
+	dev_dbgdma(priv->dev, "%s: DMA still active, flushing...\n", __func__);
+
+	/* If dbdma didn't execute the STOP command yet, the
+	 * active bit is still set. We consider that we aren't
+	 * sharing interrupts (which is hopefully the case with
+	 * those controllers) and so we just try to flush the
+	 * channel for pending data in the fifo
+	 */
+	udelay(1);
+	writel((FLUSH << 16) | FLUSH, &priv->dma_regs->control);
+	for (;;) {
+		udelay(1);
+		dstat = readl(&priv->dma_regs->status);
+		if ((dstat & FLUSH) == 0)
+			break;
+		if (++timeout > 1000) {
+			dev_warn(priv->dev, "timeout flushing DMA\n");
+			rstat |= ATA_DMA_ERR;
+			break;
+		}
+	}
+	return rstat;
+}
+
+/* port_start is when we allocate the DMA command list */
+static int pata_macio_port_start (struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	if (priv->dma_regs == NULL)
+		return 0;
+
+	/* Make sure DMA controller is stopped */
+	writel((RUN|PAUSE|FLUSH|WAKE|DEAD) << 16, &priv->dma_regs->control);
+	while (readl(&priv->dma_regs->status) & RUN)
+		udelay(1);
+
+	/* Allocate space for the DBDMA commands.
+	 *
+	 * The +2 is +1 for the stop command and +1 to allow for
+	 * aligning the start address to a multiple of 16 bytes.
+	 */
+	priv->dma_table_cpu = dma_alloc_coherent(priv->dev,
+						 (MAX_DCMDS + 2) * sizeof(struct dbdma_cmd),
+						 &priv->dma_table_dma, GFP_KERNEL);
+	if (priv->dma_table_cpu == NULL) {
+		dev_err(priv->dev, "Unable to allocate DMA command list\n");
+		priv->dma_regs = NULL;
+	}
+	return 0;
+}
+
+static void pata_macio_irq_clear(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	/* Nothing to do here */
+
+	dev_dbgdma(priv->dev, "%s\n", __func__);
+}
+
+static void pata_macio_reset_hw(struct pata_macio_priv *priv)
+{
+	if (priv->mediabay) {
+		/* XXX TODO */
+	} else if (priv->kind == controller_ohare) {
+		/* The code below is having trouble on some ohare machines
+		 * (timing related ?). Until I can put my hand on one of these
+		 * units, I keep the old way
+		 */
+		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, priv->node, 0, 1);
+	} else {
+ 		/* Reset and enable controller */
+		ppc_md.feature_call(PMAC_FTR_IDE_RESET, priv->node, priv->aapl_bus_id, 1);
+		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, priv->node, priv->aapl_bus_id, 1);
+		msleep(10);
+		ppc_md.feature_call(PMAC_FTR_IDE_RESET, priv->node, priv->aapl_bus_id, 0);
+		msleep(jiffies_to_msecs(IDE_WAKEUP_DELAY));
+	}
+
+	/* On Kauai, initialize the FCR */
+	if (priv->kauai_fcr)
+		writel(KAUAI_FCR_UATA_MAGIC |
+		       KAUAI_FCR_UATA_RESET_N |
+		       KAUAI_FCR_UATA_ENABLE, priv->kauai_fcr);
+}
+
+static struct scsi_host_template pata_macio_sht = {
+	ATA_BASE_SHT(DRV_NAME),
+	.sg_tablesize		= MAX_DCMDS / 2,	/* XXX Check what our max really is */
+	.dma_boundary		= ATA_DMA_BOUNDARY,	/* We may not need that strict one */
+};
+
+static struct ata_port_operations pata_macio_ops = {
+	.inherits		= &ata_sff_port_ops,
+
+	.set_piomode		= pata_macio_set_timings,
+	.set_dmamode		= pata_macio_set_timings,
+	.mode_filter		= pata_macio_mode_filter,
+	.cable_detect		= pata_macio_cable_detect,
+	.sff_dev_select		= pata_macio_dev_select,
+	.qc_prep		= pata_macio_qc_prep,
+	.bmdma_setup		= pata_macio_bmdma_setup,
+	.bmdma_start		= pata_macio_bmdma_start,
+	.bmdma_stop		= pata_macio_bmdma_stop,
+	.bmdma_status		= pata_macio_bmdma_status,
+	.port_start		= pata_macio_port_start,
+	.sff_irq_clear		= pata_macio_irq_clear,
+};
+
+static void __devinit pata_macio_invariants(struct pata_macio_priv *priv)
+{
+	const int *bidp;
+
+	/* Identify the type of controller */
+	if (of_device_is_compatible(priv->node, "shasta-ata")) {
+		priv->kind = controller_sh_ata6;
+	        priv->timings = pata_macio_shasta_timings;
+	} else if (of_device_is_compatible(priv->node, "kauai-ata")) {
+		priv->kind = controller_un_ata6;
+	        priv->timings = pata_macio_kauai_timings;
+	} else if (of_device_is_compatible(priv->node, "K2-UATA")) {
+		priv->kind = controller_k2_ata6;
+	        priv->timings = pata_macio_kauai_timings;
+	} else if (of_device_is_compatible(priv->node, "keylargo-ata")) {
+		if (strcmp(priv->node->name, "ata-4") == 0) {
+			priv->kind = controller_kl_ata4;
+			priv->timings = pata_macio_kl66_timings;
+		} else {
+			priv->kind = controller_kl_ata3;
+			priv->timings = pata_macio_kl33_timings;
+		}
+	} else if (of_device_is_compatible(priv->node, "heathrow-ata")) {
+		priv->kind = controller_heathrow;
+		priv->timings = pata_macio_heathrow_timings;
+	} else {
+		priv->kind = controller_ohare;
+		priv->timings = pata_macio_ohare_timings;
+		priv->broken_dma = 1;
+	}
+
+	/* XXX FIXME --- setup priv->mediabay here */
+
+	/* Get Apple bus ID (for clock and ASIC control) */
+	bidp = of_get_property(priv->node, "AAPL,bus-id", NULL);
+	priv->aapl_bus_id =  bidp ? *bidp : 0;
+
+	/* Fixup missing Apple bus ID in case of media-bay */
+	if (priv->mediabay && bidp == 0)
+		priv->aapl_bus_id = 1;
+
+	/* Get cable type from device-tree */
+	if (priv->kind == controller_kl_ata4 ||
+	    priv->kind == controller_un_ata6 ||
+	    priv->kind == controller_k2_ata6 ||
+	    priv->kind == controller_sh_ata6) {
+		const char* cable = of_get_property(priv->node, "cable-type",
+						    NULL);
+		if (cable && !strncmp(cable, "80-", 3))
+			priv->cable_80 = 1;
+	}
+
+	/* G5's seem to have incorrect cable type in device-tree.
+	 * Let's assume they always have a 80 conductor cable, this seem to
+	 * be always the case unless the user mucked around
+	 */
+	if (of_device_is_compatible(priv->node, "K2-UATA") ||
+	    of_device_is_compatible(priv->node, "shasta-ata"))
+		priv->cable_80 = 1;
+}
+
+static void __devinit pata_macio_setup_ios(struct ata_ioports *ioaddr,
+					   void __iomem * base)
+{
+	/* cmd_addr is the base of regs for that port */
+	ioaddr->cmd_addr	= base;
+
+	/* taskfile registers */
+	ioaddr->data_addr	= base + (ATA_REG_DATA    << 4);
+	ioaddr->error_addr	= base + (ATA_REG_ERR     << 4);
+	ioaddr->feature_addr	= base + (ATA_REG_FEATURE << 4);
+	ioaddr->nsect_addr	= base + (ATA_REG_NSECT   << 4);
+	ioaddr->lbal_addr	= base + (ATA_REG_LBAL    << 4);
+	ioaddr->lbam_addr	= base + (ATA_REG_LBAM    << 4);
+	ioaddr->lbah_addr	= base + (ATA_REG_LBAH    << 4);
+	ioaddr->device_addr	= base + (ATA_REG_DEVICE  << 4);
+	ioaddr->status_addr	= base + (ATA_REG_STATUS  << 4);
+	ioaddr->command_addr	= base + (ATA_REG_CMD     << 4);
+	ioaddr->altstatus_addr	= base + 0x160;
+	ioaddr->ctl_addr	= base + 0x160;
+
+	/* Keep bmdma_addr 0 to trigger oopses if generic code tries
+	 * to use it instead of our specific code
+	 */
+	ioaddr->bmdma_addr	= 0;
+}
+
+static void __devinit pmac_macio_calc_timing_masks(struct pata_macio_priv *priv,
+						   struct ata_port_info   *pinfo)
+{
+	int i = 0;
+
+	pinfo->pio_mask		= 0;
+	pinfo->mwdma_mask	= 0;
+	pinfo->udma_mask	= 0;
+
+	while (priv->timings[i].mode > 0) {
+		unsigned int mask = 1U << (priv->timings[i].mode & 0x0f);
+		switch(priv->timings[i].mode & 0xf0) {
+		case 0x00: /* PIO */
+			pinfo->pio_mask |= (mask >> 8);
+			break;
+		case 0x20: /* MWDMA */
+			pinfo->mwdma_mask |= mask;
+			break;
+		case 0x40: /* UDMA */
+			pinfo->udma_mask |= mask;
+			break;
+		}
+		i++;
+	}
+	dev_dbg(priv->dev, "Supported masks: PIO=%lx, MWDMA=%lx, UDMA=%lx\n",
+		pinfo->pio_mask, pinfo->mwdma_mask, pinfo->udma_mask);
+	msleep(5000);
+}
+
+static int __devinit pata_macio_common_init(struct pata_macio_priv	*priv,
+					    resource_size_t		tfregs,
+					    resource_size_t		dmaregs,
+					    resource_size_t		fcregs,
+					    unsigned long		irq)
+{
+	struct ata_port_info		pinfo;
+	const struct ata_port_info	*ppi[] = { &pinfo, NULL };
+
+	/* Fill up privates with various invariants collected from the
+	 * device-tree
+	 */
+	pata_macio_invariants(priv);
+
+	/* Make sure we have sane initial timings in the cache */
+	pata_macio_default_timings(priv);
+
+	/* Allocate libata host for 1 port */
+	memset(&pinfo, 0, sizeof(struct ata_port_info));
+	pmac_macio_calc_timing_masks(priv, &pinfo);
+	pinfo.flags		= ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO |
+				  ATA_FLAG_NO_LEGACY;
+	pinfo.port_ops		= &pata_macio_ops;
+	pinfo.private_data	= priv;
+
+	priv->host = ata_host_alloc_pinfo(priv->dev, ppi, 1);
+	if (priv->host == NULL) {
+		dev_err(priv->dev, "Failed to allocate ATA port structure\n");
+		return -ENOMEM;
+	}
+
+	/* Map base registers */
+	priv->tfregs = devm_ioremap(priv->dev, tfregs, 0x100);
+	if (priv->tfregs == NULL) {
+		dev_err(priv->dev, "Failed to map ATA ports\n");
+		return -ENOMEM;
+	}
+	priv->host->iomap = &priv->tfregs;
+
+	/* Map DMA regs */
+	if (dmaregs != 0) {
+		priv->dma_regs = devm_ioremap(priv->dev, dmaregs,
+					      sizeof(struct dbdma_regs));
+		if (priv->dma_regs == NULL)
+			dev_warn(priv->dev, "Failed to map ATA DMA registers\n");
+	}
+
+	/* If chip has local feature control, map those regs too */
+	if (fcregs != 0) {
+		priv->kauai_fcr = devm_ioremap(priv->dev, fcregs, 4);
+		if (priv->kauai_fcr == NULL) {
+			dev_err(priv->dev, "Failed to map ATA FCR register\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Setup port data structure */
+	pata_macio_setup_ios(&priv->host->ports[0]->ioaddr, priv->tfregs);
+	priv->host->ports[0]->private_data = priv;
+
+	/* hard-reset the controller */
+	pata_macio_reset_hw(priv);
+	pata_macio_apply_timings(priv->host->ports[0], 0);
+
+	/* Enable bus master if necessary */
+	if (priv->pdev && priv->dma_regs)
+		pci_set_master(priv->pdev);
+
+	dev_info(priv->dev, "Activating pata-macio chipset %s, Apple bus ID %d\n",
+		 macio_ata_names[priv->kind], priv->aapl_bus_id);
+
+	/* Start it up */
+	return ata_host_activate(priv->host, irq, ata_sff_interrupt, IRQF_SHARED,
+				 &pata_macio_sht);
+}
+
+static int __devinit pata_macio_attach(struct macio_dev *mdev,
+				       const struct of_device_id *match)
+{
+	struct pata_macio_priv	*priv;
+	resource_size_t		tfregs, dmaregs = 0;
+	unsigned long		irq;
+
+	/* Check for broken device-trees */
+	if (macio_resource_count(mdev) == 0) {
+		dev_err(&mdev->ofdev.dev,
+			"No addresses for controller\n");
+		return -ENXIO;
+	}
+
+	/* Allocate and init private data structure */
+	priv = devm_kzalloc(&mdev->ofdev.dev,
+			    sizeof(struct pata_macio_priv), GFP_KERNEL);
+	if (priv == NULL) {
+		dev_err(&mdev->ofdev.dev,
+			"Failed to allocate private memory\n");
+		return -ENOMEM;
+	}
+	priv->node = of_node_get(mdev->ofdev.node);
+	priv->mdev = mdev;
+	priv->dev = &mdev->ofdev.dev;
+
+	/* Request memory resource for taskfile registers */
+	if (macio_request_resource(mdev, 0, "pata-macio")) {
+		dev_err(&mdev->ofdev.dev,
+			"Cannot obtain taskfile resource\n");
+		return -EBUSY;
+	}
+	tfregs = macio_resource_start(mdev, 0);
+
+	/* Request resources for DMA registers if any */
+	if (macio_resource_count(mdev) >= 2) {
+		if (macio_request_resource(mdev, 1, "pata-macio-dma"))
+			dev_err(&mdev->ofdev.dev,
+				"Cannot obtain DMA resource\n");
+		else
+			dmaregs = macio_resource_start(mdev, 1);
+	}
+
+	/*
+	 * Fixup missing IRQ for some old implementations with broken
+	 * device-trees.
+	 *
+	 * This is a bit bogus, it should be fixed in the device-tree itself,
+	 * via the existing macio fixups, based on the type of interrupt
+	 * controller in the machine. However, I have no test HW for this case,
+	 * and this trick works well enough on those old machines...
+	 */
+	if (macio_irq_count(mdev) == 0) {
+		dev_warn(&mdev->ofdev.dev,
+			 "No interrupts for controller, using 13\n");
+		irq = irq_create_mapping(NULL, 13);
+	} else
+		irq = macio_irq(mdev, 0);
+
+	/* Get register addresses and call common initialization */
+	if (pata_macio_common_init(priv,
+				   tfregs,		/* Taskfile regs */
+				   dmaregs,		/* DBDMA regs */
+				   0,			/* Feature control */
+				   irq)) {
+		macio_release_resource(mdev, 0);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int __devexit pata_macio_detach(struct macio_dev *mdev)
+{
+	/* XXX TODO */
+
+	return 0;
+}
+
+static int __devinit pata_macio_pci_attach(struct pci_dev *pdev,
+					   const struct pci_device_id *id)
+{
+	struct pata_macio_priv	*priv;
+	struct device_node	*np;
+	resource_size_t		rbase;
+
+	/* We cannot use a MacIO controller without its OF device node */
+	np = pci_device_to_OF_node(pdev);
+	if (np == NULL) {
+		dev_err(&pdev->dev,
+			"Cannot find OF device node for controller\n");
+		return -ENODEV;
+	}
+
+	/* Check that it can be enabled */
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot enable controller PCI device\n");
+		return -ENXIO;
+	}
+
+	/* Allocate and init private data structure */
+	priv = devm_kzalloc(&pdev->dev,
+			    sizeof(struct pata_macio_priv), GFP_KERNEL);
+	if (priv == NULL) {
+		dev_err(&pdev->dev,
+			"Failed to allocate private memory\n");
+		return -ENOMEM;
+	}
+	priv->node = of_node_get(np);
+	priv->pdev = pdev;
+	priv->dev = &pdev->dev;
+
+	/* Get MMIO regions */
+	if (pci_request_regions(pdev, "pata-macio")) {
+		dev_err(&pdev->dev,
+			"Cannot obtain PCI resources\n");
+		return -EBUSY;
+	}
+
+	/* Get register addresses and call common initialization */
+	rbase = pci_resource_start(pdev, 0);
+	if (pata_macio_common_init(priv,
+				   rbase + 0x2000,	/* Taskfile regs */
+				   rbase + 0x1000,	/* DBDMA regs */
+				   rbase,		/* Feature control */
+				   pdev->irq)) {
+		pci_release_regions(pdev);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static void __devexit pata_macio_pci_detach(struct pci_dev *pdev)
+{
+	/* XXX TODO */
+}
+
+
+static struct of_device_id pata_macio_match[] =
+{
+	{
+	.name 		= "IDE",
+	},
+	{
+	.name 		= "ATA",
+	},
+	{
+	.type		= "ide",
+	},
+	{
+	.type		= "ata",
+	},
+	{},
+};
+
+static struct macio_driver pata_macio_driver =
+{
+	.name 		= "pata-macio",
+	.match_table	= pata_macio_match,
+	.probe		= pata_macio_attach,
+	.remove		= pata_macio_detach,
+#if 0 /* XXX TODO */
+	.suspend	= pata_macio_suspend,
+	.resume		= pata_macio_resume,
+#endif
+	.driver = {
+		.owner		= THIS_MODULE,
+	},
+};
+
+static const struct pci_device_id pata_macio_pci_match[] = {
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_SH_ATA),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID2_ATA),	0 },
+	{},
+};
+
+static struct pci_driver pata_macio_pci_driver = {
+	.name		= "pata-pci-macio",
+	.id_table	= pata_macio_pci_match,
+	.probe		= pata_macio_pci_attach,
+	.remove		= pata_macio_pci_detach,
+#if 0 /* XXX TODO */
+	.suspend	= pata_macio_pci_suspend,
+	.resume		= pata_macio_pci_resume,
+#endif
+	.driver = {
+		.owner		= THIS_MODULE,
+	},
+};
+MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
+
+
+static int __init pata_macio_init(void)
+{
+	int rc;
+
+	if (!machine_is(powermac))
+		return -ENODEV;
+
+	rc = pci_register_driver(&pata_macio_pci_driver);
+	if (rc)
+		return rc;
+	rc = macio_register_driver(&pata_macio_driver);
+	if (rc) {
+		pci_unregister_driver(&pata_macio_pci_driver);
+		return rc;
+	}
+	return 0;
+}
+
+static void __exit pata_macio_exit(void)
+{
+	macio_unregister_driver(&pata_macio_driver);
+	pci_unregister_driver(&pata_macio_pci_driver);
+}
+
+module_init(pata_macio_init);
+module_exit(pata_macio_exit);
+
+MODULE_AUTHOR("Benjamin Herrenschmidt");
+MODULE_DESCRIPTION("Apple MacIO PATA driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
Index: linux-work/drivers/ata/pata_macio.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/drivers/ata/pata_macio.h	2008-08-01 17:13:31.000000000 +1000
@@ -0,0 +1,199 @@
+#ifndef __PATA_MACIO_H__
+#define __PATA_MACIO_H__
+
+/*
+ * Model of macio controller
+ */
+enum {
+	controller_ohare,	/* OHare based */
+	controller_heathrow,	/* Heathrow/Paddington */
+	controller_kl_ata3,	/* KeyLargo ATA-3 */
+	controller_kl_ata4,	/* KeyLargo ATA-4 */
+	controller_un_ata6,	/* UniNorth2 ATA-6 */
+	controller_k2_ata6,	/* K2 ATA-6 */
+	controller_sh_ata6,	/* Shasta ATA-6 */
+};
+
+static const char* macio_ata_names[] = {
+	"OHare ATA",		/* OHare based */
+	"Heathrow ATA",		/* Heathrow/Paddington */
+	"KeyLargo ATA-3",	/* KeyLargo ATA-3 (MDMA only) */
+	"KeyLargo ATA-4",	/* KeyLargo ATA-4 (UDMA/66) */
+	"UniNorth ATA-6",	/* UniNorth2 ATA-6 (UDMA/100) */
+	"K2 ATA-6",		/* K2 ATA-6 (UDMA/100) */
+	"Shasta ATA-6",		/* Shasta ATA-6 (UDMA/133) */
+};
+
+/*
+ * Extra registers, both 32-bit little-endian
+ */
+#define IDE_TIMING_CONFIG	0x200
+#define IDE_INTERRUPT		0x300
+
+/* Kauai (U2) ATA has different register setup */
+#define IDE_KAUAI_PIO_CONFIG	0x200
+#define IDE_KAUAI_ULTRA_CONFIG	0x210
+#define IDE_KAUAI_POLL_CONFIG	0x220
+
+/*
+ * Timing configuration register definitions
+ */
+
+/* Number of IDE_SYSCLK_NS ticks, argument is in nanoseconds */
+#define SYSCLK_TICKS(t)		(((t) + IDE_SYSCLK_NS - 1) / IDE_SYSCLK_NS)
+#define SYSCLK_TICKS_66(t)	(((t) + IDE_SYSCLK_66_NS - 1) / IDE_SYSCLK_66_NS)
+#define IDE_SYSCLK_NS		30	/* 33Mhz cell */
+#define IDE_SYSCLK_66_NS	15	/* 66Mhz cell */
+
+/* 133Mhz cell, found in shasta.
+ * See comments about 100 Mhz Uninorth 2...
+ * Note that PIO_MASK and MDMA_MASK seem to overlap, that's just
+ * weird and I don't now why .. at this stage
+ */
+#define TR_133_PIOREG_PIO_MASK		0xff000fff
+#define TR_133_PIOREG_MDMA_MASK		0x00fff800
+#define TR_133_UDMAREG_UDMA_MASK	0x0003ffff
+#define TR_133_UDMAREG_UDMA_EN		0x00000001
+
+/* 100Mhz cell, found in Uninorth 2 and K2. It appears as a pci device
+ * (106b/0033) on uninorth or K2 internal PCI bus and it's clock is
+ * controlled like gem or fw. It appears to be an evolution of keylargo
+ * ATA4 with a timing register extended to 2x32bits registers (one
+ * for PIO & MWDMA and one for UDMA, and a similar DBDMA channel.
+ * It has it's own local feature control register as well.
+ *
+ * After scratching my mind over the timing values, at least for PIO
+ * and MDMA, I think I've figured the format of the timing register,
+ * though I use pre-calculated tables for UDMA as usual...
+ */
+#define TR_100_PIO_ADDRSETUP_MASK	0xff000000 /* Size of field unknown */
+#define TR_100_PIO_ADDRSETUP_SHIFT	24
+#define TR_100_MDMA_MASK		0x00fff000
+#define TR_100_MDMA_RECOVERY_MASK	0x00fc0000
+#define TR_100_MDMA_RECOVERY_SHIFT	18
+#define TR_100_MDMA_ACCESS_MASK		0x0003f000
+#define TR_100_MDMA_ACCESS_SHIFT	12
+#define TR_100_PIO_MASK			0xff000fff
+#define TR_100_PIO_RECOVERY_MASK	0x00000fc0
+#define TR_100_PIO_RECOVERY_SHIFT	6
+#define TR_100_PIO_ACCESS_MASK		0x0000003f
+#define TR_100_PIO_ACCESS_SHIFT		0
+
+#define TR_100_UDMAREG_UDMA_MASK	0x0000ffff
+#define TR_100_UDMAREG_UDMA_EN		0x00000001
+
+
+/* 66Mhz cell, found in KeyLargo. Can do ultra mode 0 to 2 on
+ * 40 connector cable and to 4 on 80 connector one.
+ * Clock unit is 15ns (66Mhz)
+ *
+ * 3 Values can be programmed:
+ *  - Write data setup, which appears to match the cycle time. They
+ *    also call it DIOW setup.
+ *  - Ready to pause time (from spec)
+ *  - Address setup. That one is weird. I don't see where exactly
+ *    it fits in UDMA cycles, I got it's name from an obscure piece
+ *    of commented out code in Darwin. They leave it to 0, we do as
+ *    well, despite a comment that would lead to think it has a
+ *    min value of 45ns.
+ * Apple also add 60ns to the write data setup (or cycle time ?) on
+ * reads.
+ */
+#define TR_66_UDMA_MASK			0xfff00000
+#define TR_66_UDMA_EN			0x00100000 /* Enable Ultra mode for DMA */
+#define TR_66_PIO_ADDRSETUP_MASK	0xe0000000 /* Address setup */
+#define TR_66_PIO_ADDRSETUP_SHIFT	29
+#define TR_66_UDMA_RDY2PAUS_MASK	0x1e000000 /* Ready 2 pause time */
+#define TR_66_UDMA_RDY2PAUS_SHIFT	25
+#define TR_66_UDMA_WRDATASETUP_MASK	0x01e00000 /* Write data setup time */
+#define TR_66_UDMA_WRDATASETUP_SHIFT	21
+#define TR_66_MDMA_MASK			0x000ffc00
+#define TR_66_MDMA_RECOVERY_MASK	0x000f8000
+#define TR_66_MDMA_RECOVERY_SHIFT	15
+#define TR_66_MDMA_ACCESS_MASK		0x00007c00
+#define TR_66_MDMA_ACCESS_SHIFT		10
+#define TR_66_PIO_MASK			0xe00003ff
+#define TR_66_PIO_RECOVERY_MASK		0x000003e0
+#define TR_66_PIO_RECOVERY_SHIFT	5
+#define TR_66_PIO_ACCESS_MASK		0x0000001f
+#define TR_66_PIO_ACCESS_SHIFT		0
+
+/* 33Mhz cell, found in OHare, Heathrow (& Paddington) and KeyLargo
+ * Can do pio & mdma modes, clock unit is 30ns (33Mhz)
+ *
+ * The access time and recovery time can be programmed. Some older
+ * Darwin code base limit OHare to 150ns cycle time. I decided to do
+ * the same here fore safety against broken old hardware ;)
+ * The HalfTick bit, when set, adds half a clock (15ns) to the access
+ * time and removes one from recovery. It's not supported on KeyLargo
+ * implementation afaik. The E bit appears to be set for PIO mode 0 and
+ * is used to reach long timings used in this mode.
+ */
+#define TR_33_MDMA_MASK			0x003ff800
+#define TR_33_MDMA_RECOVERY_MASK	0x001f0000
+#define TR_33_MDMA_RECOVERY_SHIFT	16
+#define TR_33_MDMA_ACCESS_MASK		0x0000f800
+#define TR_33_MDMA_ACCESS_SHIFT		11
+#define TR_33_MDMA_HALFTICK		0x00200000
+#define TR_33_PIO_MASK			0x000007ff
+#define TR_33_PIO_E			0x00000400
+#define TR_33_PIO_RECOVERY_MASK		0x000003e0
+#define TR_33_PIO_RECOVERY_SHIFT	5
+#define TR_33_PIO_ACCESS_MASK		0x0000001f
+#define TR_33_PIO_ACCESS_SHIFT		0
+
+/*
+ * Interrupt register definitions
+ */
+#define IDE_INTR_DMA			0x80000000
+#define IDE_INTR_DEVICE			0x40000000
+
+/*
+ * FCR Register on Kauai. Not sure what bit 0x4 is  ...
+ */
+#define KAUAI_FCR_UATA_MAGIC		0x00000004
+#define KAUAI_FCR_UATA_RESET_N		0x00000002
+#define KAUAI_FCR_UATA_ENABLE		0x00000001
+
+
+/* allow up to 256 DBDMA commands per xfer */
+#define MAX_DCMDS		256
+
+/*
+ * Wait 1s for disk to answer on IDE bus after a hard reset
+ * of the device (via GPIO/FCR).
+ *
+ * Some devices seem to "pollute" the bus even after dropping
+ * the BSY bit (typically some combo drives slave on the UDMA
+ * bus) after a hard reset. Since we hard reset all drives on
+ * KeyLargo ATA66, we have to keep that delay around. I may end
+ * up not hard resetting anymore on these and keep the delay only
+ * for older interfaces instead (we have to reset when coming
+ * from MacOS...) --BenH.
+ */
+#define IDE_WAKEUP_DELAY	(1*HZ)
+
+struct pata_macio_timing;
+
+struct pata_macio_priv {
+	int				kind;
+	int				aapl_bus_id;
+	int				cable_80 : 1;
+	int				mediabay : 1;
+	int				broken_dma : 1;
+	int				broken_dma_warn : 1;
+	struct device_node		*node;
+	struct macio_dev		*mdev;
+	struct pci_dev			*pdev;
+	struct device			*dev;
+	u32				treg[2][2];
+	void __iomem			*tfregs;
+	void __iomem			*kauai_fcr;
+	struct dbdma_regs __iomem	*dma_regs;
+	struct dbdma_cmd *		dma_table_cpu;
+	dma_addr_t			dma_table_dma;
+	struct ata_host			*host;
+	const struct pata_macio_timing	*timings;
+};
+
+#endif /* __PATA_MACIO_H__ */
Index: linux-work/drivers/ata/Kconfig
===================================================================
--- linux-work.orig/drivers/ata/Kconfig	2008-07-07 13:45:04.000000000 +1000
+++ linux-work/drivers/ata/Kconfig	2008-08-01 16:58:54.000000000 +1000
@@ -723,5 +723,15 @@ config PATA_BF54X
 
 	  If unsure, say N.
 
+config PATA_MACIO
+	tristate "Apple PowerMac/PowerBook internal 'MacIO' IDE"
+	depends on PPC_PMAC
+	help
+	  Most IDE capable PowerMacs have IDE busses driven by a variant
+          of this controller which is part of the Apple chipset used on
+          most PowerMac models. Some models have multiple busses using
+          different chipsets, though generally, MacIO is one of them.
+
+
 endif # ATA_SFF
 endif # ATA
Index: linux-work/drivers/ata/Makefile
===================================================================
--- linux-work.orig/drivers/ata/Makefile	2008-07-07 13:45:04.000000000 +1000
+++ linux-work/drivers/ata/Makefile	2008-07-31 16:08:19.000000000 +1000
@@ -18,6 +18,7 @@ obj-$(CONFIG_SATA_MV)		+= sata_mv.o
 obj-$(CONFIG_SATA_INIC162X)	+= sata_inic162x.o
 obj-$(CONFIG_PDC_ADMA)		+= pdc_adma.o
 obj-$(CONFIG_SATA_FSL)		+= sata_fsl.o
+obj-$(CONFIG_PATA_MACIO)	+= pata_macio.o
 
 obj-$(CONFIG_PATA_ALI)		+= pata_ali.o
 obj-$(CONFIG_PATA_AMD)		+= pata_amd.o



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01  9:08 [RFC/Review] libata driver for Apple "macio" pata Benjamin Herrenschmidt
@ 2008-08-01  9:59 ` Tejun Heo
  2008-08-01 10:56   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 7+ messages in thread
From: Tejun Heo @ 2008-08-01  9:59 UTC (permalink / raw)
  To: benh; +Cc: list linux-ide, Jeff Garzik, Alan Cox

Hello,

I don't know anything about ppc macs or macio controllers and only
tentatively reviewed common libata stuff.  I couldn't spot any major
problem.  A few nits follow.

Benjamin Herrenschmidt wrote:
> +#include "pata_macio.h"

In libata, LLDs usually don't use header files for constants and stuff
unless they need to be shared.

> +static void pata_macio_apply_timings(struct ata_port *ap, unsigned int device)
> +{
> +	struct pata_macio_priv *priv = ap->private_data;
> +	void __iomem *rbase = ap->ioaddr.cmd_addr;
> +
> +	if (device != 0 && device != 1)
> +		return;

This condition is guaranteed not to be triggered by the core layer.

> +static void pata_macio_set_timings(struct ata_port *ap,
> +				   struct ata_device *adev)

Using single function for ->set_piomode and ->set_dmamode might not be
a good idea as libata core might call ->set_piomode with pio mode set
to PIO0 before reset but leaving the dma mode as is expecting the
transfer mode to be configured to PIO0.  However, this function would
configure considering the left-over DMA mode.

Hmmm... I guess we need to clear the DMA mode on reset, but anyways
that's what libata has been assuming till now.  set_piomode only deals
with pio mode.

> +{
> +	struct pata_macio_priv *priv = ap->private_data;
> +	const struct pata_macio_timing *t;
> +
> +	dev_dbg(priv->dev, "Set timings: DEV=%d, PIO=0x%x (%s), DMA=0x%x (%s)\n",
> +		adev->devno,
> +		adev->pio_mode, ata_mode_string(ata_xfer_mode2mask(adev->pio_mode)),
> +		adev->dma_mode, ata_mode_string(ata_xfer_mode2mask(adev->dma_mode)));
> +
> +	if (adev->devno != 0 && adev->devno != 1)
> +		return;

This condition is guaranteed not to be triggered by the core layer.

> +static unsigned long pata_macio_mode_filter(struct ata_device *adev,
> +					    unsigned long xfer_mask)
> +{
> +	struct pata_macio_priv *priv = adev->link->ap->private_data;
> +
> +	if (priv->dma_regs == NULL)
> +		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
> +	return xfer_mask;
> +}

Wouldn't it be better to clear these during initialization?

> +static int pata_macio_cable_detect(struct ata_port *ap)
> +{
> +	struct pata_macio_priv *priv = ap->private_data;
> +
> +	return priv->cable_80 ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
> +}
> +
> +static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
> +{
> +	unsigned int write = (qc->tf.flags & ATA_TFLAG_WRITE);
> +	struct ata_port *ap = qc->ap;
> +	struct pata_macio_priv *priv = ap->private_data;
> +	struct scatterlist *sg;
> +	struct dbdma_cmd *table;
> +	unsigned int si, pi;
> +
> +	dev_dbgdma(priv->dev, "%s: qc %p flags %lx, write %d dev %d\n",
> +		   __func__, qc, qc->flags, write, qc->dev->devno);
> +
> +	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
> +		return;
> +
> +	table = (struct dbdma_cmd *) priv->dma_table_cpu;
> +
> +	pi = 0;
> +	for_each_sg(qc->sg, sg, qc->n_elem, si) {
> +		u32 addr, sg_len, len;
> +
> +		/* determine if physical DMA addr spans 64K boundary.
> +		 * Note h/w doesn't support 64-bit, so we unconditionally
> +		 * truncate dma_addr_t to u32.
> +		 */
> +		addr = (u32) sg_dma_address(sg);
> +		sg_len = sg_dma_len(sg);
> +
> +		while (sg_len) {
> +			/* table overflow should never happen */
> +			BUG_ON (pi++ >= MAX_DCMDS);
> +
> +			len = (sg_len < 0xfe00) ? sg_len : 0xfe00;
> +			st_le16(&table->command, write ? OUTPUT_MORE: INPUT_MORE);
> +			st_le16(&table->req_count, len);
> +			st_le32(&table->phy_addr, addr);
> +			table->cmd_dep = 0;
> +			table->xfer_status = 0;
> +			table->res_count = 0;
> +			addr += len;
> +			sg_len -= len;
> +			++table;
> +		}
> +	}
> +
> +	/* Convert the last command to an input/output */
> +	if (pi) {
> +		table--;
> +		st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
> +		table++;
> +	}

libata guarantees that pi is not zero here.

> +	/* Add the stop command to the end of the list */
> +	memset(table, 0, sizeof(struct dbdma_cmd));
> +	st_le16(&table->command, DBDMA_STOP);
> +
> +	dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
> +}
> +
> +static void pata_macio_bmdma_setup (struct ata_queued_cmd *qc)

                                     ^ Heh heh.  On other BMDMA ops too.

> +static u8 pata_macio_bmdma_status (struct ata_port *ap)
> +{
> +	struct pata_macio_priv *priv = ap->private_data;
> +	u32 dstat, rstat = ATA_DMA_INTR;
> +	unsigned long timeout = 0;
> +
> +	dstat = readl(&priv->dma_regs->status);
> +
> +	dev_dbgdma(priv->dev, "%s: dstat=%x\n", __func__, dstat);
> +
> +	/* We have to things to deal with here:
> +	 *
> +	 * - The dbdma won't stop if the command was started
> +	 * but completed with an error without transferring all
> +	 * datas. This happens when bad blocks are met during
> +	 * a multi-block transfer.
> +	 *
> +	 * - The dbdma fifo hasn't yet finished flushing to
> +	 * to system memory when the disk interrupt occurs.
> +	 *
> +	 */
> +
> +	/* First check for errors */
> +	if ((dstat & (RUN|DEAD)) != RUN)
> +		rstat |= ATA_DMA_ERR;
> +
> +	/* If ACTIVE is cleared, the STOP command have passed and
> +	 * transfer is complete. If not, we to flush the channel.

					missing have?

> +	 */
> +	if ((dstat & ACTIVE) == 0)
> +		return rstat;
> +
> +	dev_dbgdma(priv->dev, "%s: DMA still active, flushing...\n", __func__);
> +
> +	/* If dbdma didn't execute the STOP command yet, the
> +	 * active bit is still set. We consider that we aren't
> +	 * sharing interrupts (which is hopefully the case with

Wouldn't it be better to clear IRQF_SHARED for those?  Also, what
happens w/ irqpoll?

> +	 * those controllers) and so we just try to flush the
> +	 * channel for pending data in the fifo
> +	 */
> +	udelay(1);
> +	writel((FLUSH << 16) | FLUSH, &priv->dma_regs->control);
> +	for (;;) {
> +		udelay(1);
> +		dstat = readl(&priv->dma_regs->status);
> +		if ((dstat & FLUSH) == 0)
> +			break;
> +		if (++timeout > 1000) {
> +			dev_warn(priv->dev, "timeout flushing DMA\n");
> +			rstat |= ATA_DMA_ERR;
> +			break;
> +		}
> +	}
> +	return rstat;
> +}
> +
> +/* port_start is when we allocate the DMA command list */
> +static int pata_macio_port_start (struct ata_port *ap)
> +{
> +	struct pata_macio_priv *priv = ap->private_data;
> +
> +	if (priv->dma_regs == NULL)
> +		return 0;
> +
> +	/* Make sure DMA controller is stopped */
> +	writel((RUN|PAUSE|FLUSH|WAKE|DEAD) << 16, &priv->dma_regs->control);
> +	while (readl(&priv->dma_regs->status) & RUN)
> +		udelay(1);
> +
> +	/* Allocate space for the DBDMA commands.
> +	 *
> +	 * The +2 is +1 for the stop command and +1 to allow for
> +	 * aligning the start address to a multiple of 16 bytes.
> +	 */
> +	priv->dma_table_cpu = dma_alloc_coherent(priv->dev,
> +						 (MAX_DCMDS + 2) * sizeof(struct dbdma_cmd),
> +						 &priv->dma_table_dma, GFP_KERNEL);

dmam_alloc_coherent() maybe?

> +	if (priv->dma_table_cpu == NULL) {
> +		dev_err(priv->dev, "Unable to allocate DMA command list\n");
> +		priv->dma_regs = NULL;
> +	}
> +	return 0;
> +}

> +static int __devinit pata_macio_pci_attach(struct pci_dev *pdev,
> +					   const struct pci_device_id *id)
> +{
> +	struct pata_macio_priv	*priv;
> +	struct device_node	*np;
> +	resource_size_t		rbase;
> +
> +	/* We cannot use a MacIO controller without its OF device node */
> +	np = pci_device_to_OF_node(pdev);
> +	if (np == NULL) {
> +		dev_err(&pdev->dev,
> +			"Cannot find OF device node for controller\n");
> +		return -ENODEV;
> +	}
> +
> +	/* Check that it can be enabled */
> +	if (pci_enable_device(pdev)) {
> +		dev_err(&pdev->dev,
> +			"Cannot enable controller PCI device\n");
> +		return -ENXIO;
> +	}

Maybe pcim_enable_device() can be used?  Or is it difficult because of
the macio stuff?

> +
> +	/* Allocate and init private data structure */
> +	priv = devm_kzalloc(&pdev->dev,
> +			    sizeof(struct pata_macio_priv), GFP_KERNEL);
> +	if (priv == NULL) {
> +		dev_err(&pdev->dev,
> +			"Failed to allocate private memory\n");
> +		return -ENOMEM;
> +	}
> +	priv->node = of_node_get(np);
> +	priv->pdev = pdev;
> +	priv->dev = &pdev->dev;
> +
> +	/* Get MMIO regions */
> +	if (pci_request_regions(pdev, "pata-macio")) {
> +		dev_err(&pdev->dev,
> +			"Cannot obtain PCI resources\n");
> +		return -EBUSY;
> +	}
> +
> +	/* Get register addresses and call common initialization */
> +	rbase = pci_resource_start(pdev, 0);
> +	if (pata_macio_common_init(priv,
> +				   rbase + 0x2000,	/* Taskfile regs */
> +				   rbase + 0x1000,	/* DBDMA regs */
> +				   rbase,		/* Feature control */
> +				   pdev->irq)) {
> +		pci_release_regions(pdev);
> +		return -ENXIO;
> +	}
> +
> +	return 0;
> +}

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01  9:59 ` Tejun Heo
@ 2008-08-01 10:56   ` Benjamin Herrenschmidt
  2008-08-01 16:13     ` Tejun Heo
  2008-08-01 16:58     ` Alan Cox
  0 siblings, 2 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2008-08-01 10:56 UTC (permalink / raw)
  To: Tejun Heo; +Cc: list linux-ide, Jeff Garzik, Alan Cox

On Fri, 2008-08-01 at 18:59 +0900, Tejun Heo wrote:
> Hello,
> 
> I don't know anything about ppc macs or macio controllers and only
> tentatively reviewed common libata stuff.  I couldn't spot any major
> problem.  A few nits follow.

Fair enough, that's what I was after anyway :-)

Thanks !
Ben.

> > +#include "pata_macio.h"
> 
> In libata, LLDs usually don't use header files for constants and stuff
> unless they need to be shared.

Ok, I was hesitating a bit ... it's more handy when hacking to have it
separate but I may fold them together when I'm done.

> > +static void pata_macio_apply_timings(struct ata_port *ap, unsigned int device)
> > +{
> > +	struct pata_macio_priv *priv = ap->private_data;
> > +	void __iomem *rbase = ap->ioaddr.cmd_addr;
> > +
> > +	if (device != 0 && device != 1)
> > +		return;
> 
> This condition is guaranteed not to be triggered by the core layer.

I though that too, but wasn't 100% sure... ie, if something goes wrong
for some reason in the core (a bug ?) I prefer the above to blasting
beyond the array boundary. Maybe I can turn that into a BUG_ON.

> > +static void pata_macio_set_timings(struct ata_port *ap,
> > +				   struct ata_device *adev)
> 
> Using single function for ->set_piomode and ->set_dmamode might not be
> a good idea as libata core might call ->set_piomode with pio mode set
> to PIO0 before reset but leaving the dma mode as is expecting the
> transfer mode to be configured to PIO0.  However, this function would
> configure considering the left-over DMA mode.

Would that be a problem as long as we use PIO ? Ie. the chipset will
use the PIO timings when doing PIO transfers. The reason I do both in
one go is because of the shasta controller. For all the other one, I
know quite precisely what all the bits are, and the PIO vs. DMA timing
bits are clearly separate.

For shasta, I have no doc nor useful infos in the Darwin driver other
than the timing tables, and they seem to have at least a one bit overlap
between PIO and DMA timings. IE. This bit is set by the PIO timings and
by some DMA timings, and it's unclear to me how to deal with that. IE,
if you set a DMA timing where it's not set, and a PIO timing where it's
set, should I set it or not ?

Thus I decided to do a single function that does both and OR them
together, which corresponds to what Darwin does. I can still split them
again, but there's still the question of that mysterious bit :-)

> Hmmm... I guess we need to clear the DMA mode on reset, but anyways
> that's what libata has been assuming till now.  set_piomode only deals
> with pio mode.

I know and I believe it should still be ok ... As I said, the chipset
should use the PIO field in the register for PIO transfers. And if the
above unknown bit is set, I suspect it's just going to increase the
setup time a bit or something like that, which won't hurt other than
perfs.

> > +{
> > +	struct pata_macio_priv *priv = ap->private_data;
> > +	const struct pata_macio_timing *t;
> > +
> > +	dev_dbg(priv->dev, "Set timings: DEV=%d, PIO=0x%x (%s), DMA=0x%x (%s)\n",
> > +		adev->devno,
> > +		adev->pio_mode, ata_mode_string(ata_xfer_mode2mask(adev->pio_mode)),
> > +		adev->dma_mode, ata_mode_string(ata_xfer_mode2mask(adev->dma_mode)));
> > +
> > +	if (adev->devno != 0 && adev->devno != 1)
> > +		return;
> 
> This condition is guaranteed not to be triggered by the core layer.

Same as above... I prefer avoiding an array overflow if the core happens
to misbehave. Maybe I should use BUG_ON instead.

> > +static unsigned long pata_macio_mode_filter(struct ata_device *adev,
> > +					    unsigned long xfer_mask)
> > +{
> > +	struct pata_macio_priv *priv = adev->link->ap->private_data;
> > +
> > +	if (priv->dma_regs == NULL)
> > +		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
> > +	return xfer_mask;
> > +}
> 
> Wouldn't it be better to clear these during initialization?

I could. Doesn't matter much where it's done, does it ? Doing there
allows to deal with a failure in my port_start() callback, if the
allocation of the DMA table fails, I clear dma_regs.

> > +	/* Convert the last command to an input/output */
> > +	if (pi) {
> > +		table--;
> > +		st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
> > +		table++;
> > +	}
> 
> libata guarantees that pi is not zero here.

Ok. Thanks. BTW. I do have another question about that though.

I set currently the max size of the sglist to half of my max of DMA
commands a bit arbitrarily.

The reason is that I can only have 64K-4K per transfer (I don't think I
can do 64K per DBDMA entry). So the above routine can potentially
breakup, in the worst case scenario, the table into twice as many
entries if they are all 64K precisely.

Is there a way I can set a max size for a given sglist segment ? I know
the iommu can coalesce them back, but at least I know that breaking them
up won't cause more than what the iommu had as an input .. I don't have
an alignment restriction.

> > +	/* Add the stop command to the end of the list */
> > +	memset(table, 0, sizeof(struct dbdma_cmd));
> > +	st_le16(&table->command, DBDMA_STOP);
> > +
> > +	dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
> > +}
> > +
> > +static void pata_macio_bmdma_setup (struct ata_queued_cmd *qc)
> 
>                                      ^ Heh heh.  On other BMDMA ops too.

Copy/paste from another driver :-) I'll fix that up.

> > +static u8 pata_macio_bmdma
> > +	/* If ACTIVE is cleared, the STOP command have passed and
> > +	 * transfer is complete. If not, we to flush the channel.
> 
> 					missing have?

Yup, thanks.

> > +	 */
> > +	if ((dstat & ACTIVE) == 0)
> > +		return rstat;
> > +
> > +	dev_dbgdma(priv->dev, "%s: DMA still active, flushing...\n", __func__);
> > +
> > +	/* If dbdma didn't execute the STOP command yet, the
> > +	 * active bit is still set. We consider that we aren't
> > +	 * sharing interrupts (which is hopefully the case with
> 
> Wouldn't it be better to clear IRQF_SHARED for those?  Also, what
> happens w/ irqpoll?

I don't think we ever use irqpoll on powerpc, but that's a good
question. I don't know what the right answer is. In fact, I looked a bit
at the libata core irq handling and it looks like if we return that the
IRQ wasn't for us, after 1000 iteration, libata goes read the status and
clear the IRQ anyway, which doesn't sound that a good idea if the IRQ is
shared....

I know some variant of the cell do have a register I can read to make
sure the IRQ came from it (or not), but I have to figure out if it
exists on the ohare variant or not. If it does, then I may be able to do
something saner here.

In the meantime, the above is the same as what I did for drivers/ide.

Another option would be to use the DMA channel IRQ (it's a separate IRQ
I can use) and do like Darwin, that is, wait for both IRQs to happen
before moving forward.


> > +	/* Allocate space for the DBDMA commands.
> > +	 *
> > +	 * The +2 is +1 for the stop command and +1 to allow for
> > +	 * aligning the start address to a multiple of 16 bytes.
> > +	 */
> > +	priv->dma_table_cpu = dma_alloc_coherent(priv->dev,
> > +						 (MAX_DCMDS + 2) * sizeof(struct dbdma_cmd),
> > +						 &priv->dma_table_dma, GFP_KERNEL);
> 
> dmam_alloc_coherent() maybe?

Good point. Thanks.

> > +	/* Check that it can be enabled */
> > +	if (pci_enable_device(pdev)) {
> > +		dev_err(&pdev->dev,
> > +			"Cannot enable controller PCI device\n");
> > +		return -ENXIO;
> > +	}
> 
> Maybe pcim_enable_device() can be used?  Or is it difficult because of
> the macio stuff?

I wouldn't bother. There's no point in disabling it, this is just to
make sure it's been properly enabled at boot.

Many thanks for your review !

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01 10:56   ` Benjamin Herrenschmidt
@ 2008-08-01 16:13     ` Tejun Heo
  2008-08-01 22:40       ` Benjamin Herrenschmidt
  2008-08-01 16:58     ` Alan Cox
  1 sibling, 1 reply; 7+ messages in thread
From: Tejun Heo @ 2008-08-01 16:13 UTC (permalink / raw)
  To: benh; +Cc: list linux-ide, Jeff Garzik, Alan Cox

Benjamin Herrenschmidt wrote:
>>> +static void pata_macio_set_timings(struct ata_port *ap,
>>> +				   struct ata_device *adev)
>> Using single function for ->set_piomode and ->set_dmamode might not be
>> a good idea as libata core might call ->set_piomode with pio mode set
>> to PIO0 before reset but leaving the dma mode as is expecting the
>> transfer mode to be configured to PIO0.  However, this function would
>> configure considering the left-over DMA mode.
> 
> Would that be a problem as long as we use PIO ? Ie. the chipset will
> use the PIO timings when doing PIO transfers.

Some controllers share part of timing settings between PIO and DMA, so
the result might be different from expected.

> The reason I do both in
> one go is because of the shasta controller. For all the other one, I
> know quite precisely what all the bits are, and the PIO vs. DMA timing
> bits are clearly separate.
> 
> For shasta, I have no doc nor useful infos in the Darwin driver other
> than the timing tables, and they seem to have at least a one bit overlap
> between PIO and DMA timings. IE. This bit is set by the PIO timings and
> by some DMA timings, and it's unclear to me how to deal with that. IE,
> if you set a DMA timing where it's not set, and a PIO timing where it's
> set, should I set it or not ?
> 
> Thus I decided to do a single function that does both and OR them
> together, which corresponds to what Darwin does. I can still split them
> again, but there's still the question of that mysterious bit :-)

Yeah, exactly what I was talking about. :-) I think I'll just update the
core layer such that dma mode too is cleared when (re-)configuration starts.

>>> +static unsigned long pata_macio_mode_filter(struct ata_device *adev,
>>> +					    unsigned long xfer_mask)
>>> +{
>>> +	struct pata_macio_priv *priv = adev->link->ap->private_data;
>>> +
>>> +	if (priv->dma_regs == NULL)
>>> +		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
>>> +	return xfer_mask;
>>> +}
>> Wouldn't it be better to clear these during initialization?
> 
> I could. Doesn't matter much where it's done, does it ? Doing there
> allows to deal with a failure in my port_start() callback, if the
> allocation of the DMA table fails, I clear dma_regs.

Functionally, it doesn't really matter but it's just customary to use
->mode_filter() to enforce dynamic restrictions - say ATA can go over
UDMA/66 but ATAPI can't or when both devices are present one transfer
mode affects the other kind of things.  Hmm... But then again,
ata_bmdma_mode_filter() is used to put the same kind of restriction as
yours.  Heh..  Never mind this suggestion.

>>> +	/* Convert the last command to an input/output */
>>> +	if (pi) {
>>> +		table--;
>>> +		st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
>>> +		table++;
>>> +	}
>> libata guarantees that pi is not zero here.
> 
> Ok. Thanks. BTW. I do have another question about that though.
> 
> I set currently the max size of the sglist to half of my max of DMA
> commands a bit arbitrarily.
> 
> The reason is that I can only have 64K-4K per transfer (I don't think I
> can do 64K per DBDMA entry). So the above routine can potentially
> breakup, in the worst case scenario, the table into twice as many
> entries if they are all 64K precisely.
> 
> Is there a way I can set a max size for a given sglist segment ? I know
> the iommu can coalesce them back, but at least I know that breaking them
> up won't cause more than what the iommu had as an input .. I don't have
> an alignment restriction.

Maybe pci_set_dma_max_seg_size() which ends up in
blk_queue_max_segment_size().  sata_inic162x uses it and your
requirement seems similar.

>>> +	/* If dbdma didn't execute the STOP command yet, the
>>> +	 * active bit is still set. We consider that we aren't
>>> +	 * sharing interrupts (which is hopefully the case with
>> Wouldn't it be better to clear IRQF_SHARED for those?  Also, what
>> happens w/ irqpoll?
> 
> I don't think we ever use irqpoll on powerpc, but that's a good
> question. I don't know what the right answer is. In fact, I looked a bit
> at the libata core irq handling and it looks like if we return that the
> IRQ wasn't for us, after 1000 iteration, libata goes read the status and
> clear the IRQ anyway, which doesn't sound that a good idea if the IRQ is
> shared....

That code is activated only if ATA_IRQ_TRAP is set and it's not even in
config option.  I don't think anyone ever uses it.  Also, the worst
reading off status register and clearing BMDMA register can do is losing
an interrupt causing a timeout.  Weighed against the dreaded nobody
cared, it's not so bad.  libata definitely needs some improvements in
this area.

> I know some variant of the cell do have a register I can read to make
> sure the IRQ came from it (or not), but I have to figure out if it
> exists on the ohare variant or not. If it does, then I may be able to do
> something saner here.

Ah... if there's anything like that, please use it.  I have no idea why
the original TF and even BMDMA interface didn't include proper interrupt
pending bit.  Oh well, they were designed when floppy was popular after all.

> In the meantime, the above is the same as what I did for drivers/ide.
> 
> Another option would be to use the DMA channel IRQ (it's a separate IRQ
> I can use) and do like Darwin, that is, wait for both IRQs to happen
> before moving forward.

Hmm...

>>> +	/* Check that it can be enabled */
>>> +	if (pci_enable_device(pdev)) {
>>> +		dev_err(&pdev->dev,
>>> +			"Cannot enable controller PCI device\n");
>>> +		return -ENXIO;
>>> +	}
>> Maybe pcim_enable_device() can be used?  Or is it difficult because of
>> the macio stuff?
> 
> I wouldn't bother. There's no point in disabling it, this is just to
> make sure it's been properly enabled at boot.

pcim_enable_device() is a little bit more than managed pdev enable.  It
turns on resource management for other PCI resources like IO regions and
INTX setting.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01 10:56   ` Benjamin Herrenschmidt
  2008-08-01 16:13     ` Tejun Heo
@ 2008-08-01 16:58     ` Alan Cox
  2008-08-01 22:43       ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 7+ messages in thread
From: Alan Cox @ 2008-08-01 16:58 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Tejun Heo, list linux-ide, Jeff Garzik, Alan Cox

> I know and I believe it should still be ok ... As I said, the chipset
> should use the PIO field in the register for PIO transfers. And if the
> above unknown bit is set, I suspect it's just going to increase the
> setup time a bit or something like that, which won't hurt other than
> perfs.

What I do with some other drivers is set the PIO mode in the pio mode function
but defer DMA timing setup to bmdma_start/stop methods. Some chips need this
in the PC world and that works nicely.

> > > +static unsigned long pata_macio_mode_filter(struct ata_device *adev,
> > > +					    unsigned long xfer_mask)
> > > +{
> > > +	struct pata_macio_priv *priv = adev->link->ap->private_data;
> > > +
> > > +	if (priv->dma_regs == NULL)
> > > +		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
> > > +	return xfer_mask;
> > > +}
> > 
> > Wouldn't it be better to clear these during initialization?
> 
> I could. Doesn't matter much where it's done, does it ? Doing there
> allows to deal with a failure in my port_start() callback, if the
> allocation of the DMA table fails, I clear dma_regs.

For x86 we clear it later in some cases too - because the allocation is
done after we pass the sht and ata parameters to the setup functions.

> The reason is that I can only have 64K-4K per transfer (I don't think I
> can do 64K per DBDMA entry). So the above routine can potentially
> breakup, in the worst case scenario, the table into twice as many
> entries if they are all 64K precisely.

See ata_sff_dumb_qc_prep - we have PC chips with the same bug!

--
   "Engineers are 'small children' when it comes to product warning labels"
					-- John Duino


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01 16:13     ` Tejun Heo
@ 2008-08-01 22:40       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2008-08-01 22:40 UTC (permalink / raw)
  To: Tejun Heo; +Cc: list linux-ide, Jeff Garzik, Alan Cox

On Sat, 2008-08-02 at 01:13 +0900, Tejun Heo wrote:

> > Thus I decided to do a single function that does both and OR them
> > together, which corresponds to what Darwin does. I can still split them
> > again, but there's still the question of that mysterious bit :-)
> 
> Yeah, exactly what I was talking about. :-) I think I'll just update the
> core layer such that dma mode too is cleared when (re-)configuration starts.

Ok. As I said tho, I think the current approach will work fine for me.
Those timing regs are cycle counts for various parts of the cycle with
sometime a magic bit that just hard-extended a part of the cycle. So in
the case of the "weird" shared bit, having it spurriously set is of
little consequence other than slowing things down a bit. Having it
spurriously cleared is what I try to avoid.

But yes, it does make sense to clear dma_mode when you re-configure.

> Functionally, it doesn't really matter but it's just customary to use
> ->mode_filter() to enforce dynamic restrictions - say ATA can go over
> UDMA/66 but ATAPI can't or when both devices are present one transfer
> mode affects the other kind of things.  Hmm... But then again,
> ata_bmdma_mode_filter() is used to put the same kind of restriction as
> yours.  Heh..  Never mind this suggestion.

No problem :-)

> >>> +	/* Convert the last command to an input/output */
> >>> +	if (pi) {
> >>> +		table--;
> >>> +		st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
> >>> +		table++;
> >>> +	}
> >> libata guarantees that pi is not zero here.
> > 
> > Ok. Thanks. BTW. I do have another question about that though.
> > 
> > I set currently the max size of the sglist to half of my max of DMA
> > commands a bit arbitrarily.
> > 
> > The reason is that I can only have 64K-4K per transfer (I don't think I
> > can do 64K per DBDMA entry). So the above routine can potentially
> > breakup, in the worst case scenario, the table into twice as many
> > entries if they are all 64K precisely.
> > 
> > Is there a way I can set a max size for a given sglist segment ? I know
> > the iommu can coalesce them back, but at least I know that breaking them
> > up won't cause more than what the iommu had as an input .. I don't have
> > an alignment restriction.
> 
> Maybe pci_set_dma_max_seg_size() which ends up in
> blk_queue_max_segment_size().  sata_inic162x uses it and your
> requirement seems similar.

Bingo ! I need to check this dma_params business, dunon in what case
the pointer exists in the struct dev though, but that looks like it will
do the trick. The iommu layer will deal with it too.

> > I don't think we ever use irqpoll on powerpc, but that's a good
> > question. I don't know what the right answer is. In fact, I looked a bit
> > at the libata core irq handling and it looks like if we return that the
> > IRQ wasn't for us, after 1000 iteration, libata goes read the status and
> > clear the IRQ anyway, which doesn't sound that a good idea if the IRQ is
> > shared....
> 
> That code is activated only if ATA_IRQ_TRAP is set and it's not even in
> config option.  I don't think anyone ever uses it.

Ah ok. Good then.

>   Also, the worst
> reading off status register and clearing BMDMA register can do is losing
> an interrupt causing a timeout.  Weighed against the dreaded nobody
> cared, it's not so bad.  libata definitely needs some improvements in
> this area.

The nobody cares thingy should be dealt by the irq core anyway.

> > I know some variant of the cell do have a register I can read to make
> > sure the IRQ came from it (or not), but I have to figure out if it
> > exists on the ohare variant or not. If it does, then I may be able to do
> > something saner here.
> 
> Ah... if there's anything like that, please use it.  I have no idea why
> the original TF and even BMDMA interface didn't include proper interrupt
> pending bit.  Oh well, they were designed when floppy was popular after all.

Heh :-)

> > In the meantime, the above is the same as what I did for drivers/ide.
> > 
> > Another option would be to use the DMA channel IRQ (it's a separate IRQ
> > I can use) and do like Darwin, that is, wait for both IRQs to happen
> > before moving forward.
> 
> Hmm...

Yup, I'm not fan of that option neither :-)

I'll see if I can dig more info about that interrupt pending register.
I'll have to do some experiments on the -one- ohare based machine I have
(which probably also needs a new disk)  to see if it has such a
register.

In the meantime though, the current hack should do the job at least as
well as the one in drivers/ide does :-)

> pcim_enable_device() is a little bit more than managed pdev enable.  It
> turns on resource management for other PCI resources like IO regions and
> INTX setting.

Oh ok, it would turn my pcim_request_region automagically into managed
ones so I don't have to bother releasing them ? Nice... I wonder if it's
worth adding something like that to of_device / macio_device :-)

Thanks !

Ben.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC/Review] libata driver for Apple "macio" pata
  2008-08-01 16:58     ` Alan Cox
@ 2008-08-01 22:43       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2008-08-01 22:43 UTC (permalink / raw)
  To: Alan Cox; +Cc: Tejun Heo, list linux-ide, Jeff Garzik

On Fri, 2008-08-01 at 12:58 -0400, Alan Cox wrote:
> > I know and I believe it should still be ok ... As I said, the chipset
> > should use the PIO field in the register for PIO transfers. And if the
> > above unknown bit is set, I suspect it's just going to increase the
> > setup time a bit or something like that, which won't hurt other than
> > perfs.
> 
> What I do with some other drivers is set the PIO mode in the pio mode function
> but defer DMA timing setup to bmdma_start/stop methods. Some chips need this
> in the PC world and that works nicely.

That would work too I suppose. I already need to tweak the DMA mode in
dbmda_start because of a need on one of the variants of the cell to add
60ns to the setup time on UDMA reads. However, the search for timing is
a bit of overhead I would have been happy to avoid there :-)

Oh well, we'll see how things go, but in this area, I think my current
code will work just fine.

> > The reason is that I can only have 64K-4K per transfer (I don't think I
> > can do 64K per DBDMA entry). So the above routine can potentially
> > breakup, in the worst case scenario, the table into twice as many
> > entries if they are all 64K precisely.
> 
> See ata_sff_dumb_qc_prep - we have PC chips with the same bug!

Yes well, this is an added bug of alignment restrictions which I don't
have (ie, you can't cross 64K boundaries, I'm pretty sure I can :-)

Anyway, setting the max segment size in the new dma params is what I
need to do here.

Cheers,
Ben.

> --
>    "Engineers are 'small children' when it comes to product warning labels"
> 					-- John Duino
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ide" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-08-01 22:43 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-01  9:08 [RFC/Review] libata driver for Apple "macio" pata Benjamin Herrenschmidt
2008-08-01  9:59 ` Tejun Heo
2008-08-01 10:56   ` Benjamin Herrenschmidt
2008-08-01 16:13     ` Tejun Heo
2008-08-01 22:40       ` Benjamin Herrenschmidt
2008-08-01 16:58     ` Alan Cox
2008-08-01 22:43       ` Benjamin Herrenschmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).