linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH] sata_mv: stabilize for 5081 and other fixes
       [not found] <20060308194627.GA22346@localdomain>
@ 2006-03-12  1:45 ` Jeff Garzik
  2006-03-12  5:24   ` Dan Aloni
  2006-03-12  5:49   ` Dan Aloni
  0 siblings, 2 replies; 3+ messages in thread
From: Jeff Garzik @ 2006-03-12  1:45 UTC (permalink / raw)
  To: Dan Aloni; +Cc: Linux Kernel List, linux-ide@vger.kernel.org

Dan Aloni wrote:
> Hello,
> 
> With the patch below I've managed to stabilize the sata_mv driver 
> running a Marvell 5081 SATA controller. Prior to these changes it
> would cause sporadic memory corruptions right after insmod. I prefer 
> this driver over Marvell's own driver which have all sorts of weird
> bugs.
> 
> The patch also increases the maximum possible number of SG entries 
> per IO to 256 (this large sg_tablesize is a requirement for the 
> application we are developing at my company, XIV). Notice that it 
> also zeros-out a reserved field in the SG, I'm not sure how it 
> affected stability but something did. I've also added the 'volatile' 
> qualifier to some fields that belong to structs involved with I/O.

Adding 'volatile' is to be avoided.  This is simply hiding bugs 
elsewhere.  volatile is an "atom bomb" that kills quite valid 
optimizations, needlessly.  Most likely you need to sprinkle rmb(), 
wmb(), and/or mb() in the correct locations.

Also, it isn't clear at all from your description precisely which 
changes are fixes, and which are cleanups and optimizations.  It would 
be nice to get each category of changes split into two separate patches.



> --- drivers/scsi/sata_mv.c	2006-03-08 11:30:10.000000000 +0200
> +++ drivers/scsi/sata_mv.c	2006-03-08 20:59:55.000000000 +0200
> @@ -72,9 +72,10 @@
>  	 */
>  	MV_CRQB_Q_SZ		= (32 * MV_MAX_Q_DEPTH),
>  	MV_CRPB_Q_SZ		= (8 * MV_MAX_Q_DEPTH),
> -	MV_MAX_SG_CT		= 176,
> +	MV_MAX_SG_CT		= 256,
>  	MV_SG_TBL_SZ		= (16 * MV_MAX_SG_CT),
> -	MV_PORT_PRIV_DMA_SZ	= (MV_CRQB_Q_SZ + MV_CRPB_Q_SZ + MV_SG_TBL_SZ),
> +	MV_PORT_PRIV_DMA1_SZ	= (MV_CRQB_Q_SZ + MV_CRPB_Q_SZ),
> +	MV_PORT_PRIV_DMA2_SZ	= (MV_SG_TBL_SZ),
>  
>  	MV_PORTS_PER_HC		= 4,
>  	/* == (port / MV_PORTS_PER_HC) to determine HC from 0-7 port */
> @@ -98,7 +99,7 @@
>  
>  	CRPB_FLAG_STATUS_SHIFT	= 8,
>  
> -	EPRD_FLAG_END_OF_TBL	= (1 << 31),
> +	EPRD_FLAG_END_OF_TBL	= (1 << 15),
>  
>  	/* PCI interface registers */
>  
> @@ -257,29 +258,34 @@
>  	chip_608x,
>  };
>  
> +#pragma pack(1)
> +
>  /* Command ReQuest Block: 32B */
>  struct mv_crqb {
> -	u32			sg_addr;
> -	u32			sg_addr_hi;
> -	u16			ctrl_flags;
> +	volatile u32			sg_addr;
> +	volatile u32			sg_addr_hi;
> +	volatile u16			ctrl_flags;
>  	u16			ata_cmd[11];
>  };
>  
>  /* Command ResPonse Block: 8B */
>  struct mv_crpb {
> -	u16			id;
> -	u16			flags;
> -	u32			tmstmp;
> +	volatile u16			id;
> +	volatile u16			flags;
> +	volatile u32			tmstmp;
>  };
>  
>  /* EDMA Physical Region Descriptor (ePRD); A.K.A. SG */
>  struct mv_sg {
> -	u32			addr;
> -	u32			flags_size;
> -	u32			addr_hi;
> -	u32			reserved;
> +	volatile u32			addr;
> +	volatile u16			size;
> +	volatile u16			flags;
> +	volatile u32			addr_hi;
> +	volatile u32			reserved;
>  };
>  
> +#pragma pack(0)
> +
>  struct mv_port_priv {
>  	struct mv_crqb		*crqb;
>  	dma_addr_t		crqb_dma;
> @@ -294,8 +300,8 @@
>  };
>  
>  struct mv_port_signal {
> -	u32			amps;
> -	u32			pre;
> +	volatile u32			amps;
> +	volatile u32			pre;
>  };

see comment on 'volatile', above.  We can't add these changes.


>  struct mv_host_priv;
> @@ -365,7 +371,7 @@
>  	.eh_strategy_handler	= ata_scsi_error,
>  	.can_queue		= MV_USE_Q_DEPTH,
>  	.this_id		= ATA_SHT_THIS_ID,
> -	.sg_tablesize		= MV_MAX_SG_CT / 2,
> +	.sg_tablesize		= MV_MAX_SG_CT,

This is adding a bug.

The IOMMU worst case requires a split for each s/g entry, due to DMA 
boundary issues.  See mv_fill_sg() or ata_fill_sg().

Thus, the above "/ 2" is required.


>  	.max_sectors		= ATA_MAX_SECTORS,
>  	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,
>  	.emulated		= ATA_SHT_EMULATED,
> @@ -509,10 +515,7 @@
>  	.reset_bus		= mv_reset_pci_bus,
>  };
>  
> -/*
> - * module options
> - */
> -static int msi;	      /* Use PCI msi; either zero (off, default) or non-zero */
> +static int msi;              /* Use PCI msi; either zero (off, default) or non-zero */

what changed here?


>  /*
> @@ -770,7 +773,8 @@
>  
>  static inline void mv_priv_free(struct mv_port_priv *pp, struct device *dev)
>  {
> -	dma_free_coherent(dev, MV_PORT_PRIV_DMA_SZ, pp->crpb, pp->crpb_dma);
> +	dma_free_coherent(dev, MV_PORT_PRIV_DMA1_SZ, pp->crpb, pp->crpb_dma);
> +	dma_free_coherent(dev, MV_PORT_PRIV_DMA2_SZ, pp->sg_tbl, pp->sg_tbl_dma);
>  }
>  
>  /**
> @@ -788,8 +792,8 @@
>  	struct device *dev = ap->host_set->dev;
>  	struct mv_port_priv *pp;
>  	void __iomem *port_mmio = mv_ap_base(ap);
> -	void *mem;
> -	dma_addr_t mem_dma;
> +	void *mem, *mem2;
> +	dma_addr_t mem_dma, mem_dma2;
>  	int rc = -ENOMEM;
>  
>  	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
> @@ -797,11 +801,17 @@
>  		goto err_out;
>  	memset(pp, 0, sizeof(*pp));
>  
> -	mem = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA_SZ, &mem_dma,
> +	mem = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA1_SZ, &mem_dma,
>  				 GFP_KERNEL);
>  	if (!mem)
>  		goto err_out_pp;
> -	memset(mem, 0, MV_PORT_PRIV_DMA_SZ);
> +	memset(mem, 0, MV_PORT_PRIV_DMA1_SZ);
> +
> +	mem2 = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA2_SZ, &mem_dma2,
> +				 GFP_KERNEL);
> +	if (!mem2)
> +		goto err_out_pp_2;
> +	memset(mem2, 0, MV_PORT_PRIV_DMA2_SZ);
>  
>  	rc = ata_pad_alloc(ap, dev);
>  	if (rc)
> @@ -820,14 +830,12 @@
>  	 */
>  	pp->crpb = mem;
>  	pp->crpb_dma = mem_dma;
> -	mem += MV_CRPB_Q_SZ;
> -	mem_dma += MV_CRPB_Q_SZ;
>  
>  	/* Third item:
>  	 * Table of scatter-gather descriptors (ePRD), 16 bytes each
>  	 */
> -	pp->sg_tbl = mem;
> -	pp->sg_tbl_dma = mem_dma;
> +	pp->sg_tbl = mem2;
> +	pp->sg_tbl_dma = mem_dma2;

why two allocations?

why not just fix the [probable] alignment issue?

I also agree with John Stoffel's comments.

	Jeff



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] sata_mv: stabilize for 5081 and other fixes
  2006-03-12  1:45 ` [PATCH] sata_mv: stabilize for 5081 and other fixes Jeff Garzik
@ 2006-03-12  5:24   ` Dan Aloni
  2006-03-12  5:49   ` Dan Aloni
  1 sibling, 0 replies; 3+ messages in thread
From: Dan Aloni @ 2006-03-12  5:24 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Linux Kernel List, linux-ide@vger.kernel.org

On Sat, Mar 11, 2006 at 08:45:29PM -0500, Jeff Garzik wrote:
> Dan Aloni wrote:
> >Hello,
> >
> >With the patch below I've managed to stabilize the sata_mv driver 
> >running a Marvell 5081 SATA controller. Prior to these changes it
> >would cause sporadic memory corruptions right after insmod. I prefer 
> >this driver over Marvell's own driver which have all sorts of weird
> >bugs.
> >
> >The patch also increases the maximum possible number of SG entries 
> >per IO to 256 (this large sg_tablesize is a requirement for the 
> >application we are developing at my company, XIV). Notice that it 
> >also zeros-out a reserved field in the SG, I'm not sure how it 
> >affected stability but something did. I've also added the 'volatile' 
> >qualifier to some fields that belong to structs involved with I/O.
> 
> Adding 'volatile' is to be avoided.  This is simply hiding bugs 
> elsewhere.  volatile is an "atom bomb" that kills quite valid 
> optimizations, needlessly.  Most likely you need to sprinkle rmb(), 
> wmb(), and/or mb() in the correct locations.

I'm not sure if these memory barriers are even needed, or whether
volatile made any impact on stability - but I can isolate these 
changes today and see if it has any impact simply by experimenting.
 
> Also, it isn't clear at all from your description precisely which 
> changes are fixes, and which are cleanups and optimizations.  It would 
> be nice to get each category of changes split into two separate patches.
 
I would prefer to just minimize the whole thing to a patch that 
only fixes the stabilty problem, less paranoidically. If you can
suggest such patch for me to test I'd be happy to give it a try.
 
> > struct mv_host_priv;
> >@@ -365,7 +371,7 @@
> > 	.eh_strategy_handler	= ata_scsi_error,
> > 	.can_queue		= MV_USE_Q_DEPTH,
> > 	.this_id		= ATA_SHT_THIS_ID,
> >-	.sg_tablesize		= MV_MAX_SG_CT / 2,
> >+	.sg_tablesize		= MV_MAX_SG_CT,
> 
> This is adding a bug.
> 
> The IOMMU worst case requires a split for each s/g entry, due to DMA 
> boundary issues.  See mv_fill_sg() or ata_fill_sg().
> 
> Thus, the above "/ 2" is required.

Interesting, I'll look into that. I wonder how it managed to work 
so far. 
 
> > 	.max_sectors		= ATA_MAX_SECTORS,
> > 	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,
> > 	.emulated		= ATA_SHT_EMULATED,
> >@@ -509,10 +515,7 @@
> > 	.reset_bus		= mv_reset_pci_bus,
> > };
> > 
> >-/*
> >- * module options
> >- */
> >-static int msi;	      /* Use PCI msi; either zero (off, default) or 
> >non-zero */
> >+static int msi;              /* Use PCI msi; either zero (off, default) 
> >or non-zero */
> 
> what changed here?

Nothing, that's just a diff hunk I should have cleaned away.

> > /*
> >@@ -770,7 +773,8 @@
> > 
> > static inline void mv_priv_free(struct mv_port_priv *pp, struct device 
> > *dev)
> > {
> >-	dma_free_coherent(dev, MV_PORT_PRIV_DMA_SZ, pp->crpb, pp->crpb_dma);
> >+	dma_free_coherent(dev, MV_PORT_PRIV_DMA1_SZ, pp->crpb, pp->crpb_dma);
> >+	dma_free_coherent(dev, MV_PORT_PRIV_DMA2_SZ, pp->sg_tbl, 
> >pp->sg_tbl_dma);
> > }
> > 
> > /**
> >@@ -788,8 +792,8 @@
> > 	struct device *dev = ap->host_set->dev;
> > 	struct mv_port_priv *pp;
> > 	void __iomem *port_mmio = mv_ap_base(ap);
> >-	void *mem;
> >-	dma_addr_t mem_dma;
> >+	void *mem, *mem2;
> >+	dma_addr_t mem_dma, mem_dma2;
> > 	int rc = -ENOMEM;
> > 
> > 	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
> >@@ -797,11 +801,17 @@
> > 		goto err_out;
> > 	memset(pp, 0, sizeof(*pp));
> > 
> >-	mem = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA_SZ, &mem_dma,
> >+	mem = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA1_SZ, &mem_dma,
> > 				 GFP_KERNEL);
> > 	if (!mem)
> > 		goto err_out_pp;
> >-	memset(mem, 0, MV_PORT_PRIV_DMA_SZ);
> >+	memset(mem, 0, MV_PORT_PRIV_DMA1_SZ);
> >+
> >+	mem2 = dma_alloc_coherent(dev, MV_PORT_PRIV_DMA2_SZ, &mem_dma2,
> >+				 GFP_KERNEL);
> >+	if (!mem2)
> >+		goto err_out_pp_2;
> >+	memset(mem2, 0, MV_PORT_PRIV_DMA2_SZ);
> > 
> > 	rc = ata_pad_alloc(ap, dev);
> > 	if (rc)
> >@@ -820,14 +830,12 @@
> > 	 */
> > 	pp->crpb = mem;
> > 	pp->crpb_dma = mem_dma;
> >-	mem += MV_CRPB_Q_SZ;
> >-	mem_dma += MV_CRPB_Q_SZ;
> > 
> > 	/* Third item:
> > 	 * Table of scatter-gather descriptors (ePRD), 16 bytes each
> > 	 */
> >-	pp->sg_tbl = mem;
> >-	pp->sg_tbl_dma = mem_dma;
> >+	pp->sg_tbl = mem2;
> >+	pp->sg_tbl_dma = mem_dma2;
> 
> why two allocations?

A 256 entry SG array takes a PAGE_SIZE, I didn't want to allocate more
than PAGE_SIZE to avoid fragmentation problems.
 
> why not just fix the [probable] alignment issue?

Yes, I forgot these allocations should be aligned (by 16, right?).
 
> I also agree with John Stoffel's comments.

Me too.

-- 
Dan Aloni
da-x@monatomic.org, da-x@colinux.org, da-x@gmx.net, dan@xiv.co.il

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] sata_mv: stabilize for 5081 and other fixes
  2006-03-12  1:45 ` [PATCH] sata_mv: stabilize for 5081 and other fixes Jeff Garzik
  2006-03-12  5:24   ` Dan Aloni
@ 2006-03-12  5:49   ` Dan Aloni
  1 sibling, 0 replies; 3+ messages in thread
From: Dan Aloni @ 2006-03-12  5:49 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Linux Kernel List, linux-ide@vger.kernel.org

On Sat, Mar 11, 2006 at 08:45:29PM -0500, Jeff Garzik wrote:
> 
> This is adding a bug.
> 
> The IOMMU worst case requires a split for each s/g entry, due to DMA 
> boundary issues.  See mv_fill_sg() or ata_fill_sg().
> 
> Thus, the above "/ 2" is required.

Okay I figured it out - here we are using the SCSI sg driver, and 
a scatter-gatter entry generated by that driver will never cross 
a page boundery (nor a DMA boundary), because each userspace page 
is mapped into one scatter-gatter entry, so in that case the "/ 2" 
isn't needed. Coming to think about it, that's only valid on x86
and x86_64. Are there other architectures that break that assumption?

I'd still want to be able to read/write 1MB at a time, otherwise 
it would require massive userspace code rewrites in our application,
(limiting it to 0.5MB). Do you have any suggestions about how to do 
that? I mean, it is not trivial to pass 128 entries of 2*PAGE_SIZE 
based on userspace memory.

p.s. I thought scatter-gatter entries are only valid for the page 
they point to, it's good to learn new things :)

-- 
Dan Aloni
da-x@monatomic.org, da-x@colinux.org, da-x@gmx.net, dan@xiv.co.il

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2006-03-12  5:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20060308194627.GA22346@localdomain>
2006-03-12  1:45 ` [PATCH] sata_mv: stabilize for 5081 and other fixes Jeff Garzik
2006-03-12  5:24   ` Dan Aloni
2006-03-12  5:49   ` Dan Aloni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).