From mboxrd@z Thu Jan  1 00:00:00 1970
From: Jeff Garzik
Subject: Re: [PATCH 10/14] sata_mv Use DMA memory pools for hardware memory tables
Date: Fri, 25 Jan 2008 23:25:29 -0500
Message-ID: <479AB639.5020108@pobox.com>
References: <4798FB68.70400@rtr.ca> <4798FCE1.30702@rtr.ca>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
Return-path:
Received: from srv5.dvmed.net ([207.36.208.214]:57345 "EHLO mail.dvmed.net"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752583AbYAZEZc (ORCPT ); Fri, 25 Jan 2008 23:25:32 -0500
In-Reply-To: <4798FCE1.30702@rtr.ca>
Sender: linux-ide-owner@vger.kernel.org
List-Id: linux-ide@vger.kernel.org
To: Mark Lord
Cc: IDE/ATA development list

Mark Lord wrote:
> sata_mv Use DMA memory pools for hardware memory tables.
>
> Create module-owned DMA memory pools, for use in allocating/freeing
> per-port command/response queues and SG tables.  This gives us a way
> to guarantee we meet the hardware address alignment requirements,
> and also reduces memory that might otherwise be wasted on alignment
> gaps.
>
> Signed-off-by: Mark Lord
>
> --- old/drivers/ata/sata_mv.c	2008-01-24 12:40:10.000000000 -0500
> +++ new/drivers/ata/sata_mv.c	2008-01-24 14:17:39.000000000 -0500
> @@ -107,14 +107,12 @@
>
>  	/* CRQB needs alignment on a 1KB boundary. Size == 1KB
>  	 * CRPB needs alignment on a 256B boundary. Size == 256B
> -	 * SG count of 176 leads to MV_PORT_PRIV_DMA_SZ == 4KB
>  	 * ePRD (SG) entries need alignment on a 16B boundary. Size == 16B
>  	 */
>  	MV_CRQB_Q_SZ		= (32 * MV_MAX_Q_DEPTH),
>  	MV_CRPB_Q_SZ		= (8 * MV_MAX_Q_DEPTH),
> -	MV_MAX_SG_CT		= 176,
> +	MV_MAX_SG_CT		= 256,
>  	MV_SG_TBL_SZ		= (16 * MV_MAX_SG_CT),
> -	MV_PORT_PRIV_DMA_SZ	= (MV_CRQB_Q_SZ + MV_CRPB_Q_SZ + MV_SG_TBL_SZ),
>
>  	MV_PORTS_PER_HC		= 4,
>  	/* == (port / MV_PORTS_PER_HC) to determine HC from 0-7 port */
> @@ -701,6 +699,33 @@
>   */
>  static int msi;	/* Use PCI msi; either zero (off, default) or non-zero */
>
> +/*
> + * These consistent DMA memory pools are owned by this module,
> + * and shared among all device instances.  This gives us guaranteed
> + * alignment for hardware-accessed data structures, and low memory
> + * waste in accomplishing the alignment.
> + */
> +static struct dma_pool *mv_crpb_pool;
> +static struct dma_pool *mv_crqb_pool;
> +static struct dma_pool *mv_sg_tbl_pool;
> +
> +static void mv_free_pool_items(struct ata_port *ap)
> +{
> +	struct mv_port_priv *pp = ap->private_data;
> +
> +	if (pp->sg_tbl) {
> +		dma_pool_free(mv_sg_tbl_pool, pp->sg_tbl, pp->sg_tbl_dma);
> +		pp->sg_tbl = NULL;
> +	}
> +	if (pp->crpb) {
> +		dma_pool_free(mv_crpb_pool, pp->crpb, pp->crpb_dma);
> +		pp->crpb = NULL;
> +	}
> +	if (pp->crqb) {
> +		dma_pool_free(mv_crqb_pool, pp->crqb, pp->crqb_dma);
> +		pp->crqb = NULL;
> +	}
> +}
>
>  /* move to PCI layer or libata core? */
>  static int pci_go_64(struct pci_dev *pdev)
> @@ -1110,8 +1135,6 @@
>  	struct mv_host_priv *hpriv = ap->host->private_data;
>  	struct mv_port_priv *pp;
>  	void __iomem *port_mmio = mv_ap_base(ap);
> -	void *mem;
> -	dma_addr_t mem_dma;
>  	unsigned long flags;
>  	int rc;
>
> @@ -1119,37 +1142,21 @@
>  	if (!pp)
>  		return -ENOMEM;
>
> -	mem = dmam_alloc_coherent(dev, MV_PORT_PRIV_DMA_SZ, &mem_dma,
> -				  GFP_KERNEL);
> -	if (!mem)
> -		return -ENOMEM;
> -	memset(mem, 0, MV_PORT_PRIV_DMA_SZ);
> -
>  	rc = ata_pad_alloc(ap, dev);
>  	if (rc)
>  		return rc;
>
> -	/* First item in chunk of DMA memory:
> -	 * 32-slot command request table (CRQB), 32 bytes each in size
> -	 */
> -	pp->crqb = mem;
> -	pp->crqb_dma = mem_dma;
> -	mem += MV_CRQB_Q_SZ;
> -	mem_dma += MV_CRQB_Q_SZ;
> -
> -	/* Second item:
> -	 * 32-slot command response table (CRPB), 8 bytes each in size
> -	 */
> -	pp->crpb = mem;
> -	pp->crpb_dma = mem_dma;
> -	mem += MV_CRPB_Q_SZ;
> -	mem_dma += MV_CRPB_Q_SZ;
> -
> -	/* Third item:
> -	 * Table of scatter-gather descriptors (ePRD), 16 bytes each
> -	 */
> -	pp->sg_tbl = mem;
> -	pp->sg_tbl_dma = mem_dma;
> +	pp->crqb = dma_pool_alloc(mv_crqb_pool, GFP_KERNEL, &pp->crqb_dma);
> +	if (!pp->crqb)
> +		goto out_alloc_failed;
> +
> +	pp->crpb = dma_pool_alloc(mv_crpb_pool, GFP_KERNEL, &pp->crpb_dma);
> +	if (!pp->crpb)
> +		goto out_alloc_failed;
> +
> +	pp->sg_tbl = dma_pool_alloc(mv_sg_tbl_pool, GFP_KERNEL,
> +				    &pp->sg_tbl_dma);
> +	if (!pp->sg_tbl)
> +		goto out_alloc_failed;
>
>  	spin_lock_irqsave(&ap->host->lock, flags);
>
> @@ -1165,6 +1172,10 @@
>  	 */
>  	ap->private_data = pp;
>  	return 0;
> +
> +out_alloc_failed:
> +	mv_free_pool_items(ap);
> +	return -ENOMEM;
>  }
>
>  /**
> @@ -1179,6 +1190,7 @@
>  static void mv_port_stop(struct ata_port *ap)
>  {
>  	mv_stop_dma(ap);
> +	mv_free_pool_items(ap);
>  }
>
>  /**
> @@ -2825,14 +2837,39 @@
>  			 IS_GEN_I(hpriv) ? &mv5_sht : &mv6_sht);
>  }
>
> +static void mv_destroy_pools(void)
> +{
> +	if (mv_sg_tbl_pool)
> +		dma_pool_destroy(mv_sg_tbl_pool);
> +	if (mv_crpb_pool)
> +		dma_pool_destroy(mv_crpb_pool);
> +	if (mv_crqb_pool)
> +		dma_pool_destroy(mv_crqb_pool);
> +}
> +
>  static int __init mv_init(void)
>  {
> +	/* Ideally, a device (second parameter) would "own" these pools.
> +	 * But for maximum memory efficiency, we really need one global
> +	 * set of each, shared among all like devices.  As below.
> +	 */
> +	mv_crqb_pool = dma_pool_create("mv_crqb_q", NULL, MV_CRQB_Q_SZ,
> +				       MV_CRQB_Q_SZ, 0);
> +	mv_crpb_pool = dma_pool_create("mv_crpb_q", NULL, MV_CRPB_Q_SZ,
> +				       MV_CRPB_Q_SZ, 0);
> +	mv_sg_tbl_pool = dma_pool_create("mv_sg_tbl", NULL, MV_SG_TBL_SZ,
> +					 MV_SG_TBL_SZ, 0);

Sorry, I would far rather waste a tiny bit of memory than do this.

The main objection is that DMA allocation should /always/ be associated
with a struct device, as that is where most of the connectivity with
the DMA mask/boundary/etc. and private IOMMU hooks live.

The amount of memory used in the pre-existing configuration is small,
it is already allocated on a per-device basis, and it is statically
allocated -- meaning no worry about allocations failing inside
interrupts or similar nastiness.

	Jeff
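
For illustration, a device-owned variant along the lines suggested
above might create the pools per host, against that host's struct
device, using the managed dmam_pool_create() API.  This is only a
sketch; the mv_create_dma_pools() helper and the pool fields in
mv_host_priv are assumptions for the example, not part of the posted
patch:

	static int mv_create_dma_pools(struct mv_host_priv *hpriv,
				       struct device *dev)
	{
		/* Managed (devres) pools: destroyed automatically when
		 * the owning device goes away, so no module-exit
		 * teardown is needed, and every allocation goes through
		 * the device's DMA mask/boundary/IOMMU configuration.
		 */
		hpriv->crqb_pool = dmam_pool_create("crqb_q", dev,
					MV_CRQB_Q_SZ, MV_CRQB_Q_SZ, 0);
		if (!hpriv->crqb_pool)
			return -ENOMEM;

		hpriv->crpb_pool = dmam_pool_create("crpb_q", dev,
					MV_CRPB_Q_SZ, MV_CRPB_Q_SZ, 0);
		if (!hpriv->crpb_pool)
			return -ENOMEM;

		hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev,
					MV_SG_TBL_SZ, MV_SG_TBL_SZ, 0);
		if (!hpriv->sg_tbl_pool)
			return -ENOMEM;

		return 0;
	}

Called from the probe path with &pdev->dev, this costs one pool header
per controller per table type, rather than one global set, but keeps
every DMA allocation tied to a struct device as requested.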