From mboxrd@z Thu Jan 1 00:00:00 1970 From: Marek Vasut Date: Sat, 3 Mar 2012 00:40:29 +0100 Subject: [U-Boot] [PATCH 2/4] net: fec_mxc: allow use with cache enabled In-Reply-To: <4F5154A3.30802@boundarydevices.com> References: <1330729572-12642-1-git-send-email-eric.nelson@boundarydevices.com> <1330729572-12642-3-git-send-email-eric.nelson@boundarydevices.com> <4F5154A3.30802@boundarydevices.com> Message-ID: <201203030040.29966.marex@denx.de> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: u-boot@lists.denx.de > Whoops. > > Forgot to add the origin of this patch to the commit message: > http://lists.denx.de/pipermail/u-boot/2012-February/117695.html > > Thanks Marek. Eric, I hope you won't mind if we respin this patch a few times to make sure nothing gets broken by this. M > > On 03/02/2012 04:06 PM, Eric Nelson wrote: > > ensure that transmit and receive buffers are cache-line aligned > > > > invalidate cache after each packet received > > flush cache before transmitting > > > > Signed-off-by: Eric Nelson > > --- > > > > drivers/net/fec_mxc.c | 248 > > ++++++++++++++++++++++++++++++++++++------------- > > drivers/net/fec_mxc.h | 19 +---- > > 2 files changed, 184 insertions(+), 83 deletions(-) > > > > diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c > > index 1fdd071..f72304b 100644 > > --- a/drivers/net/fec_mxc.c > > +++ b/drivers/net/fec_mxc.c > > @@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR; > > > > #define CONFIG_FEC_MXC_SWAP_PACKET > > #endif > > > > +#ifndef CONFIG_FEC_DESC_ALIGNMENT > > +#define CONFIG_FEC_DESC_ALIGNMENT ARCH_DMA_MINALIGN > > +#endif > > + > > +#ifndef CONFIG_FEC_DATA_ALIGNMENT > > +#define CONFIG_FEC_DATA_ALIGNMENT ARCH_DMA_MINALIGN > > +#endif > > + > > +/* Check various alignment issues at compile time */ > > +#if ((CONFIG_FEC_DESC_ALIGNMENT< 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16 > > != 0)) +#error "CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!" > > +#endif > > + > > +#if ((CONFIG_FEC_DATA_ALIGNMENT< 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16 > > != 0)) +#error "CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!" > > +#endif > > + > > +#if ((PKTALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \ > > + (PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) > > +#error "PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" > > +#endif > > + > > +#if ((PKTSIZE_ALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \ > > + (PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) > > +#error "PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" > > +#endif > > + > > > > #undef DEBUG > > > > struct nbuf { > > > > @@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv > > *fec) > > > > * Initialize receive task's buffer descriptors > > * @param[in] fec all we know about the device yet > > * @param[in] count receive buffer count to be allocated > > > > - * @param[in] size size of each receive buffer > > + * @param[in] dsize desired size of each receive buffer > > > > * @return 0 on success > > * > > * For this task we need additional memory for the data buffers. And > > each * data buffer requires some alignment. Thy must be aligned to a > > specific > > > > - * boundary each (DB_DATA_ALIGNMENT). > > + * boundary each. > > > > */ > > > > -static int fec_rbd_init(struct fec_priv *fec, int count, int size) > > +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize) > > > > { > > > > - int ix; > > - uint32_t p = 0; > > - > > - /* reserve data memory and consider alignment */ > > - if (fec->rdb_ptr == NULL) > > - fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT); > > - p = (uint32_t)fec->rdb_ptr; > > - if (!p) { > > - puts("fec_mxc: not enough malloc memory\n"); > > - return -ENOMEM; > > - } > > - memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT); > > - p += DB_DATA_ALIGNMENT-1; > > - p&= ~(DB_DATA_ALIGNMENT-1); > > - > > - for (ix = 0; ix< count; ix++) { > > - writel(p,&fec->rbd_base[ix].data_pointer); > > - p += size; > > - writew(FEC_RBD_EMPTY,&fec->rbd_base[ix].status); > > - writew(0,&fec->rbd_base[ix].data_length); > > - } > > + uint32_t size; > > + int i; > > + > > > > /* > > > > - * mark the last RBD to close the ring > > + * Allocate memory for the buffers. This allocation respects the > > + * alignment > > > > */ > > > > - writew(FEC_RBD_WRAP | FEC_RBD_EMPTY,&fec->rbd_base[ix - 1].status); > > + size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT); > > + for (i = 0; i< count; i++) { > > + if (0 == fec->rbd_base[i].data_pointer) { > > + uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT, size); > > + if (!data) { > > + printf("%s: error allocating rxbuf %d\n", __func__, i); > > + goto err; > > + } > > + fec->rbd_base[i].data_pointer = (uint32_t)data; > > + } // needs allocation > > + fec->rbd_base[i].status = FEC_RBD_EMPTY; > > + fec->rbd_base[i].data_length = 0; > > + } > > + > > + /* Mark the last RBD to close the ring. */ > > + fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP; > > > > fec->rbd_index = 0; > > > > return 0; > > > > + > > +err: > > + for (; i>= 0; i--) > > + free((uint8_t *)fec->rbd_base[i].data_pointer); > > + > > + return -ENOMEM; > > > > } > > > > /** > > > > @@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int > > count, int size) > > > > */ > > > > static void fec_tbd_init(struct fec_priv *fec) > > { > > > > - writew(0x0000,&fec->tbd_base[0].status); > > - writew(FEC_TBD_WRAP,&fec->tbd_base[1].status); > > + fec->tbd_base[0].status = 0; > > + fec->tbd_base[1].status = FEC_TBD_WRAP; > > > > fec->tbd_index = 0; > > > > } > > > > @@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev) > > > > { > > > > struct fec_priv *fec = (struct fec_priv *)edev->priv; > > int speed; > > > > + uint32_t addr, size; > > + int i; > > > > debug("fec_open: fec_open(dev)\n"); > > /* full-duplex, heartbeat disabled */ > > writel(1<< 2,&fec->eth->x_cntrl); > > fec->rbd_index = 0; > > > > + /* Invalidate all descriptors */ > > + for (i = 0; i< FEC_RBD_NUM - 1; i++) > > + fec_rbd_clean(0,&fec->rbd_base[i]); > > + fec_rbd_clean(1,&fec->rbd_base[i]); > > + > > + /* Flush the descriptors into RAM */ > > + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), > > + CONFIG_FEC_DATA_ALIGNMENT); > > + addr = (uint32_t)&fec->rbd_base[0]; > > + flush_dcache_range(addr, addr + size); > > + > > > > #ifdef FEC_QUIRK_ENET_MAC > > > > /* Enable ENET HW endian SWAP */ > > writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP, > > > > @@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev) > > > > static int fec_init(struct eth_device *dev, bd_t* bd) > > { > > > > - uint32_t base; > > > > struct fec_priv *fec = (struct fec_priv *)dev->priv; > > uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop; > > uint32_t rcntrl; > > > > - int i; > > + uint32_t size; > > + int i, ret; > > > > /* Initialize MAC address */ > > fec_set_hwaddr(dev); > > > > /* > > > > - * reserve memory for both buffer descriptor chains at once > > - * Datasheet forces the startaddress of each chain is 16 byte > > - * aligned > > + * Allocate transmit descriptors, there are two in total. This > > + * allocation respects cache alignment. > > > > */ > > > > - if (fec->base_ptr == NULL) > > - fec->base_ptr = malloc((2 + FEC_RBD_NUM) * > > - sizeof(struct fec_bd) + DB_ALIGNMENT); > > - base = (uint32_t)fec->base_ptr; > > - if (!base) { > > - puts("fec_mxc: not enough malloc memory\n"); > > - return -ENOMEM; > > + if (!fec->tbd_base) { > > + size = roundup(2 * sizeof(struct fec_bd), > > + CONFIG_FEC_DATA_ALIGNMENT); > > + fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); > > + if (!fec->tbd_base) { > > + ret = -ENOMEM; > > + goto err1; > > + } > > + memset(fec->tbd_base, 0, size); > > > > } > > > > - memset((void *)base, 0, (2 + FEC_RBD_NUM) * > > - sizeof(struct fec_bd) + DB_ALIGNMENT); > > - base += (DB_ALIGNMENT-1); > > - base&= ~(DB_ALIGNMENT-1); > > - > > - fec->rbd_base = (struct fec_bd *)base; > > - > > - base += FEC_RBD_NUM * sizeof(struct fec_bd); > > > > - fec->tbd_base = (struct fec_bd *)base; > > + /* > > + * Allocate receive descriptors. This allocation respects cache > > + * alignment. > > + */ > > + if (!fec->rbd_base) { > > + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), > > + CONFIG_FEC_DATA_ALIGNMENT); > > + fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); > > + if (!fec->rbd_base) { > > + ret = -ENOMEM; > > + goto err2; > > + } > > + memset(fec->rbd_base, 0, size); > > + } > > > > /* > > > > * Set interrupt mask register > > > > @@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd) > > > > * Initialize RxBD/TxBD rings > > */ > > > > if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE)< 0) { > > > > - free(fec->base_ptr); > > - fec->base_ptr = NULL; > > - return -ENOMEM; > > + ret = -ENOMEM; > > + goto err3; > > > > } > > fec_tbd_init(fec); > > > > @@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t* > > bd) > > > > #endif > > > > fec_open(dev); > > return 0; > > > > + > > +err3: > > + free(fec->rbd_base); > > +err2: > > + free(fec->tbd_base); > > +err1: > > + return ret; > > > > } > > > > /** > > > > @@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev) > > > > * @param[in] length Data count in bytes > > * @return 0 on success > > */ > > > > -static int fec_send(struct eth_device *dev, volatile void* packet, int > > length) +static int fec_send(struct eth_device *dev, volatile void > > *packet, int length) > > > > { > > > > unsigned int status; > > > > + uint32_t size; > > + uint32_t addr; > > > > /* > > > > * This routine transmits one frame. This routine only accepts > > > > @@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev, > > volatile void* packet, int length) > > > > } > > > > /* > > > > - * Setup the transmit buffer > > - * Note: We are always using the first buffer for transmission, > > - * the second will be empty and only used to stop the DMA engine > > + * Setup the transmit buffer. We are always using the first buffer for > > + * transmission, the second will be empty and only used to stop the DMA > > + * engine. We also flush the packet to RAM here to avoid cache trouble. > > > > */ > > > > #ifdef CONFIG_FEC_MXC_SWAP_PACKET > > > > swap_packet((uint32_t *)packet, length); > > > > #endif > > > > - writew(length,&fec->tbd_base[fec->tbd_index].data_length); > > - writel((uint32_t)packet,&fec->tbd_base[fec->tbd_index].data_pointer); > > + > > + addr = (uint32_t)packet; > > + size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT); > > + flush_dcache_range(addr, addr + size); > > + > > + fec->tbd_base[fec->tbd_index].data_length = length; > > + fec->tbd_base[fec->tbd_index].data_pointer = addr; > > + > > > > /* > > > > * update BD's status now > > > > * This block: > > @@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile > > void* packet, int length) > > > > * - might be the last BD in the list, so the address counter should > > * wrap (-> keep the WRAP flag) > > */ > > > > - status = readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_WRAP; > > + status = fec->tbd_base[fec->tbd_index].status& FEC_TBD_WRAP; > > > > status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY; > > > > - writew(status,&fec->tbd_base[fec->tbd_index].status); > > + fec->tbd_base[fec->tbd_index].status = status; > > + > > + /* > > + * Flush data cache. This code flushes both TX descriptors to RAM. > > + * After this code this code, the descritors will be safely in RAM > > + * and we can start DMA. > > + */ > > + size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); > > + addr = (uint32_t)fec->tbd_base; > > + flush_dcache_range(addr, addr + size); > > > > /* > > > > * Enable SmartDMA transmit task > > > > @@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev, > > volatile void* packet, int length) > > > > fec_tx_task_enable(fec); > > > > /* > > > > - * wait until frame is sent . > > + * Wait until frame is sent. On each turn of the wait cycle, we must > > + * invalidate data cache to see what's really in RAM. Also, we need > > + * barrier here. > > > > */ > > > > + invalidate_dcache_range(addr, addr + size); > > > > while (readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_READY) { > > > > udelay(1); > > > > + invalidate_dcache_range(addr, addr + size); > > > > } > > > > + > > > > debug("fec_send: status 0x%x index %d\n", > > > > readw(&fec->tbd_base[fec->tbd_index].status), > > fec->tbd_index); > > > > @@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev) > > > > int frame_length, len = 0; > > struct nbuf *frame; > > uint16_t bd_status; > > > > + uint32_t addr, size; > > + int i; > > > > uchar buff[FEC_MAX_PKT_SIZE]; > > > > /* > > > > @@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev) > > > > } > > > > /* > > > > - * ensure reading the right buffer status > > + * Read the buffer status. Before the status can be read, the data > > cache + * must be invalidated, because the data in RAM might have been > > changed + * by DMA. The descriptors are properly aligned to cachelines > > so there's + * no need to worry they'd overlap. > > + * > > + * WARNING: By invalidating the descriptor here, we also invalidate > > + * the descriptors surrounding this one. Therefore we can NOT change > > the + * contents of this descriptor nor the surrounding ones. The > > problem is + * that in order to mark the descriptor as processed, we > > need to change + * the descriptor. The solution is to mark the whole > > cache line when all + * descriptors in the cache line are processed. > > > > */ > > > > + addr = (uint32_t)rbd; > > + addr&= ~(CONFIG_FEC_DATA_ALIGNMENT - 1); > > + size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); > > + invalidate_dcache_range(addr, addr + size); > > + > > > > bd_status = readw(&rbd->status); > > debug("fec_recv: status 0x%x\n", bd_status); > > > > @@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev) > > > > frame = (struct nbuf *)readl(&rbd->data_pointer); > > frame_length = readw(&rbd->data_length) - 4; > > /* > > > > + * Invalidate data cache over the buffer > > + */ > > + addr = (uint32_t)frame; > > + size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT); > > + invalidate_dcache_range(addr, addr + size); > > + > > + /* > > > > * Fill the buffer and pass it to upper layers > > */ > > > > #ifdef CONFIG_FEC_MXC_SWAP_PACKET > > > > @@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev) > > > > (ulong)rbd->data_pointer, > > bd_status); > > > > } > > > > + > > > > /* > > > > - * free the current buffer, restart the engine > > - * and move forward to the next buffer > > + * Free the current buffer, restart the engine and move forward > > + * to the next buffer. Here we check if the whole cacheline of > > + * descriptors was already processed and if so, we mark it free > > + * as whole. > > > > */ > > > > - fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd); > > + size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1; > > + if ((fec->rbd_index& size) == size) { > > + i = fec->rbd_index - size; > > + addr = (uint32_t)&fec->rbd_base[i]; > > + for (; i<= fec->rbd_index + size; i++) { > > + if (i == (FEC_RBD_NUM - 1)) > > + fec_rbd_clean(1,&fec->rbd_base[i]); > > + else > > + fec_rbd_clean(0,&fec->rbd_base[i]); > > + } > > + flush_dcache_range(addr, > > + addr + CONFIG_FEC_DATA_ALIGNMENT); > > + } > > + > > > > fec_rx_task_enable(fec); > > fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM; > > > > } > > > > diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h > > index 2eb7803..852b2e0 100644 > > --- a/drivers/net/fec_mxc.h > > +++ b/drivers/net/fec_mxc.h > > @@ -234,22 +234,6 @@ struct ethernet_regs { > > > > #endif > > > > /** > > > > - * @brief Descriptor buffer alignment > > - * > > - * i.MX27 requires a 16 byte alignment (but for the first element only) > > - */ > > -#define DB_ALIGNMENT 16 > > - > > -/** > > - * @brief Data buffer alignment > > - * > > - * i.MX27 requires a four byte alignment for transmit and 16 bits > > - * alignment for receive so take 16 > > - * Note: Valid for member data_pointer in struct buffer_descriptor > > - */ > > -#define DB_DATA_ALIGNMENT 16 > > - > > -/** > > > > * @brief Receive& Transmit Buffer Descriptor definitions > > * > > * Note: The first BD must be aligned (see DB_ALIGNMENT) > > > > @@ -282,8 +266,7 @@ struct fec_priv { > > > > struct fec_bd *tbd_base; /* TBD ring */ > > int tbd_index; /* next transmit BD to write */ > > bd_t *bd; > > > > - void *rdb_ptr; > > - void *base_ptr; > > + uint8_t *tdb_ptr; > > > > int dev_id; > > int phy_id; > > struct mii_dev *bus; > > _______________________________________________ > U-Boot mailing list > U-Boot at lists.denx.de > http://lists.denx.de/mailman/listinfo/u-boot