From mboxrd@z Thu Jan 1 00:00:00 1970 From: Eric Nelson Date: Fri, 02 Mar 2012 16:50:14 -0700 Subject: [U-Boot] [PATCH 2/4] net: fec_mxc: allow use with cache enabled In-Reply-To: <201203030040.29966.marex@denx.de> References: <1330729572-12642-1-git-send-email-eric.nelson@boundarydevices.com> <1330729572-12642-3-git-send-email-eric.nelson@boundarydevices.com> <4F5154A3.30802@boundarydevices.com> <201203030040.29966.marex@denx.de> Message-ID: <4F515CB6.2040807@boundarydevices.com> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: u-boot@lists.denx.de On 03/02/2012 04:40 PM, Marek Vasut wrote: >> Whoops. >> >> Forgot to add the origin of this patch to the commit message: >> http://lists.denx.de/pipermail/u-boot/2012-February/117695.html >> >> Thanks Marek. > > Eric, I hope you won't mind if we respin this patch a few times to make sure > nothing gets broken by this. > > M > Not at all. I just wanted to get it out there along with the dcache patch so we have something to test each of the drivers against. >> >> On 03/02/2012 04:06 PM, Eric Nelson wrote: >>> ensure that transmit and receive buffers are cache-line aligned >>> >>> invalidate cache after each packet received >>> flush cache before transmitting >>> >>> Signed-off-by: Eric Nelson >>> --- >>> >>> drivers/net/fec_mxc.c | 248 >>> ++++++++++++++++++++++++++++++++++++------------- >>> drivers/net/fec_mxc.h | 19 +---- >>> 2 files changed, 184 insertions(+), 83 deletions(-) >>> >>> diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c >>> index 1fdd071..f72304b 100644 >>> --- a/drivers/net/fec_mxc.c >>> +++ b/drivers/net/fec_mxc.c >>> @@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR; >>> >>> #define CONFIG_FEC_MXC_SWAP_PACKET >>> #endif >>> >>> +#ifndef CONFIG_FEC_DESC_ALIGNMENT >>> +#define CONFIG_FEC_DESC_ALIGNMENT ARCH_DMA_MINALIGN >>> +#endif >>> + >>> +#ifndef CONFIG_FEC_DATA_ALIGNMENT >>> +#define CONFIG_FEC_DATA_ALIGNMENT ARCH_DMA_MINALIGN >>> +#endif >>> + >>> +/* Check various alignment issues at compile time */ >>> +#if ((CONFIG_FEC_DESC_ALIGNMENT< 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16 >>> != 0)) +#error "CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!" >>> +#endif >>> + >>> +#if ((CONFIG_FEC_DATA_ALIGNMENT< 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16 >>> != 0)) +#error "CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!" >>> +#endif >>> + >>> +#if ((PKTALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \ >>> + (PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) >>> +#error "PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" >>> +#endif >>> + >>> +#if ((PKTSIZE_ALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \ >>> + (PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) >>> +#error "PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" >>> +#endif >>> + >>> >>> #undef DEBUG >>> >>> struct nbuf { >>> >>> @@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv >>> *fec) >>> >>> * Initialize receive task's buffer descriptors >>> * @param[in] fec all we know about the device yet >>> * @param[in] count receive buffer count to be allocated >>> >>> - * @param[in] size size of each receive buffer >>> + * @param[in] dsize desired size of each receive buffer >>> >>> * @return 0 on success >>> * >>> * For this task we need additional memory for the data buffers. And >>> each * data buffer requires some alignment. Thy must be aligned to a >>> specific >>> >>> - * boundary each (DB_DATA_ALIGNMENT). >>> + * boundary each. >>> >>> */ >>> >>> -static int fec_rbd_init(struct fec_priv *fec, int count, int size) >>> +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize) >>> >>> { >>> >>> - int ix; >>> - uint32_t p = 0; >>> - >>> - /* reserve data memory and consider alignment */ >>> - if (fec->rdb_ptr == NULL) >>> - fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT); >>> - p = (uint32_t)fec->rdb_ptr; >>> - if (!p) { >>> - puts("fec_mxc: not enough malloc memory\n"); >>> - return -ENOMEM; >>> - } >>> - memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT); >>> - p += DB_DATA_ALIGNMENT-1; >>> - p&= ~(DB_DATA_ALIGNMENT-1); >>> - >>> - for (ix = 0; ix< count; ix++) { >>> - writel(p,&fec->rbd_base[ix].data_pointer); >>> - p += size; >>> - writew(FEC_RBD_EMPTY,&fec->rbd_base[ix].status); >>> - writew(0,&fec->rbd_base[ix].data_length); >>> - } >>> + uint32_t size; >>> + int i; >>> + >>> >>> /* >>> >>> - * mark the last RBD to close the ring >>> + * Allocate memory for the buffers. This allocation respects the >>> + * alignment >>> >>> */ >>> >>> - writew(FEC_RBD_WRAP | FEC_RBD_EMPTY,&fec->rbd_base[ix - 1].status); >>> + size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT); >>> + for (i = 0; i< count; i++) { >>> + if (0 == fec->rbd_base[i].data_pointer) { >>> + uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT, > size); >>> + if (!data) { >>> + printf("%s: error allocating rxbuf %d\n", > __func__, i); >>> + goto err; >>> + } >>> + fec->rbd_base[i].data_pointer = (uint32_t)data; >>> + } // needs allocation >>> + fec->rbd_base[i].status = FEC_RBD_EMPTY; >>> + fec->rbd_base[i].data_length = 0; >>> + } >>> + >>> + /* Mark the last RBD to close the ring. */ >>> + fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP; >>> >>> fec->rbd_index = 0; >>> >>> return 0; >>> >>> + >>> +err: >>> + for (; i>= 0; i--) >>> + free((uint8_t *)fec->rbd_base[i].data_pointer); >>> + >>> + return -ENOMEM; >>> >>> } >>> >>> /** >>> >>> @@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int >>> count, int size) >>> >>> */ >>> >>> static void fec_tbd_init(struct fec_priv *fec) >>> { >>> >>> - writew(0x0000,&fec->tbd_base[0].status); >>> - writew(FEC_TBD_WRAP,&fec->tbd_base[1].status); >>> + fec->tbd_base[0].status = 0; >>> + fec->tbd_base[1].status = FEC_TBD_WRAP; >>> >>> fec->tbd_index = 0; >>> >>> } >>> >>> @@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev) >>> >>> { >>> >>> struct fec_priv *fec = (struct fec_priv *)edev->priv; >>> int speed; >>> >>> + uint32_t addr, size; >>> + int i; >>> >>> debug("fec_open: fec_open(dev)\n"); >>> /* full-duplex, heartbeat disabled */ >>> writel(1<< 2,&fec->eth->x_cntrl); >>> fec->rbd_index = 0; >>> >>> + /* Invalidate all descriptors */ >>> + for (i = 0; i< FEC_RBD_NUM - 1; i++) >>> + fec_rbd_clean(0,&fec->rbd_base[i]); >>> + fec_rbd_clean(1,&fec->rbd_base[i]); >>> + >>> + /* Flush the descriptors into RAM */ >>> + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), >>> + CONFIG_FEC_DATA_ALIGNMENT); >>> + addr = (uint32_t)&fec->rbd_base[0]; >>> + flush_dcache_range(addr, addr + size); >>> + >>> >>> #ifdef FEC_QUIRK_ENET_MAC >>> >>> /* Enable ENET HW endian SWAP */ >>> writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP, >>> >>> @@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev) >>> >>> static int fec_init(struct eth_device *dev, bd_t* bd) >>> { >>> >>> - uint32_t base; >>> >>> struct fec_priv *fec = (struct fec_priv *)dev->priv; >>> uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop; >>> uint32_t rcntrl; >>> >>> - int i; >>> + uint32_t size; >>> + int i, ret; >>> >>> /* Initialize MAC address */ >>> fec_set_hwaddr(dev); >>> >>> /* >>> >>> - * reserve memory for both buffer descriptor chains at once >>> - * Datasheet forces the startaddress of each chain is 16 byte >>> - * aligned >>> + * Allocate transmit descriptors, there are two in total. This >>> + * allocation respects cache alignment. >>> >>> */ >>> >>> - if (fec->base_ptr == NULL) >>> - fec->base_ptr = malloc((2 + FEC_RBD_NUM) * >>> - sizeof(struct fec_bd) + DB_ALIGNMENT); >>> - base = (uint32_t)fec->base_ptr; >>> - if (!base) { >>> - puts("fec_mxc: not enough malloc memory\n"); >>> - return -ENOMEM; >>> + if (!fec->tbd_base) { >>> + size = roundup(2 * sizeof(struct fec_bd), >>> + CONFIG_FEC_DATA_ALIGNMENT); >>> + fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); >>> + if (!fec->tbd_base) { >>> + ret = -ENOMEM; >>> + goto err1; >>> + } >>> + memset(fec->tbd_base, 0, size); >>> >>> } >>> >>> - memset((void *)base, 0, (2 + FEC_RBD_NUM) * >>> - sizeof(struct fec_bd) + DB_ALIGNMENT); >>> - base += (DB_ALIGNMENT-1); >>> - base&= ~(DB_ALIGNMENT-1); >>> - >>> - fec->rbd_base = (struct fec_bd *)base; >>> - >>> - base += FEC_RBD_NUM * sizeof(struct fec_bd); >>> >>> - fec->tbd_base = (struct fec_bd *)base; >>> + /* >>> + * Allocate receive descriptors. This allocation respects cache >>> + * alignment. >>> + */ >>> + if (!fec->rbd_base) { >>> + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), >>> + CONFIG_FEC_DATA_ALIGNMENT); >>> + fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); >>> + if (!fec->rbd_base) { >>> + ret = -ENOMEM; >>> + goto err2; >>> + } >>> + memset(fec->rbd_base, 0, size); >>> + } >>> >>> /* >>> >>> * Set interrupt mask register >>> >>> @@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd) >>> >>> * Initialize RxBD/TxBD rings >>> */ >>> >>> if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE)< 0) { >>> >>> - free(fec->base_ptr); >>> - fec->base_ptr = NULL; >>> - return -ENOMEM; >>> + ret = -ENOMEM; >>> + goto err3; >>> >>> } >>> fec_tbd_init(fec); >>> >>> @@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t* >>> bd) >>> >>> #endif >>> >>> fec_open(dev); >>> return 0; >>> >>> + >>> +err3: >>> + free(fec->rbd_base); >>> +err2: >>> + free(fec->tbd_base); >>> +err1: >>> + return ret; >>> >>> } >>> >>> /** >>> >>> @@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev) >>> >>> * @param[in] length Data count in bytes >>> * @return 0 on success >>> */ >>> >>> -static int fec_send(struct eth_device *dev, volatile void* packet, int >>> length) +static int fec_send(struct eth_device *dev, volatile void >>> *packet, int length) >>> >>> { >>> >>> unsigned int status; >>> >>> + uint32_t size; >>> + uint32_t addr; >>> >>> /* >>> >>> * This routine transmits one frame. This routine only accepts >>> >>> @@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev, >>> volatile void* packet, int length) >>> >>> } >>> >>> /* >>> >>> - * Setup the transmit buffer >>> - * Note: We are always using the first buffer for transmission, >>> - * the second will be empty and only used to stop the DMA engine >>> + * Setup the transmit buffer. We are always using the first buffer for >>> + * transmission, the second will be empty and only used to stop the DMA >>> + * engine. We also flush the packet to RAM here to avoid cache trouble. >>> >>> */ >>> >>> #ifdef CONFIG_FEC_MXC_SWAP_PACKET >>> >>> swap_packet((uint32_t *)packet, length); >>> >>> #endif >>> >>> - writew(length,&fec->tbd_base[fec->tbd_index].data_length); >>> - writel((uint32_t)packet,&fec->tbd_base[fec->tbd_index].data_pointer); >>> + >>> + addr = (uint32_t)packet; >>> + size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT); >>> + flush_dcache_range(addr, addr + size); >>> + >>> + fec->tbd_base[fec->tbd_index].data_length = length; >>> + fec->tbd_base[fec->tbd_index].data_pointer = addr; >>> + >>> >>> /* >>> >>> * update BD's status now >>> >>> * This block: >>> @@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile >>> void* packet, int length) >>> >>> * - might be the last BD in the list, so the address counter should >>> * wrap (-> keep the WRAP flag) >>> */ >>> >>> - status = readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_WRAP; >>> + status = fec->tbd_base[fec->tbd_index].status& FEC_TBD_WRAP; >>> >>> status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY; >>> >>> - writew(status,&fec->tbd_base[fec->tbd_index].status); >>> + fec->tbd_base[fec->tbd_index].status = status; >>> + >>> + /* >>> + * Flush data cache. This code flushes both TX descriptors to RAM. >>> + * After this code this code, the descritors will be safely in RAM >>> + * and we can start DMA. >>> + */ >>> + size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); >>> + addr = (uint32_t)fec->tbd_base; >>> + flush_dcache_range(addr, addr + size); >>> >>> /* >>> >>> * Enable SmartDMA transmit task >>> >>> @@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev, >>> volatile void* packet, int length) >>> >>> fec_tx_task_enable(fec); >>> >>> /* >>> >>> - * wait until frame is sent . >>> + * Wait until frame is sent. On each turn of the wait cycle, we must >>> + * invalidate data cache to see what's really in RAM. Also, we need >>> + * barrier here. >>> >>> */ >>> >>> + invalidate_dcache_range(addr, addr + size); >>> >>> while (readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_READY) { >>> >>> udelay(1); >>> >>> + invalidate_dcache_range(addr, addr + size); >>> >>> } >>> >>> + >>> >>> debug("fec_send: status 0x%x index %d\n", >>> >>> readw(&fec->tbd_base[fec->tbd_index].status), >>> fec->tbd_index); >>> >>> @@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev) >>> >>> int frame_length, len = 0; >>> struct nbuf *frame; >>> uint16_t bd_status; >>> >>> + uint32_t addr, size; >>> + int i; >>> >>> uchar buff[FEC_MAX_PKT_SIZE]; >>> >>> /* >>> >>> @@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev) >>> >>> } >>> >>> /* >>> >>> - * ensure reading the right buffer status >>> + * Read the buffer status. Before the status can be read, the data >>> cache + * must be invalidated, because the data in RAM might have been >>> changed + * by DMA. The descriptors are properly aligned to cachelines >>> so there's + * no need to worry they'd overlap. >>> + * >>> + * WARNING: By invalidating the descriptor here, we also invalidate >>> + * the descriptors surrounding this one. Therefore we can NOT change >>> the + * contents of this descriptor nor the surrounding ones. The >>> problem is + * that in order to mark the descriptor as processed, we >>> need to change + * the descriptor. The solution is to mark the whole >>> cache line when all + * descriptors in the cache line are processed. >>> >>> */ >>> >>> + addr = (uint32_t)rbd; >>> + addr&= ~(CONFIG_FEC_DATA_ALIGNMENT - 1); >>> + size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); >>> + invalidate_dcache_range(addr, addr + size); >>> + >>> >>> bd_status = readw(&rbd->status); >>> debug("fec_recv: status 0x%x\n", bd_status); >>> >>> @@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev) >>> >>> frame = (struct nbuf *)readl(&rbd->data_pointer); >>> frame_length = readw(&rbd->data_length) - 4; >>> /* >>> >>> + * Invalidate data cache over the buffer >>> + */ >>> + addr = (uint32_t)frame; >>> + size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT); >>> + invalidate_dcache_range(addr, addr + size); >>> + >>> + /* >>> >>> * Fill the buffer and pass it to upper layers >>> */ >>> >>> #ifdef CONFIG_FEC_MXC_SWAP_PACKET >>> >>> @@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev) >>> >>> (ulong)rbd->data_pointer, >>> bd_status); >>> >>> } >>> >>> + >>> >>> /* >>> >>> - * free the current buffer, restart the engine >>> - * and move forward to the next buffer >>> + * Free the current buffer, restart the engine and move forward >>> + * to the next buffer. Here we check if the whole cacheline of >>> + * descriptors was already processed and if so, we mark it free >>> + * as whole. >>> >>> */ >>> >>> - fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd); >>> + size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1; >>> + if ((fec->rbd_index& size) == size) { >>> + i = fec->rbd_index - size; >>> + addr = (uint32_t)&fec->rbd_base[i]; >>> + for (; i<= fec->rbd_index + size; i++) { >>> + if (i == (FEC_RBD_NUM - 1)) >>> + fec_rbd_clean(1,&fec->rbd_base[i]); >>> + else >>> + fec_rbd_clean(0,&fec->rbd_base[i]); >>> + } >>> + flush_dcache_range(addr, >>> + addr + CONFIG_FEC_DATA_ALIGNMENT); >>> + } >>> + >>> >>> fec_rx_task_enable(fec); >>> fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM; >>> >>> } >>> >>> diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h >>> index 2eb7803..852b2e0 100644 >>> --- a/drivers/net/fec_mxc.h >>> +++ b/drivers/net/fec_mxc.h >>> @@ -234,22 +234,6 @@ struct ethernet_regs { >>> >>> #endif >>> >>> /** >>> >>> - * @brief Descriptor buffer alignment >>> - * >>> - * i.MX27 requires a 16 byte alignment (but for the first element only) >>> - */ >>> -#define DB_ALIGNMENT 16 >>> - >>> -/** >>> - * @brief Data buffer alignment >>> - * >>> - * i.MX27 requires a four byte alignment for transmit and 16 bits >>> - * alignment for receive so take 16 >>> - * Note: Valid for member data_pointer in struct buffer_descriptor >>> - */ >>> -#define DB_DATA_ALIGNMENT 16 >>> - >>> -/** >>> >>> * @brief Receive& Transmit Buffer Descriptor definitions >>> * >>> * Note: The first BD must be aligned (see DB_ALIGNMENT) >>> >>> @@ -282,8 +266,7 @@ struct fec_priv { >>> >>> struct fec_bd *tbd_base; /* TBD ring */ >>> int tbd_index; /* next transmit BD to write */ >>> bd_t *bd; >>> >>> - void *rdb_ptr; >>> - void *base_ptr; >>> + uint8_t *tdb_ptr; >>> >>> int dev_id; >>> int phy_id; >>> struct mii_dev *bus; >> >> _______________________________________________ >> U-Boot mailing list >> U-Boot at lists.denx.de >> http://lists.denx.de/mailman/listinfo/u-boot >