From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Eliezer Tamir" Subject: Re: [RFC][BNX2X]: New driver for Broadcom 10Gb Ethernet. Date: Thu, 02 Aug 2007 21:09:20 +0300 Message-ID: <46B21DD0.7070206@broadcom.com> References: <1185957077.5552.22.camel@dell> <200708020006.13457.mb@bu3sch.de> Mime-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15; format=flowed Content-Transfer-Encoding: 7bit Cc: "Michael Chan" , davem@davemloft.net, jeff@garzik.org, netdev@vger.kernel.org, eilong@broadcom.com To: "Michael Buesch" Return-path: Received: from mms3.broadcom.com ([216.31.210.19]:2872 "EHLO MMS3.broadcom.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755250AbXHBSJl (ORCPT ); Thu, 2 Aug 2007 14:09:41 -0400 In-Reply-To: <200708020006.13457.mb@bu3sch.de> Sender: netdev-owner@vger.kernel.org List-Id: netdev.vger.kernel.org Michael, Thanks for going over the code. My responses are inline. Eliezer Michael Buesch wrote: > On Wednesday 01 August 2007 10:31:17 Michael Chan wrote: > >> +typedef struct { >> + u8 reserved[64]; >> +} license_key_t; > > No typedef. > What is a "license key" used for, anyway? This will be removed. This is just a placeholder that has the right size. > >> +#define RUN_AT(x) (jiffies + (x)) > > That macro does only obfuscate code, in my opinion. > If you want jiffies+x, better opencode it. OK > >> +typedef enum { >> + BCM5710 = 0, >> +} board_t; > > No typedef. Do > enum bnx2x_board { > BCM5710 = 0, > }; > Or something like that. OK > >> +static struct pci_device_id bnx2x_pci_tbl[] = { > > static const struct... > OK >> + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5710, >> + PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5710 }, >> + { 0 } >> +}; > >> +static inline u32 bnx2x_bits_en(struct bnx2x *bp, u32 block, u32 reg, >> + u32 bits) > > Does that really need to be inline? I'd suggest dropping inline. Does not need to be inlined. Will change. > >> +{ >> + u32 val = REG_RD(bp, block, reg); >> + >> + val |= bits; >> + REG_WR(bp, block, reg, val); >> + return val; >> +} >> + >> +static inline u32 bnx2x_bits_dis(struct bnx2x *bp, u32 block, u32 reg, >> + u32 bits) > > Same here. Same > >> +{ >> + u32 val = REG_RD(bp, block, reg); >> + >> + val &= ~bits; >> + REG_WR(bp, block, reg, val); >> + return val; >> +} >> + >> +static int bnx2x_mdio22_write(struct bnx2x *bp, u32 reg, u32 val) >> +{ >> + int rc; >> + u32 tmp, i; >> + int port = bp->port; >> + u32 emac_base = port ?
GRCBASE_EMAC1 : GRCBASE_EMAC0; >> + >> + if (bp->phy_flags & PHY_INT_MODE_AUTO_POLLING_FLAG) { >> + >> + tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE); >> + tmp &= ~EMAC_MDIO_MODE_AUTO_POLL; >> + EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, tmp); >> + REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE); >> + udelay(40); >> + } >> + >> + tmp = ((bp->phy_addr << 21) | (reg << 16) | >> + (val & EMAC_MDIO_COMM_DATA) | >> + EMAC_MDIO_COMM_COMMAND_WRITE_22 | >> + EMAC_MDIO_COMM_START_BUSY); >> + EMAC_WR(EMAC_REG_EMAC_MDIO_COMM, tmp); >> + >> + for (i = 0; i < 50; i++) { >> + udelay(10); >> + >> + tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_COMM); >> + if (!(tmp & EMAC_MDIO_COMM_START_BUSY)) { >> + udelay(5); >> + break; >> + } >> + } >> + >> + if (tmp & EMAC_MDIO_COMM_START_BUSY) { >> + BNX2X_ERR("write phy register failed\n"); >> + >> + rc = -EBUSY; >> + } else { >> + rc = 0; >> + } >> + >> + if (bp->phy_flags & PHY_INT_MODE_AUTO_POLLING_FLAG) { >> + >> + tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE); >> + tmp |= EMAC_MDIO_MODE_AUTO_POLL; >> + EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, tmp); >> + } >> + >> + return rc; >> +} >> + >> +static int bnx2x_mdio22_read(struct bnx2x *bp, u32 reg, u32 *ret_val) >> +{ >> + int rc; >> + u32 val, i; >> + int port = bp->port; >> + u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; >> + >> + if (bp->phy_flags & PHY_INT_MODE_AUTO_POLLING_FLAG) { >> + >> + val = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE); >> + val &= ~EMAC_MDIO_MODE_AUTO_POLL; >> + EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, val); >> + REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE); >> + udelay(40); >> + } >> + >> + val = ((bp->phy_addr << 21) | (reg << 16) | >> + EMAC_MDIO_COMM_COMMAND_READ_22 | >> + EMAC_MDIO_COMM_START_BUSY); >> + EMAC_WR(EMAC_REG_EMAC_MDIO_COMM, val); >> + >> + for (i = 0; i < 50; i++) { >> + udelay(10); >> + >> + val = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_COMM); >> + if (!(val & EMAC_MDIO_COMM_START_BUSY)) { > > No udelay(5) here, like in write above? There is a udelay. > >> + val &= EMAC_MDIO_COMM_DATA; >> + break; >> + } >> + } > >> +static int bnx2x_mdio45_vwrite(struct bnx2x *bp, u32 reg, u32 addr, u32 val) >> +{ >> + int i; >> + u32 rd_val; >> + >> + for (i = 0; i < 10; i++) { >> + bnx2x_mdio45_write(bp, reg, addr, val); >> + mdelay(5); > > Can you msleep(5) here? Can't sleep, this can happen from a "sleepless" context. Maybe we can break it down to smaller mdelays, will ask the HW guys. > >> + bnx2x_mdio45_read(bp, reg, addr, &rd_val); >> + /* if the read value is not the same as the value we wrote, >> + we should write it again */ >> + if (rd_val == val) { >> + return 0; >> + } >> + } >> + BNX2X_ERR("MDIO write in CL45 failed\n"); >> + return -EBUSY; >> +} > >> +/* DMAE command positions used >> + * Port0 14 >> + * Port1 15 >> + */ >> +static void bnx2x_wb_write_dmae(struct bnx2x *bp, u32 wb_addr, u32 *wb_write, >> + u32 wb_len) >> +{ >> + struct dmae_command *dmae = &bp->dmae; >> + int port = bp->port; >> + u32 *wb_comp = bnx2x_sp(bp, wb_comp); >> + >> + memcpy(bnx2x_sp(bp, wb_write[0]), wb_write, wb_len * 4); >> + memset(dmae, 0, sizeof(struct dmae_command)); >> + >> + dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC | >> + DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE | >> + DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET | >> + DMAE_CMD_ENDIANITY_DW_SWAP | >> + (port? 
DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0)); >> + dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_write)); >> + dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_write)); >> + dmae->dst_addr_lo = wb_addr >> 2; >> + dmae->dst_addr_hi = 0; >> + dmae->len = wb_len; >> + dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, wb_comp)); >> + dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, wb_comp)); >> + dmae->comp_val = BNX2X_WB_COMP_VAL; >> + bnx2x_post_dmae(bp, port? 15 : 14); >> + >> + *wb_comp = 0; > > wmb(); Right. > >> + REG_WR32(bp, GRCBASE_DMAE, >> + (bp->port? DMAE_REGISTERS_GO_C15 : >> + DMAE_REGISTERS_GO_C14), 1); >> + udelay(5); >> + while (*wb_comp != BNX2X_WB_COMP_VAL) { >> + udelay(5); > > Not sure what this is doing here. > Do we need some DMA syncing here? > > Anyway, we _do_ need a timeout here. > Probably also some kind of memory barrier, if no DMA syncing is needed. This causes the chip to read from pci consistent memory, so I guess we can use a wmb(). Will add a timeout. > >> + } >> +} > >> +static void bnx2x_emac_enable(struct bnx2x *bp) >> +{ >> + u32 val; >> + int port = bp->port; >> + u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; > >> + while (val & EMAC_MODE_RESET) { >> + val = REG_RD(bp, emac_base, EMAC_REG_EMAC_MODE); >> + DP(NETIF_MSG_LINK, "EMAC reset reg is %u\n", val); >> + } > > A timeout, please. OK > >> + /* reset tx part */ >> + EMAC_WR(EMAC_REG_EMAC_TX_MODE, EMAC_TX_MODE_RESET); >> + >> + while (val & EMAC_TX_MODE_RESET) { >> + val = REG_RD(bp, emac_base, EMAC_REG_EMAC_TX_MODE); >> + DP(NETIF_MSG_LINK, "EMAC reset reg is %u\n", val); >> + } > > A timeout, please. OK > >> +static void bnx2x_pbf_update(struct bnx2x *bp) >> +{ >> + int port = bp->port; >> + u32 init_crd, crd; >> + u32 count = 1000; >> + u32 pause = 0; >> + >> + /* disable port */ >> + REG_WR(bp, GRCBASE_PBF, >> + PBF_REGISTERS_DISABLE_NEW_TASK_PROC_P0 + port*4, 0x1); >> + >> + /* wait for init credit */ >> + init_crd = REG_RD(bp, GRCBASE_PBF, >> + PBF_REGISTERS_P0_INIT_CRD + port*4); >> + crd = REG_RD(bp, GRCBASE_PBF, PBF_REGISTERS_P0_CREDIT + port*8); >> + DP(NETIF_MSG_LINK, "init_crd 0x%x crd 0x%x\n", init_crd, crd); >> + >> + while ((init_crd != crd) && count) { >> + mdelay(5); > > msleep(5)? Can't sleep in link change code since it can be called from the slowpath task. > >> + crd = REG_RD(bp, GRCBASE_PBF, >> + PBF_REGISTERS_P0_CREDIT + port*8); >> + count--; >> + } > >> + /* probe the credit changes */ >> + REG_WR(bp, GRCBASE_PBF, PBF_REGISTERS_INIT_P0 + port*4, 0x1); >> + mdelay(5); > > msleep(5)?
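Same here; this is in the link change path, so it can't sleep.

Going back to the wb_comp wait in bnx2x_wb_write_dmae above, this is roughly the shape I have in mind for the wmb() plus timeout (just a sketch; the loop count is a placeholder until we get a worst-case completion time from the HW folks):

	int count = 200;	/* placeholder timeout */

	*wb_comp = 0;
	wmb();	/* make sure the chip sees the cleared completion word */
	REG_WR32(bp, GRCBASE_DMAE,
		 (bp->port? DMAE_REGISTERS_GO_C15 : DMAE_REGISTERS_GO_C14), 1);

	while (*wb_comp != BNX2X_WB_COMP_VAL) {
		if (!count--) {
			BNX2X_ERR("DMAE completion timed out\n");
			break;
		}
		udelay(5);
	}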
> >> + REG_WR(bp, GRCBASE_PBF, PBF_REGISTERS_INIT_P0 + port*4, 0x0); >> + >> + /* enable port */ >> + REG_WR(bp, GRCBASE_PBF, >> + PBF_REGISTERS_DISABLE_NEW_TASK_PROC_P0 + port*4, 0x0); >> +} > >> +/* This function is called upon link interrupt */ >> +static void bnx2x_link_update(struct bnx2x *bp) >> +{ >> + u32 gp_status; >> + int port = bp->port; >> + int i; >> + int link_10g; >> + >> + DP(NETIF_MSG_LINK, "port %x, is xgxs %x, stat_mask 0x%x, " >> + "int_mask 0x%x, saved_mask 0x%x, MI_INT %x, SERDES_LINK %x," >> + " 10G %x, XGXS_LINK %x\n", >> + port, (bp->phy_flags & PHY_XGSX_FLAG), >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_STATUS_INTERRUPT_PORT0 + port*4), >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_MASK_INTERRUPT_PORT0 + port*4), >> + bp->nig_mask, >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_EMAC0_STATUS_MISC_MI_INT + port*0x18), >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_SERDES0_STATUS_LINK_STATUS + port*0x3c), >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_XGXS0_STATUS_LINK10G + port*0x68), >> + REG_RD(bp, GRCBASE_NIG, >> + NIG_REGISTERS_XGXS0_STATUS_LINK_STATUS + port*0x68) >> + ); >> + >> + MDIO_SET_REG_BANK(bp, MDIO_REG_BANK_GP_STATUS); >> + /* avoid fast toggling */ >> + for (i = 0 ; i < 10 ; i++) { >> + mdelay(10); > > msleep(10)? > >> + bnx2x_mdio22_read(bp, MDIO_GP_STATUS_TOP_AN_STATUS1, >> + &gp_status); >> + } >> + >> + bnx2x_link_settings_status(bp, gp_status); >> + >> + /* anything 10 and over uses the bmac */ >> + link_10g = ((bp->line_speed >= SPEED_10000) && >> + (bp->line_speed <= SPEED_16000)); >> + >> + bnx2x_link_int_ack(bp, link_10g); >> + >> + /* link is up only if both local phy and external phy are up */ >> + if (bp->link_up && bnx2x_ext_phy_is_link_up(bp)) { >> + if (link_10g) { >> + bnx2x_bmac_enable(bp, 0); >> + bnx2x_leds_set(bp, SPEED_10000); >> + >> + } else { >> + bnx2x_emac_enable(bp); >> + bnx2x_emac_program(bp); >> + >> + /* AN complete? */ >> + if (gp_status & MDIO_AN_CL73_OR_37_COMPLETE) { >> + if (!(bp->phy_flags & PHY_SGMII_FLAG)) { >> + bnx2x_set_sgmii_tx_driver(bp); >> + }/* Not SGMII */ >> + } >> + } >> + bnx2x_link_up(bp); >> + >> + } else { /* link down */ >> + bnx2x_leds_unset(bp); >> + bnx2x_link_down(bp); >> + } >> +} > >> +static inline u16 bnx2x_update_dsb_idx(struct bnx2x *bp) > > Too big for inlining. > If this func is only used in one place, gcc will inline it automatically. This is the slowpath status block so the inline can be removed. > >> +{ >> + struct host_def_status_block *dsb = bp->def_status_blk; >> + u16 rc = 0; >> + >> + if (bp->def_att_idx != dsb->atten_status_block.attn_bits_index) { >> + bp->def_att_idx = dsb->atten_status_block.attn_bits_index; >> + rc |= 1; >> + } >> + if (bp->def_c_idx != dsb->c_def_status_block.status_block_index) { >> + bp->def_c_idx = dsb->c_def_status_block.status_block_index; >> + rc |= 2; >> + } >> + if (bp->def_u_idx != dsb->u_def_status_block.status_block_index) { >> + bp->def_u_idx = dsb->u_def_status_block.status_block_index; >> + rc |= 4; >> + } >> + if (bp->def_x_idx != dsb->x_def_status_block.status_block_index) { >> + bp->def_x_idx = dsb->x_def_status_block.status_block_index; >> + rc |= 8; >> + } >> + if (bp->def_t_idx != dsb->t_def_status_block.status_block_index) { >> + bp->def_t_idx = dsb->t_def_status_block.status_block_index; >> + rc |= 16; >> + } >> + rmb(); /* TBD chack this */ >> + return rc; >> +} >> + >> +static inline u16 bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp) > > Too big for inlining. 
> This is fastpath code; I will have to see if removing the inline impacts performance. >> +{ >> + struct host_status_block *fpsb = fp->status_blk; >> + u16 rc = 0; >> + >> + if (fp->fp_c_idx != fpsb->c_status_block.status_block_index) { >> + fp->fp_c_idx = fpsb->c_status_block.status_block_index; >> + rc |= 1; >> + } >> + if (fp->fp_u_idx != fpsb->u_status_block.status_block_index) { >> + fp->fp_u_idx = fpsb->u_status_block.status_block_index; >> + rc |= 2; >> + } >> + rmb(); /* TBD chack this */ >> + return rc; >> +} >> + >> +static inline u32 bnx2x_tx_avail(struct bnx2x_fastpath *fp) > > Too big for inlining. This is in the fast-fastpath; I think it should be inlined. > >> +{ >> + u16 used; >> + u32 prod = fp->tx_bd_prod; >> + u32 cons = fp->tx_bd_cons; >> + >> + smp_mb(); > > This barrier needs a comment. Why is it there? And why SMP only? > >> + >> + used = (NUM_TX_BD - NUM_TX_RINGS + prod - cons + >> + (cons / TX_DESC_CNT) - (prod / TX_DESC_CNT)); >> + >> + if (prod >= cons) { >> + /* used = prod - cons - prod/size + cons/size */ >> + used -= NUM_TX_BD - NUM_TX_RINGS; >> + } >> + >> + BUG_TRAP(used <= fp->bp->tx_ring_size); >> + BUG_TRAP((fp->bp->tx_ring_size - used) <= MAX_TX_AVAIL); >> + return(fp->bp->tx_ring_size - used); >> +} > >> +/*========================================================================*/ >> + >> +/* acquire split MCP access lock register */ >> +static int bnx2x_lock_alr( struct bnx2x *bp) >> +{ >> + int rc = 0; >> + u32 i, j, val; >> + >> + i = 100; >> + val = 1UL << 31; >> + >> + REG_WR(bp, GRCBASE_MCP, 0x9c, val); >> + for (j = 0; j < i*10; j++) { >> + val = REG_RD(bp, GRCBASE_MCP, 0x9c); >> + if (val & (1L << 31)) { >> + break; >> + } >> + >> + mdelay(5); > > msleep(5)? Called from a link change event; can't sleep. > >> + } >> + >> + if (!(val & (1L << 31))) { >> + BNX2X_ERR("Cannot acquire nvram interface.\n"); >> + >> + rc = -EBUSY; >> + } >> + >> + return rc; >> +} > >> +static void bnx2x_free_mem(struct bnx2x *bp) >> +{ >> + >> +#define BNX2X_PCI_FREE(x, y, size) do { \ >> + if (x) { \ >> + pci_free_consistent(bp->pdev, size, x, y); \ >> + x = NULL; \ >> + } \ >> +} while (0) >> + >> +#define BNX2X_KFREE(x) do { \ >> + if (x) { \ >> + vfree(x); \ >> + x = NULL; \ >> + } \ >> +} while (0) >> + >> + int i; >> + >> + /* fastpath */ >> + for_each_queue(bp, i) { >> + >> + /* Status blocks */ >> + BNX2X_PCI_FREE(bnx2x_fp(bp, i, status_blk), >> + bnx2x_fp(bp, i, status_blk_mapping), >> + sizeof(struct host_status_block) >> + + sizeof(struct eth_tx_db_data)); >> + >> + /* fast path rings: tx_buf tx_desc rx_buf rx_desc rx_comp */ >> + BNX2X_KFREE(bnx2x_fp(bp, i, tx_buf_ring)); >> + BNX2X_PCI_FREE(bnx2x_fp(bp, i, tx_desc_ring), >> + bnx2x_fp(bp, i, tx_desc_mapping), >> + sizeof(struct eth_tx_bd) * NUM_TX_BD); >> + >> + BNX2X_KFREE(bnx2x_fp(bp, i, rx_buf_ring)); >> + BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_desc_ring), >> + bnx2x_fp(bp, i, rx_desc_mapping), >> + sizeof(struct eth_rx_bd) * NUM_RX_BD); >> + >> + BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_comp_ring), >> + bnx2x_fp(bp, i, rx_comp_mapping), >> + sizeof(struct eth_rx_bd) * NUM_RX_BD); >> + } >> + >> + BNX2X_KFREE( bp->fp); >> + /* end of fast path*/ >> + >> + BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, >> + (sizeof(struct host_def_status_block))); >> + >> + BNX2X_PCI_FREE(bp->slowpath, bp->slowpath_mapping, >> + (sizeof(struct bnx2x_slowpath))); >> + >> + if (iscsi_active) { >> + BNX2X_PCI_FREE(bp->t1, bp->t1_mapping, 64*1024); >> + BNX2X_PCI_FREE(bp->t2, bp->t2_mapping, 16*1024); >> +
BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024); >> + BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024); >> + } >> + >> + BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, PAGE_SIZE); >> + >> +#undef BNX2X_PCI_FREE >> +#undef BNX2X_KFREE >> +} >> + >> +static int bnx2x_alloc_mem(struct bnx2x *bp) >> +{ >> + int i; >> + >> +#define BNX2X_PCI_ALLOC(x, y, size) do { \ >> + x = pci_alloc_consistent(bp->pdev, size, y); \ >> + if (x == NULL) \ >> + goto alloc_mem_err; \ >> + memset(x, 0, size); \ >> +} while (0) >> + >> +#define BNX2X_ALLOC(x, size) do { \ >> + x = vmalloc(size); \ > > Why not zalloc()? We want to allow allocating chunks bigger than zalloc will allow. > >> + if (x == NULL) \ >> + goto alloc_mem_err; \ >> + memset(x, 0, size); \ >> +} while (0) >> + >> + /* fastpath */ >> + BNX2X_ALLOC(bp->fp, sizeof(struct bnx2x_fastpath)*bp->num_queues); >> + >> + for_each_queue(bp, i) { >> + >> + bnx2x_fp(bp, i, bp) = bp; >> + >> + /* Status blocks */ >> + BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, status_blk), >> + &bnx2x_fp(bp, i, status_blk_mapping), >> + sizeof(struct host_status_block) + >> + sizeof(struct eth_tx_db_data)); >> + >> + bnx2x_fp(bp, i, tx_prods_mapping) = >> + bnx2x_fp(bp, i, status_blk_mapping) + >> + sizeof(struct host_status_block); >> + >> + bnx2x_fp(bp, i, hw_tx_prods) = >> + (void *)(bnx2x_fp(bp, i, status_blk) + 1); >> + >> + /* fast path rings: tx_buf tx_desc rx_buf rx_desc rx_comp */ >> + BNX2X_ALLOC(bnx2x_fp(bp, i, tx_buf_ring), >> + sizeof(struct sw_tx_bd) * NUM_TX_BD); >> + BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, tx_desc_ring), >> + &bnx2x_fp(bp, i, tx_desc_mapping), >> + sizeof(struct eth_tx_bd) * NUM_TX_BD); >> + >> + BNX2X_ALLOC(bnx2x_fp(bp, i, rx_buf_ring), >> + sizeof(struct sw_rx_bd) * NUM_RX_BD); >> + BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_desc_ring), >> + &bnx2x_fp(bp, i, rx_desc_mapping), >> + sizeof(struct eth_rx_bd) * NUM_RX_BD); >> + >> + BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_comp_ring), >> + &bnx2x_fp(bp, i, rx_comp_mapping), >> + sizeof(struct eth_rx_bd) * NUM_RX_BD); >> + } >> + /* end of fast path*/ >> + >> + BNX2X_PCI_ALLOC(bp->def_status_blk, &bp->def_status_blk_mapping, >> + sizeof(struct host_def_status_block)); >> + >> + BNX2X_PCI_ALLOC(bp->slowpath, &bp->slowpath_mapping, >> + sizeof(struct bnx2x_slowpath)); >> + >> + if (iscsi_active) { >> + BNX2X_PCI_ALLOC(bp->t1, &bp->t1_mapping, 64*1024); >> + >> + /* Initialize T1 */ >> + for (i = 0; i < 64*1024; i += 64) { >> + *(u64 *)((char *)bp->t1 + i+56) = 0x0UL; >> + *(u64 *)((char *)bp->t1 + i+3) = 0x0UL; >> + } >> + >> + /* allocate searcher T2 table >> + we allocate 1/4 of alloc num for T2 >> + (which is not entered into the ILT) */ >> + BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, 16*1024); >> + >> + /* Initialize T2 */ >> + for (i = 0; i < 16*1024; i += 64) { >> + *(u64 *)((char *)bp->t2 + i+56) = >> + bp->t2_mapping + i + 64; >> + } >> + >> + /* now sixup the last line in the block >> + * to point to the next block >> + */ >> + *(u64 *)((char *)bp->t2 + 1024*16-8) = bp->t2_mapping; >> + >> + /* Timer block array (MAX_CONN*8) >> + * phys uncached for now 1024 conns >> + */ >> + BNX2X_PCI_ALLOC(bp->timers, &bp->timers_mapping, 8*1024); >> + >> + /* QM queues (128*MAX_CONN) */ >> + BNX2X_PCI_ALLOC(bp->qm, &bp->qm_mapping, 128*1024); >> + } >> + >> + /* Slow path ring */ >> + BNX2X_PCI_ALLOC(bp->spq, &bp->spq_mapping, PAGE_SIZE); >> + >> + return 0; >> + >> +alloc_mem_err: >> + bnx2x_free_mem(bp); >> + return -ENOMEM; >> + >> +#undef BNX2X_PCI_ALLOC >> +#undef BNX2X_ALLOC >> +} >> + >> + >> + >> +static void
bnx2x_set_mac_addr(struct bnx2x *bp) >> +{ >> + struct mac_configuration_cmd *config = bnx2x_sp(bp, mac_config); >> + >> + /* CAM allocation >> + * unicasts 0-31:port0 32-63:port1 >> + * multicast 64-127:port0 128-191:port1 >> + */ >> + config->hdr.length = 2; >> + config->hdr.offset = bp->port ? 31 : 0; >> + config->hdr.reserved0 = 0; >> + config->hdr.reserved1 = 0; >> + >> + /* primary MAC */ >> + config->config_table[0].cam_entry.msb_mac_addr = >> + ntohl(*(u32 *)&bp->dev->dev_addr[0]); >> + config->config_table[0].cam_entry.lsb_mac_addr = >> + ntohs(*(u16 *)&bp->dev->dev_addr[4]); > > Better use xx_to_cpu() > anyway, ntohX is defined as beX_to_cpu(). > Are you sure this is correct? A byte array is little endian. This is a HW quirk; will change to cpu_to_le(). > >> +static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, >> + struct bnx2x_fastpath *fp, u16 index) > > Too big for inlining. > If this is only called in one place, gcc will inline it automatically. This is fast-fast-path (called once for each rx packet); I think it should be inlined. > >> +{ >> + struct sk_buff *skb; >> + struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index]; >> + struct eth_rx_bd *rxbd = &fp->rx_desc_ring[index]; >> + dma_addr_t mapping; >> + >> + skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size); >> + >> + if (skb == NULL) { >> + return -ENOMEM; >> + } >> + >> + mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size, >> + PCI_DMA_FROMDEVICE); > > You need to check if the mapping succeed. > There's a macro for that: dma_mapping_error() OK > >> + rx_buf->skb = skb; >> + pci_unmap_addr_set(rx_buf, mapping, mapping); >> +#ifdef BNX2X_STOP_ON_ERROR >> + rxbd->reserved = fp->last_alloc++; >> +#else >> + rxbd->reserved = 0; >> +#endif >> + rxbd->len = bp->rx_buf_use_size; >> + rxbd->addr_hi = U64_HI(mapping); >> + rxbd->addr_lo = U64_LO(mapping); >> + return 0; >> +} > >> +static void bnx2x_tx_int(struct bnx2x_fastpath *fp, int work) >> +{ >> + struct bnx2x *bp = fp->bp; >> + u16 hw_cons, sw_cons, bd_cons = fp->tx_bd_cons; >> + int done = 0; >> + >> + hw_cons = *fp->tx_cons_sb; >> + sw_cons = fp->tx_pkt_cons; >> + >> +#ifdef BNX2X_STOP_ON_ERROR >> + if (unlikely(bp->panic)) { >> + return; >> + } >> +#endif >> + >> + while (sw_cons != hw_cons) { >> + u16 pkt_cons; >> + >> + pkt_cons = TX_BD(sw_cons); >> + >> + /* prefetch(bp->tx_buf_ring[pkt_cons].skb); */ >> + >> + DP(NETIF_MSG_TX_DONE, "sw_cons=%u hw_cons=%u pkt_cons=%d\n", >> + sw_cons, hw_cons, pkt_cons); >> + >> +/* if (NEXT_TX_IDX(sw_cons)!=hw_cons){ >> + rmb(); >> + prefetch(fp->tx_buf_ring[NEXT_TX_IDX(sw_cons)].skb); >> + } >> +*/ >> + >> + bd_cons = bnx2x_free_tx_pkt(bp, fp, pkt_cons); >> + sw_cons++; >> + done++; >> + >> + if (done == work) { >> + break; >> + } >> + } >> + >> + fp->tx_pkt_cons = sw_cons; >> + fp->tx_bd_cons = bd_cons; >> + >> + smp_mb(); > > Please add a comment why we need a SMP MB here. > >> + >> + /* TBD need a thresh? */ >> + if (unlikely(netif_queue_stopped(bp->dev))) { >> + netif_tx_lock(bp->dev); >> + if ((netif_queue_stopped(bp->dev)) && >> + (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)) { >> + netif_wake_queue(bp->dev); >> + } >> + netif_tx_unlock(bp->dev); >> + } >> +} > >> +static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp, >> + struct sk_buff *skb, u16 cons, u16 prod) > > Too big for inlining. Now that I think about it, this is better off out-of-line. Will change.
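For bnx2x_alloc_rx_skb above, the dma_mapping_error() check will look roughly like this (a sketch; on error we just free the skb and return -ENOMEM, same as the allocation failure path):

	skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
	if (skb == NULL)
		return -ENOMEM;

	mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size,
				 PCI_DMA_FROMDEVICE);
	if (unlikely(dma_mapping_error(mapping))) {
		dev_kfree_skb(skb);
		return -ENOMEM;
	}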
> >> +{ >> + struct bnx2x *bp = fp->bp; >> + struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons]; >> + struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod]; >> + struct eth_rx_bd *cons_bd = &fp->rx_desc_ring[cons]; >> + struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod]; >> + >> + pci_dma_sync_single_for_device(bp->pdev, >> + pci_unmap_addr(cons_rx_buf, mapping), >> + bp->rx_offset + RX_COPY_THRESH, >> + PCI_DMA_FROMDEVICE); >> + >> + prod_rx_buf->skb = cons_rx_buf->skb; >> + pci_unmap_addr_set(prod_rx_buf, mapping, >> + pci_unmap_addr(cons_rx_buf, mapping)); >> + *prod_bd = *cons_bd; >> +#ifdef BNX2X_STOP_ON_ERROR >> + prod_bd->reserved = fp->last_alloc++; >> +#endif >> +} > >> +static void bnx2x_attn_int(struct bnx2x *bp) >> +{ >> + int port = bp->port; >> + >> + /* read local copy of bits */ >> + u16 attn_bits = bp->def_status_blk->atten_status_block.attn_bits; >> + u16 attn_ack = bp->def_status_blk->atten_status_block.attn_bits_ack; >> + u16 attn_state = bp->attn_state; >> + >> + /* look for changed bits */ >> + u16 asserted = attn_bits & ~attn_ack & ~attn_state; >> + u16 deasserted = ~attn_bits & attn_ack & attn_state; > > Do you probably want | instead of & here? This is code that came from the HW guys; it works. > >> + >> + DP(NETIF_MSG_HW, >> + "attn_bits %x, attn_ack %x, asserted %x, deasserted %x \n", >> + attn_bits, attn_ack, asserted, deasserted); >> + >> + if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) { > > Do you want && or | here, instead of & ? > >> + BNX2X_ERR("bad attention state\n"); >> + } > >> +} >> + >> +static inline int bnx2x_has_work(struct bnx2x_fastpath *fp) > > Probably better not inline. This is also fastpath; I will have to see if removing the inline impacts performance. > >> +{ >> + u16 rx_cons_sb = *fp->rx_cons_sb; >> + >> + if ((rx_cons_sb & MAX_RX_DESC_CNT) == MAX_RX_DESC_CNT) >> + rx_cons_sb++; >> + >> + if ((rx_cons_sb != fp->rx_comp_cons) || >> + (*fp->tx_cons_sb != fp->tx_pkt_cons)) >> + return 1; >> + >> + return 0; >> +} > >> +static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance) >> +{ >> + struct net_device *dev = dev_instance; > > You need to check if dev==NULL and bail out. > Another driver sharing the IRQ with this might choose to pass the dev > pointer as NULL. This is the MSI-X slowpath interrupt handler; it does not share its interrupt. Same for the MSI-X fastpath handler. (snipping some lines) >> +static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) >> +{ >> + struct net_device *dev = dev_instance; > > Check if dev==NULL OK > >> + struct bnx2x *bp = netdev_priv(dev); >> + u16 status = bnx2x_ack_int(bp); >> + >> + if (unlikely(status == 0)) { > > That's not unlikely. We are optimizing for the most common case. I think we should even think about adding a warn_once if we find out our line is shared, since this is an extremely suboptimal configuration. > >> + DP(NETIF_MSG_INTR, "not our interrupt!\n"); >> + return IRQ_NONE; >> + } >> + >> + DP(NETIF_MSG_INTR, "got an interrupt status is %u\n", status); >> + >> +#ifdef BNX2X_STOP_ON_ERROR >> + if (unlikely(bp->panic)) { >> + return IRQ_HANDLED; >> + } >> +#endif >> + >> + /* Return here if interrupt is shared and is disabled. */ >> + if (unlikely(atomic_read(&bp->intr_sem) != 0)) { >> + DP(NETIF_MSG_INTR, "called but int_sem not 0, returning.
\n"); >> + return IRQ_HANDLED; >> + } >> + >> + if (status & 0x2) { >> + >> + struct bnx2x_fastpath *fp = &bp->fp[0]; >> + >> + prefetch(fp->rx_cons_sb); >> + prefetch(fp->tx_cons_sb); >> + prefetch(&fp->status_blk->c_status_block.status_block_index); >> + prefetch(&fp->status_blk->u_status_block.status_block_index); >> + >> + netif_rx_schedule(dev); >> + >> + status &= ~0x2; >> + >> + if (!status) { >> + return IRQ_HANDLED; >> + } >> + } >> + >> + if (unlikely(status & 0x1)) { >> + >> + tasklet_schedule(&bp->sp_task); >> + status &= ~0x1; >> + if (!status) { >> + return IRQ_HANDLED; >> + } >> + } >> + >> + DP(NETIF_MSG_INTR, "got an unknown interrupt! (status is %u)\n", >> + status); >> + >> + return IRQ_HANDLED; >> +} > >> +/* some of the internal memories are not directly readable from the driver >> + to test them we send debug packets >> + */ >> +static int bnx2x_int_mem_test(struct bnx2x *bp) >> +{ >> + u32 val; >> + u32 count; >> + u8 hw; >> + u8 port; >> + u8 i; >> + u32 factor; >> + >> + switch (CHIP_REV(bp)) { >> + case CHIP_REV_EMUL: >> + hw = INIT_EMULATION; >> + factor = 2000; >> + break; >> + case CHIP_REV_FPGA: >> + hw = INIT_FPGA; >> + factor = 120; >> + break; >> + default: >> + hw = INIT_ASIC; >> + factor = 1; >> + break; >> + } >> + >> + port = PORT0 << bp->port; >> + >> + DP(NETIF_MSG_HW, "mem_wrk start part1\n"); >> + >> + /* Disable inputs of parser neighbor blocks */ >> + REG_WR(bp, GRCBASE_TSDM, TSDM_REGISTERS_ENABLE_IN1, 0x0); >> + REG_WR(bp, GRCBASE_TCM, TCM_REGISTERS_PRS_IFEN, 0x0); >> + REG_WR(bp, GRCBASE_CFC, CFC_REGISTERS_DEBUG0, 0x1); >> + NIG_WR(NIG_REGISTERS_PRS_REQ_IN_EN, 0x0); >> + >> + /* Write 0 to parser credits for CFC search request */ >> + REG_WR(bp, GRCBASE_PRS, PRS_REGISTERS_CFC_SEARCH_INITIAL_CREDIT, 0x0); >> + >> + /* send Ethernet packet */ >> + bnx2x_lb_pckt(bp); >> + >> + /* TODO do i reset NIG statistic ?*/ >> + /* Wait until NIG register shows 1 packet of size 0x10 */ >> + count = 1000; >> + while (count) { >> + val = REG_RD(bp, GRCBASE_NIG, NIG_REGISTERS_STAT2_BRB_OCTET); >> + REG_RD(bp, GRCBASE_NIG, NIG_REGISTERS_STAT2_BRB_OCTET + 4); >> + >> + if (val == 0x10) { >> + break; >> + } >> + msleep(10 * factor); > > That would be msleep(20000) for CHIP_REV_EMUL. > besides the sleep being pretty big (does it really need to wait > 20 sec?), some arches don't support such a big msleep value. > use ssleep(). It would take 5 and a half hours for the > timeout count to hit in. The timeout is too long, will change the factor. (However CHIP_REV_EMUL means we are not running on a real chip, we are running on a simulated chip.) > >> + count--; >> + } >> + if (val != 0x10) { >> + BNX2X_ERR("NIG timeout val = 0x%x\n", val); >> + return -1; >> + } >> + >> + /* Wait until PRS register shows 1 packet */ >> + count = 1000; >> + while (count) { >> + val = REG_RD(bp, GRCBASE_PRS, PRS_REGISTERS_NUM_OF_PACKETS); >> + >> + if (val == 0x1) { >> + break; >> + } >> + msleep(10 * factor); > > Same here. > >> + count--; >> + } >> + if (val != 0x1) { >> + BNX2X_ERR("PRS timeout val = 0x%x\n", val); >> + return -2; >> + } > >> + /* Wait until NIG register shows 10 + 1 >> + packets of size 11*0x10 = 0xb0 */ >> + count = 1000; >> + while (count) { >> + >> + val = REG_RD(bp, GRCBASE_NIG, NIG_REGISTERS_STAT2_BRB_OCTET); >> + REG_RD(bp, GRCBASE_NIG, NIG_REGISTERS_STAT2_BRB_OCTET + 4); >> + >> + if (val == 0xb0) { >> + break; >> + } >> + msleep(10 * factor); > > Same here. 
> >> + count--; >> + } >> + if (val != 0xb0) { >> + BNX2X_ERR("NIG timeout val = 0x%x\n", val); >> + return -3; >> + } >> + >> + /* Wait until PRS register shows 2 packet */ >> + val = REG_RD(bp, GRCBASE_PRS, PRS_REGISTERS_NUM_OF_PACKETS); >> + >> + if (val != 0x2) { >> + BNX2X_ERR("PRS timeout val = 0x%x\n", val); >> + } >> + >> + /* Write 1 to parser credits for CFC search request */ >> + REG_WR(bp, GRCBASE_PRS, PRS_REGISTERS_CFC_SEARCH_INITIAL_CREDIT, 0x1); >> + >> + /* Wait until PRS register shows 3 packet */ >> + msleep(10 * factor); > > Same. > >> + DP(NETIF_MSG_HW, "done\n"); >> + return 0; /* OK */ >> +} > >> +static void bnx2x_stop_stats(struct bnx2x *bp) >> +{ >> + atomic_set(&bp->stats_state, STATS_STATE_STOP); >> + DP(BNX2X_MSG_STATS, "stats_state - STOP\n"); >> + while (atomic_read(&bp->stats_state) != STATS_STATE_DISABLE) { > > This seems to need a memory barrier (on this side _and_ on the write side, > wherever that is (comment!)). > There are special mbs for atomic variables. This does not need to be an atomic_t; will change it to an int. > >> + msleep(100); >> + } >> + DP(BNX2X_MSG_STATS, "stats_state - DISABLE\n"); >> +} > >> +static void bnx2x_update_stats(struct bnx2x *bp) >> +{ >> + >> + if (!bnx2x_update_storm_stats(bp)) { >> + >> + if (bp->phy_flags & PHY_BMAC_FLAG) { >> + bnx2x_update_bmac_stats(bp); >> + >> + } else if (bp->phy_flags & PHY_EMAC_FLAG) { >> + bnx2x_update_emac_stats(bp); >> + >> + } else { /* unreached */ >> + BNX2X_ERR("no MAC active\n"); >> + return; >> + } >> + >> + bnx2x_update_net_stats(bp); >> + } >> + >> + if (bp->msglevel & NETIF_MSG_TIMER) { >> + printk(KERN_DEBUG "%s:\n" >> + KERN_DEBUG "tx avail(%x) rx usage(%x)\n" >> + KERN_DEBUG "tx pkt (%lx) rx pkt(%lx)\n" >> + KERN_DEBUG "tx hc idx(%x) rx hc index(%x)\n" >> + KERN_DEBUG "%s (Xoff events %u)\n" >> + KERN_DEBUG "brb drops %u\n" >> + KERN_DEBUG "tstats->no_buff_discard, %u\n" >> + KERN_DEBUG "tstats->errors_discard, %u\n" >> + KERN_DEBUG "tstats->mac_filter_discard, %u\n" >> + KERN_DEBUG "tstats->xxoverflow_discard, %u\n" >> + KERN_DEBUG "tstats->ttl0_discard, %u\n", >> + bp->dev->name, >> + bnx2x_tx_avail(bp->fp), >> + (u16)(*bp->fp->rx_cons_sb - bp->fp->rx_comp_cons), >> + bp->net_stats.tx_packets, bp->net_stats.rx_packets, >> + *bp->fp->tx_cons_sb, *bp->fp->rx_cons_sb, >> + netif_queue_stopped(bp->dev)? "Xoff" : "Xon ", >> + bp->slowpath->eth_stats.driver_xoff, >> + bp->slowpath->eth_stats.brb_discard, >> + bp->slowpath->eth_stats.no_buff_discard, >> + bp->slowpath->eth_stats.errors_discard, >> + bp->slowpath->eth_stats.mac_filter_discard, >> + bp->slowpath->eth_stats.xxoverflow_discard, >> + bp->slowpath->eth_stats.ttl0_discard); >> + } >> + >> + if (bp->state != BNX2X_STATE_OPEN) { >> + DP(BNX2X_MSG_STATS, "state is %x, returning\n", bp->state); >> + return; >> + } >> + >> +#ifdef BNX2X_STOP_ON_ERROR >> + if (unlikely(bp->panic)) { >> + return; >> + } >> +#endif >> + >> + if (bp->fw_mb) { >> + REG_WR32(bp, GRCBASE_DMAE, >> + (bp->port? DMAE_REGISTERS_GO_C13 : >> + DMAE_REGISTERS_GO_C12), 1); >> + } >> + >> + if (atomic_read(&bp->stats_state) != STATS_STATE_ENABLE) { >> + atomic_set(&bp->stats_state, STATS_STATE_DISABLE); > > Is that the write side? If yes, you need a mb here, too. > And a comment. > >> + return; >> + } >> + >> + if (bp->phy_flags & PHY_BMAC_FLAG) { >> + REG_WR32(bp, GRCBASE_DMAE, >> + (bp->port? DMAE_REGISTERS_GO_C6 : >> + DMAE_REGISTERS_GO_C0), 1); >> + >> + } else if (bp->phy_flags & PHY_EMAC_FLAG) { >> + REG_WR32(bp, GRCBASE_DMAE, >> + (bp->port?
DMAE_REGISTERS_GO_C8 : >> + DMAE_REGISTERS_GO_C2), 1); >> + } >> + >> + if (bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_STAT_QUERY, 0, 0, 0, 0) == 0) { >> + /* stats ramrod has it's own slot on the spe */ >> + bp->spq_left++; >> + bp->stat_pending = 1; >> + } >> +} > >> +/* Called with netif_tx_lock. >> + * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call >> + * netif_wake_queue(). >> + */ >> +static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) >> +{ >> + struct bnx2x *bp = netdev_priv(dev); >> + dma_addr_t mapping; >> + struct eth_tx_bd *txbd; >> + struct eth_tx_parse_bd *pbd = NULL; >> + struct sw_tx_bd *tx_buf; >> + u16 pkt_prod, bd_prod; >> + int nbd, fp_index = 0; >> + struct bnx2x_fastpath *fp; >> + >> + fp = &bp->fp[fp_index]; >> +#ifdef BNX2X_STOP_ON_ERROR >> + if (unlikely(bp->panic)) { >> + return NETDEV_TX_BUSY; >> + } >> +#endif >> + >> + if (unlikely(bnx2x_tx_avail(bp->fp) < >> + (skb_shinfo(skb)->nr_frags + 3))) { >> + bp->slowpath->eth_stats.driver_xoff++, >> + netif_stop_queue(dev); >> + BNX2X_ERR("BUG! Tx ring full when queue awake!\n"); >> + return NETDEV_TX_BUSY; >> + } >> + >> + /* >> + This is a bit ugly. First we use one BD which we mark as start, >> + then for TSO or xsum we have a parsing info BD, >> + and only then we have the rest of the TSO bds. >> + (don't forget to mark the last one as last, >> + and to unmap only AFTER you write to the BD ...) >> + I would like to thank Dov Hirshfeld for this mess. >> + */ >> + >> + pkt_prod = fp->tx_pkt_prod++; >> + bd_prod = fp->tx_bd_prod; >> + >> + pkt_prod = TX_BD(pkt_prod); >> + bd_prod = TX_BD(bd_prod); >> + >> + /* get a tx_buff and first bd */ >> + tx_buf = &fp->tx_buf_ring[pkt_prod]; >> + txbd = &fp->tx_desc_ring[bd_prod]; >> + >> + txbd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; >> + >> + /* remeber the first bd of the packet */ >> + tx_buf->first_bd = bd_prod; >> + >> + DP(NETIF_MSG_TX_QUEUED, >> + "sending pkt=%u @%p, next_idx=%u, bd=%u @%p\n", >> + pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, txbd); >> + >> + if (skb->ip_summed == CHECKSUM_PARTIAL) { >> + u8 len; >> + struct iphdr *iph = ip_hdr(skb); >> + >> + txbd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM ; >> + >> + /* turn on parsing and get a bd */ >> + bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); >> + pbd = (void *)&fp->tx_desc_ring[bd_prod]; >> + >> + len = ((u8 *)ip_hdr(skb) - (u8 *)skb->data)/2; >> + >> + /* for now NS flag is not used in Linux */ >> + pbd->global_data = >> + len | ((skb->protocol == ETH_P_8021Q) >> + << ETH_TX_PARSE_BD_LLC_SNAP_EN_SHIFT); >> + >> + pbd->ip_hlen = ip_hdrlen(skb)/2; >> + pbd->total_hlen = len + pbd->ip_hlen; >> + >> + if (iph->protocol == IPPROTO_TCP) { >> + struct tcphdr *th = tcp_hdr(skb); >> + txbd->bd_flags.as_bitfield |= >> + ETH_TX_BD_FLAGS_TCP_CSUM; >> + pbd->tcp_flags = htonl(tcp_flag_word(skb))&0xFFFF; >> + pbd->total_hlen += tcp_hdrlen(skb)/2; >> + >> + pbd->tcp_pseudo_csum = ntohs(th->check); >> + } else if (iph->protocol == IPPROTO_UDP) { >> + struct udphdr *uh = udp_hdr(skb); >> + txbd->bd_flags.as_bitfield |= >> + ETH_TX_BD_FLAGS_TCP_CSUM; >> + pbd->total_hlen += 4; >> + pbd->global_data |= ETH_TX_PARSE_BD_UDP_FLG; >> + >> + /* HW bug: we need to subtract 10 bytes before the >> + * UDP header from the csum >> + */ >> + uh->check = (u16) ~csum_fold(csum_sub(uh->check, >> + csum_partial(((u8 *)(uh)-10), 10, 0))); >> + } >> + } >> + >> + if (bp->vlgrp != 0 && vlan_tx_tag_present(skb)) { >> + txbd->vlan = vlan_tx_tag_get(skb); >> + txbd->bd_flags.as_bitfield |= 
ETH_TX_BD_FLAGS_VLAN_TAG; >> + } >> +#ifdef BNX2X_STOP_ON_ERROR >> + else { >> + txbd->vlan = pkt_prod; >> + } >> +#endif >> + >> + mapping = pci_map_single(bp->pdev, skb->data, >> + skb->len, PCI_DMA_TODEVICE); > > dma_mapping_error() OK > >> + >> + txbd->addr_hi = U64_HI(mapping); >> + txbd->addr_lo = U64_LO(mapping); >> + txbd->hdr_nbds = 1; >> + txbd->nbd = nbd = skb_shinfo(skb)->nr_frags + ((pbd == NULL)? 1 : 2); >> + txbd->nbytes = skb_headlen(skb); >> + >> + DP(NETIF_MSG_TX_QUEUED, >> + "first bd @%p, addr(%x:%x) hdr_nbds(%d) nbd(%d)" >> + " nbytes(%d) flags(%x) vlan(%u)\n", >> + txbd, txbd->addr_hi, txbd->addr_lo, txbd->hdr_nbds, txbd->nbd, >> + txbd->nbytes, txbd->bd_flags.as_bitfield, txbd->vlan); >> + >> + if ((skb_shinfo(skb)->gso_size) && >> + (skb->len > (bp->dev->mtu + ETH_HLEN))) { >> + >> + int hlen = 2*pbd->total_hlen; >> + DP(NETIF_MSG_TX_QUEUED, >> + "TSO packet len %d, hlen %d, total len %d, tso size %d\n", >> + skb->len, hlen, skb_headlen(skb), skb_shinfo(skb)->gso_size); >> + >> + txbd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; >> + >> + if (txbd->nbytes > hlen) { >> + /* we split the first bd into headers and data bds >> + * to ease the pain of our fellow micocode engineers >> + * we use one mapping for both bds >> + * So far this has only been observed to happen >> + * in Other Operating Systems(TM) >> + */ >> + >> + /* first fix first bd */ >> + nbd++; >> + txbd->nbd++; >> + txbd->nbytes = hlen; >> + >> + /* we only print this as an error >> + * because we don't think this will ever happen. >> + */ >> + BNX2X_ERR("TSO split headr size is %d (%x:%x)" >> + " nbd %d\n", txbd->nbytes, txbd->addr_hi, >> + txbd->addr_lo, txbd->nbd); >> + >> + /* now get a new data bd >> + * (after the pbd) and fill it */ >> + bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); >> + txbd = &fp->tx_desc_ring[bd_prod]; >> + >> +#ifdef BNX2X_STOP_ON_ERROR >> + txbd->vlan = pkt_prod; >> +#endif >> + txbd->addr_hi = U64_HI(mapping); >> + txbd->addr_lo = U64_LO(mapping) + hlen; >> + txbd->nbytes = skb_headlen(skb) - hlen; >> + /* this marks the bd >> + * as one that has no individual mapping >> + * the FW ignors this flag in a bd not maked start >> + */ >> + txbd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_SW_LSO; >> + DP(NETIF_MSG_TX_QUEUED, >> + "TSO split data size is %d (%x:%x)\n", >> + txbd->nbytes, txbd->addr_hi, txbd->addr_lo); >> + } >> + >> + if (!pbd) { >> + /* supposed to be unreached >> + * (and therefore not handled properly...) 
>> + */ >> + BNX2X_ERR("LSO with no PBD\n"); >> + BUG(); >> + } >> + >> + pbd->lso_mss = skb_shinfo(skb)->gso_size; >> + pbd->tcp_send_seq = ntohl(tcp_hdr(skb)->seq); >> + pbd->ip_id = ntohs(ip_hdr(skb)->id); >> + pbd->tcp_pseudo_csum = >> + ntohs(~csum_tcpudp_magic(ip_hdr(skb)->saddr, >> + ip_hdr(skb)->daddr, >> + 0, IPPROTO_TCP, 0)); >> + pbd->global_data |= ETH_TX_PARSE_BD_PSEUDO_CS_WITHOUT_LEN; >> + } >> + >> + { >> + int i; >> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { >> + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; >> + >> + bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); >> + txbd = &fp->tx_desc_ring[bd_prod]; >> + >> + mapping = pci_map_page(bp->pdev, frag->page, >> + frag->page_offset, >> + frag->size, PCI_DMA_TODEVICE); >> + >> + txbd->addr_hi = U64_HI(mapping); >> + txbd->addr_lo = U64_LO(mapping); >> + txbd->nbytes = frag->size; >> + txbd->bd_flags.as_bitfield = 0; >> +#ifdef BNX2X_STOP_ON_ERROR >> + txbd->vlan = pkt_prod; >> +#endif >> + DP(NETIF_MSG_TX_QUEUED, "frag (%d) bd @%p," >> + " addr(%x:%x) nbytes(%d) flags(%x)\n", >> + i, txbd, txbd->addr_hi, txbd->addr_lo, >> + txbd->nbytes, txbd->bd_flags.as_bitfield); >> + >> + } /* for */ >> + } >> + >> + /* now at last mark the bd as the last bd */ >> + txbd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_END_BD; >> + >> + DP(NETIF_MSG_TX_QUEUED, "last bd @%p flags(%x)\n", >> + txbd, txbd->bd_flags.as_bitfield); >> + >> + tx_buf->skb = skb; >> + >> + bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); >> + >> + /* now send a tx doorbell, counting the next bd >> + * if the packet contains or ends with it >> + */ >> + if (TX_BD_POFF(bd_prod) < nbd) >> + nbd++; >> + >> + if (pbd) { >> + DP(NETIF_MSG_TX_QUEUED, >> + "PBD @%p, ip_data=%x , ip_hlen %u, ip_id %u, lso_mss %u," >> + " tcp_flags %x, xsum %x, seq %u, hlen %u)\n", >> + pbd, pbd->global_data, pbd->ip_hlen, pbd->ip_id, >> + pbd->lso_mss, pbd->tcp_flags, pbd->tcp_pseudo_csum, >> + pbd->tcp_send_seq, pbd->total_hlen); >> + } >> + >> + DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd=%u, bd=%d\n", nbd, bd_prod); >> + >> + fp->hw_tx_prods->bds_prod += nbd; >> + fp->hw_tx_prods->packets_prod++; >> + DOORBELL(bp, fp_index, 0); >> + >> + mmiowb(); >> + >> + fp->tx_bd_prod = bd_prod; >> + dev->trans_start = jiffies; >> + >> + if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) { >> + netif_stop_queue(dev); >> + bp->slowpath->eth_stats.driver_xoff++; >> + if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3) >> + netif_wake_queue(dev); >> + } >> + fp->tx_pkt++; >> + return NETDEV_TX_OK; >> +} > >> +static int bnx2x_nic_unload(struct bnx2x *bp, int fre_irq) >> +{ >> + u32 reset_code = 0; >> + int rc /*, j */; >> + bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; >> + >> + /* Calling flush_scheduled_work() may deadlock because >> + * linkwatch_event() may be on the workqueue and it will try to get >> + * the rtnl_lock which we are holding. >> + */ >> + while (bp->in_reset_task) >> + msleep(1); > > Memory barrier? Now that cancel_work is available we will use that instead of the in_reset_task flag. > >> + >> + /* Delete the timer: do it before disabling interrupts, as it >> + may be stil STAT_QUERY ramrod pending after stopping the timer */ >> + del_timer_sync(&bp->timer); >> + >> + /* Wait until stat ramrod returns and all SP tasks complete */ >> + while (bp->stat_pending && (bp->spq_left != MAX_SPQ_PENDING)) { > > Memory barrier?
OK > >> + msleep(1); >> + } >> + >> + /* Stop fast path, disable MAC, disable interrupts */ >> + bnx2x_netif_stop(bp); > >> + /* Remember cstorm producer in DSB to track it */ >> + bp->dsb_prod_sp_idx = *bp->dsb_sp_prod; >> + /* Send CFC_DELETE ramrod */ >> + bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_LEADING_CFC_DEL, 0, 0, 0, 1); >> + /* Wait for completion to arrive. Do nothing as we are going to reset >> + the chip a few lines later */ >> + while ( bp->dsb_prod_sp_idx == *bp->dsb_sp_prod ) { > > What's that doing? Poking some DMA? > Barrier needed? DMA sync needed? sp_post sends a command on the slowpath ring. I will add a barrier inside it. > >> + msleep(1); >> + } >> + > >> +static void bnx2x_reset_task(struct work_struct *work) >> +{ >> + struct bnx2x *bp = container_of(work, struct bnx2x, reset_task); >> + >> +#ifdef BNX2X_STOP_ON_ERROR >> + BNX2X_ERR("reset taks called but STOP_ON_ERROR defined" >> + " so reset not done to allow debug dump,\n" >> + " you will need to reboot when done\n"); >> + return; >> +#endif >> + >> + if (!netif_running(bp->dev)) >> + return; >> + >> + bp->in_reset_task = 1; >> + bnx2x_netif_stop(bp); >> + >> + bnx2x_nic_unload(bp, 0); >> + bnx2x_nic_load(bp, 0); > > >> + bp->in_reset_task = 0; > > smp_wmb()? Will be removed. > >> +} > >> +static int bnx2x_phys_id(struct net_device *dev, u32 data) >> +{ >> + struct bnx2x *bp = netdev_priv(dev); >> + int i; >> + >> + if (data == 0) >> + data = 2; >> + >> + for (i = 0; i < (data * 2); i++) { >> + if ((i % 2) == 0) { >> + bnx2x_leds_set(bp, SPEED_1000); >> + } else { >> + bnx2x_leds_unset(bp); >> + } >> + msleep_interruptible(500); > > Check the return value of msleep_interruptible and drop the > following check for signals. OK > >> + if (signal_pending(current)) >> + break; >> + } >> + >> + if (bp->link_up) { >> + bnx2x_leds_set(bp, bp->line_speed); >> + } >> + return 0; >> +} > >> +#ifdef BNX2X_STOP_ON_ERROR >> +#warning stop on error defined >> +#define bnx2x_panic() \ >> + do { bp->panic = 1; \ >> + BNX2X_ERR("driver assert\n"); \ >> + bnx2x_disable_int(bp); \ >> + bnx2x_panic_dump(bp); \ >> + } while (0); > > Drop the last semicolon Oops... ;) > >> +#else >> +#define bnx2x_panic() >> +#endif > > > Puh, that was a lot :) > >