Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 net-next 13/22] bnx2x: Support of PF driver of a VF init request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The VF driver will send an 'init' request as part of its nic load
flow. This message is used by the VF to publish the GPA's of its
status blocks, slow path ring and statistics buffer.
The PF driver notes all this down in the VF database, and also uses
this message to transfer the VF to VF_INIT state internally.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    4 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  159 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    7 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   20 +++
 6 files changed, 193 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 52012f2..4d89ef6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1853,6 +1853,8 @@ int bnx2x_del_all_macs(struct bnx2x *bp,
 
 /* Init Function API  */
 void bnx2x_func_init(struct bnx2x *bp, struct bnx2x_func_init_params *p);
+void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
+			  u8 vf_valid, int fw_sb_id, int igu_sb_id);
 u32 bnx2x_get_pretend_reg(struct bnx2x *bp);
 int bnx2x_get_gpio(struct bnx2x *bp, int gpio_num, u8 port);
 int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode, u8 port);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index cd80552..7c2fc15 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5402,7 +5402,7 @@ static void bnx2x_map_sb_state_machines(struct hc_index_data *index_data)
 		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
 }
 
-static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
+void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
 			  u8 vf_valid, int fw_sb_id, int igu_sb_id)
 {
 	int igu_seg_id;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 50d825e..3997f63 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -3726,6 +3726,10 @@
 #define PXP_REG_HST_DISCARD_INTERNAL_WRITES_STATUS		 0x10309c
 /* [WB 160] Used for initialization of the inbound interrupts memory */
 #define PXP_REG_HST_INBOUND_INT 				 0x103800
+/* [RW 7] Indirect access to the permission table. The fields are : {Valid;
+ * VFID[5:0]}
+ */
+#define PXP_REG_HST_ZONE_PERMISSION_TABLE			 0x103400
 /* [RW 32] Interrupt mask register #0 read/write */
 #define PXP_REG_PXP_INT_MASK_0					 0x103074
 #define PXP_REG_PXP_INT_MASK_1					 0x103084
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index b91cb00..6141c73 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -63,6 +63,40 @@ struct bnx2x_virtf *bnx2x_vf_by_abs_fid(struct bnx2x *bp, u16 abs_vfid)
 	return (idx < BNX2X_NR_VIRTFN(bp)) ? BP_VF(bp, idx) : NULL;
 }
 
+static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				u8 igu_sb_id, u8 segment, u16 index, u8 op,
+				u8 update)
+{
+	/* acking a VF sb through the PF - use the GRC */
+	u32 ctl;
+	u32 igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA;
+	u32 igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL;
+	u32 func_encode = vf->abs_vfid;
+	u32 addr_encode = IGU_CMD_E2_PROD_UPD_BASE + igu_sb_id;
+	struct igu_regular cmd_data = {0};
+
+	cmd_data.sb_id_and_flags =
+			((index << IGU_REGULAR_SB_INDEX_SHIFT) |
+			 (segment << IGU_REGULAR_SEGMENT_ACCESS_SHIFT) |
+			 (update << IGU_REGULAR_BUPDATE_SHIFT) |
+			 (op << IGU_REGULAR_ENABLE_INT_SHIFT));
+
+	ctl = addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT		|
+	      func_encode << IGU_CTRL_REG_FID_SHIFT		|
+	      IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT;
+
+	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
+	   cmd_data.sb_id_and_flags, igu_addr_data);
+	REG_WR(bp, igu_addr_data, cmd_data.sb_id_and_flags);
+	mmiowb();
+	barrier();
+
+	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
+	   ctl, igu_addr_ctl);
+	REG_WR(bp, igu_addr_ctl, ctl);
+	mmiowb();
+	barrier();
+}
 static int bnx2x_ari_enabled(struct pci_dev *dev)
 {
 	return dev->bus->self && dev->bus->self->ari_enabled;
@@ -355,6 +389,52 @@ static void bnx2x_vf_pglue_clear_err(struct bnx2x *bp, u8 abs_vfid)
 	REG_WR(bp, was_err_reg, 1 << (abs_vfid & 0x1f));
 }
 
+static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	int i;
+	u32 val;
+
+	/* Set VF masks and configuration - pretend */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+
+	REG_WR(bp, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0);
+	REG_WR(bp, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0);
+	REG_WR(bp, IGU_REG_SB_MASK_LSB, 0);
+	REG_WR(bp, IGU_REG_SB_MASK_MSB, 0);
+	REG_WR(bp, IGU_REG_PBA_STATUS_LSB, 0);
+	REG_WR(bp, IGU_REG_PBA_STATUS_MSB, 0);
+
+	val = REG_RD(bp, IGU_REG_VF_CONFIGURATION);
+	val |= (IGU_VF_CONF_FUNC_EN | IGU_VF_CONF_MSI_MSIX_EN);
+	if (vf->cfg_flags & VF_CFG_INT_SIMD)
+		val |= IGU_VF_CONF_SINGLE_ISR_EN;
+	val &= ~IGU_VF_CONF_PARENT_MASK;
+	val |= BP_FUNC(bp) << IGU_VF_CONF_PARENT_SHIFT;	/* parent PF */
+	REG_WR(bp, IGU_REG_VF_CONFIGURATION, val);
+
+	DP(BNX2X_MSG_IOV,
+	   "value in IGU_REG_VF_CONFIGURATION of vf %d after write %x\n",
+	   vf->abs_vfid, REG_RD(bp, IGU_REG_VF_CONFIGURATION));
+
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+
+	/* iterate over all queues, clear sb consumer */
+	for (i = 0; i < vf_sb_count(vf); i++) {
+		u8 igu_sb_id = vf_igu_sb(vf, i);
+
+		/* zero prod memory */
+		REG_WR(bp, IGU_REG_PROD_CONS_MEMORY + igu_sb_id * 4, 0);
+
+		/* clear sb state machine */
+		bnx2x_igu_clear_sb_gen(bp, vf->abs_vfid, igu_sb_id,
+				       false /* VF */);
+
+		/* disable + update */
+		bnx2x_vf_igu_ack_sb(bp, vf, igu_sb_id, USTORM_ID, 0,
+				    IGU_INT_DISABLE, 1);
+	}
+}
+
 void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
 {
 	/* set the VF-PF association in the FW */
@@ -372,6 +452,17 @@ void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
 	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
 }
 
+static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	/* Reset vf in IGU  interrupts are still disabled */
+	bnx2x_vf_igu_reset(bp, vf);
+
+	/* pretend to enable the vf with the PBF */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+	REG_WR(bp, PBF_REG_DISABLE_VF, 0);
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+}
+
 static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid)
 {
 	struct pci_dev *dev;
@@ -988,6 +1079,13 @@ void bnx2x_iov_sp_task(struct bnx2x *bp)
 		}
 	}
 }
+static void bnx2x_vf_qtbl_set_q(struct bnx2x *bp, u8 abs_vfid, u8 qid,
+				u8 enable)
+{
+	u32 reg = PXP_REG_HST_ZONE_PERMISSION_TABLE + qid * 4;
+	u32 val = enable ? (abs_vfid | (1 << 6)) : 0;
+	REG_WR(bp, reg, val);
+}
 
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
@@ -1099,6 +1197,67 @@ int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	return 0;
 }
 
+int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map)
+{
+	struct bnx2x_func_init_params func_init = {0};
+	u16 flags = 0;
+	int i;
+
+	/* the sb resources are initialized at this point, do the
+	 * FW/HW initializations
+	 */
+	for_each_vf_sb(vf, i)
+		bnx2x_init_sb(bp, (dma_addr_t)sb_map[i], vf->abs_vfid, true,
+			      vf_igu_sb(vf, i), vf_igu_sb(vf, i));
+
+	/* Sanity checks */
+	if (vf->state != VF_ACQUIRED) {
+		DP(BNX2X_MSG_IOV, "VF[%d] is not in VF_ACQUIRED, but %d\n",
+		   vf->abs_vfid, vf->state);
+		return -EINVAL;
+	}
+	/* FLR cleanup epilogue */
+	if (bnx2x_vf_flr_clnup_epilog(bp, vf->abs_vfid))
+		return -EBUSY;
+
+	/* reset IGU VF statistics: MSIX */
+	REG_WR(bp, IGU_REG_STATISTIC_NUM_MESSAGE_SENT + vf->abs_vfid * 4 , 0);
+
+	/* vf init */
+	if (vf->cfg_flags & VF_CFG_STATS)
+		flags |= (FUNC_FLG_STATS | FUNC_FLG_SPQ);
+
+	if (vf->cfg_flags & VF_CFG_TPA)
+		flags |= FUNC_FLG_TPA;
+
+	if (is_vf_multi(vf))
+		flags |= FUNC_FLG_RSS;
+
+	/* function setup */
+	func_init.func_flgs = flags;
+	func_init.pf_id = BP_FUNC(bp);
+	func_init.func_id = FW_VF_HANDLE(vf->abs_vfid);
+	func_init.fw_stat_map = vf->fw_stat_map;
+	func_init.spq_map = vf->spq_map;
+	func_init.spq_prod = 0;
+	bnx2x_func_init(bp, &func_init);
+
+	/* Configure RSS TBD */
+
+	/* Enable the vf */
+	bnx2x_vf_enable_access(bp, vf->abs_vfid);
+	bnx2x_vf_enable_traffic(bp, vf);
+
+	/* queue protection table */
+	for_each_vfq(vf, i)
+		bnx2x_vf_qtbl_set_q(bp, vf->abs_vfid,
+				    vfq_qzone_id(vf, vfq_get(vf, i)), true);
+
+	vf->state = VF_ENABLED;
+
+	return 0;
+}
+
 void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			      enum channel_tlvs tlv)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index a6ae7f0..a3a8240 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -269,6 +269,8 @@ struct bnx2x_virtf {
 #define for_each_vf_sb(vf, var) \
 		for ((var) = 0; (var) < vf_sb_count(vf); (var)++)
 
+#define is_vf_multi(vf)	(vf_rxq_count(vf) > 1)
+
 #define HW_VF_HANDLE(bp, abs_vfid) \
 	(u16)(BP_ABS_FUNC((bp)) | (1<<3) |  ((u16)(abs_vfid) << 4))
 
@@ -431,6 +433,11 @@ void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
 int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			  struct vf_pf_resc_request *resc);
 
+/* init */
+int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
+		  dma_addr_t *sb_map);
+
+
 static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 						struct bnx2x_virtf *vf)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 01ee50a..1dfe734 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -22,6 +22,8 @@
  */
 #include "bnx2x.h"
 #include "bnx2x_sriov.h"
+#include <linux/crc32.h>
+
 /* place a given tlv on the tlv buffer at a given offset */
 void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 		   u16 length)
@@ -360,6 +362,21 @@ static void bnx2x_vf_mbx_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	bnx2x_vf_mbx_acquire_resp(bp, vf, mbx, rc);
 }
 
+static void bnx2x_vf_mbx_init_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			      struct bnx2x_vf_mbx *mbx)
+{
+	struct vfpf_init_tlv *init = &mbx->msg->req.init;
+
+	/* record ghost addresses from vf message */
+	vf->spq_map = init->spq_addr;
+	vf->fw_stat_map = init->stats_addr;
+
+	vf->op_rc = bnx2x_vf_init(bp, vf, (dma_addr_t *)init->sb_addr);
+
+	/* response */
+	bnx2x_vf_mbx_resp(bp, vf);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -379,6 +396,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_ACQUIRE:
 			bnx2x_vf_mbx_acquire(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_INIT:
+			bnx2x_vf_mbx_init_vf(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 08/22] bnx2x: VF fastpath
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

When VF driver is transmitting it must supply the correct mac
address in the parsing BD. This is used for firmware validation
and enforcement and also for tx-switching.
Refactor interrupt ack flow to allow for different BAR addresses of
the hardware in the PF BAR vs the VF BAR.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |   20 ++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |   68 +++++++++++-----------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   13 +----
 3 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 2519d5a..ba51259 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3462,8 +3462,21 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		    cpu_to_le16(vlan_tx_tag_get(skb));
 		tx_start_bd->bd_flags.as_bitfield |=
 		    (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
-	} else
-		tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+	} else {
+
+		/* when transmitting in a vf, start bd must hold the ethertype
+		 * for fw to enforce it
+		 */
+		if (IS_VF(bp)) {
+			struct ethhdr *hdr = (struct ethhdr *)skb->data;
+			tx_start_bd->vlan_or_ethertype =
+				cpu_to_le16(ntohs(hdr->h_proto));
+		} else {
+			/* used by FW for packet accounting */
+			tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+
+		}
+	}
 
 	/* turn on parsing and get a BD */
 	bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
@@ -3479,7 +3492,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			hlen = bnx2x_set_pbd_csum_e2(bp, skb,
 						     &pbd_e2_parsing_data,
 						     xmit_type);
-		if (IS_MF_SI(bp)) {
+
+		if (IS_MF_SI(bp) || IS_VF(bp)) {
 			/*
 			 * fill in the MAC addresses in the PBD - for local
 			 * switching
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 2e32a21..948949d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -499,6 +499,39 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb);
 
+static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
+					struct bnx2x_fastpath *fp,
+					u16 bd_prod, u16 rx_comp_prod,
+					u16 rx_sge_prod)
+{
+	struct ustorm_eth_rx_producers rx_prods = {0};
+	u32 i;
+
+	/* Update producers */
+	rx_prods.bd_prod = bd_prod;
+	rx_prods.cqe_prod = rx_comp_prod;
+	rx_prods.sge_prod = rx_sge_prod;
+
+	/* Make sure that the BD and SGE data is updated before updating the
+	 * producers since FW might read the BD/SGE right after the producer
+	 * is updated.
+	 * This is only applicable for weak-ordered memory model archs such
+	 * as IA-64. The following barrier is also mandatory since FW will
+	 * assumes BDs must have buffers.
+	 */
+	wmb();
+
+	for (i = 0; i < sizeof(rx_prods)/4; i++)
+		REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
+		       ((u32 *)&rx_prods)[i]);
+
+	mmiowb(); /* keep prod updates ordered */
+
+	DP(NETIF_MSG_RX_STATUS,
+	   "queue[%d]:  wrote  bd_prod %u  cqe_prod %u  sge_prod %u\n",
+	   fp->index, bd_prod, rx_comp_prod, rx_sge_prod);
+}
+
 /* reload helper */
 int bnx2x_reload_if_running(struct net_device *dev);
 
@@ -507,9 +540,6 @@ int bnx2x_change_mac_addr(struct net_device *dev, void *p);
 /* NAPI poll Rx part */
 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget);
 
-void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp,
-			u16 bd_prod, u16 rx_comp_prod, u16 rx_sge_prod);
-
 /* NAPI poll Tx part */
 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata);
 
@@ -612,38 +642,6 @@ static inline void bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
 	fp->fp_hc_idx = fp->sb_running_index[SM_RX_ID];
 }
 
-static inline void bnx2x_update_rx_prod_gen(struct bnx2x *bp,
-			struct bnx2x_fastpath *fp, u16 bd_prod,
-			u16 rx_comp_prod, u16 rx_sge_prod, u32 start)
-{
-	struct ustorm_eth_rx_producers rx_prods = {0};
-	u32 i;
-
-	/* Update producers */
-	rx_prods.bd_prod = bd_prod;
-	rx_prods.cqe_prod = rx_comp_prod;
-	rx_prods.sge_prod = rx_sge_prod;
-
-	/*
-	 * Make sure that the BD and SGE data is updated before updating the
-	 * producers since FW might read the BD/SGE right after the producer
-	 * is updated.
-	 * This is only applicable for weak-ordered memory model archs such
-	 * as IA-64. The following barrier is also mandatory since FW will
-	 * assumes BDs must have buffers.
-	 */
-	wmb();
-
-	for (i = 0; i < sizeof(rx_prods)/4; i++)
-		REG_WR(bp, start + i*4, ((u32 *)&rx_prods)[i]);
-
-	mmiowb(); /* keep prod updates ordered */
-
-	DP(NETIF_MSG_RX_STATUS,
-	   "queue[%d]:  wrote  bd_prod %u  cqe_prod %u  sge_prod %u\n",
-	   fp->index, bd_prod, rx_comp_prod, rx_sge_prod);
-}
-
 static inline void bnx2x_igu_ack_sb_gen(struct bnx2x *bp, u8 igu_sb_id,
 					u8 segment, u16 index, u8 op,
 					u8 update, u32 igu_addr)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 16f73d3..bf575a9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1697,15 +1697,6 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 	return;
 }
 
-void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp,
-			u16 bd_prod, u16 rx_comp_prod, u16 rx_sge_prod)
-{
-	u32 start = BAR_USTRORM_INTMEM + fp->ustorm_rx_prods_offset;
-
-	bnx2x_update_rx_prod_gen(bp, fp, bd_prod, rx_comp_prod, rx_sge_prod,
-				 start);
-}
-
 irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 {
 	struct bnx2x *bp = netdev_priv(dev_instance);
@@ -4602,8 +4593,8 @@ static void bnx2x_attn_int(struct bnx2x *bp)
 void bnx2x_igu_ack_sb(struct bnx2x *bp, u8 igu_sb_id, u8 segment,
 		      u16 index, u8 op, u8 update)
 {
-	u32 igu_addr = BAR_IGU_INTMEM + (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8;
-
+	u32 igu_addr = bp->igu_base_addr;
+	igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8;
 	bnx2x_igu_ack_sb_gen(bp, igu_sb_id, segment, index, op, update,
 			     igu_addr);
 }
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 04/22] bnx2x: Separate VF and PF logic
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Generally, the VF driver cannot access the chip, except by the
narrow window its BAR allows. Care had to be taken so the VF driver
will not reach code which accesses the chip elsewhere.
Refactor the nic_load flow into parts so it would be
easier to separate the VF-only logic from the PF-only logic.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  613 +++++++++++++-------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |   15 +-
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |   99 +++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h    |    7 +
 6 files changed, 505 insertions(+), 232 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index d6f4e0e..231fb5e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2222,6 +2222,7 @@ int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id);
 int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping);
 int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
 int __devexit bnx2x_vfpf_release(struct bnx2x *bp);
+int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code);
 /* Congestion management fairness mode */
 #define CMNG_FNS_NONE		0
 #define CMNG_FNS_MINMAX		1
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 73dbe0a..8df311f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1050,7 +1050,7 @@ void __bnx2x_link_report(struct bnx2x *bp)
 	struct bnx2x_link_report_data cur_data;
 
 	/* reread mf_cfg */
-	if (!CHIP_IS_E1(bp))
+	if (IS_PF(bp) && !CHIP_IS_E1(bp))
 		bnx2x_read_mf_cfg(bp);
 
 	/* Read the current link report info */
@@ -1393,10 +1393,14 @@ static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
 
 	if (nvecs == offset)
 		return;
-	free_irq(bp->msix_table[offset].vector, bp->dev);
-	DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
-	   bp->msix_table[offset].vector);
-	offset++;
+
+	/* VFs don't have a default SB */
+	if (IS_PF(bp)) {
+		free_irq(bp->msix_table[offset].vector, bp->dev);
+		DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
+		   bp->msix_table[offset].vector);
+		offset++;
+	}
 
 	if (CNIC_SUPPORT(bp)) {
 		if (nvecs == offset)
@@ -1417,11 +1421,17 @@ static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
 void bnx2x_free_irq(struct bnx2x *bp)
 {
 	if (bp->flags & USING_MSIX_FLAG &&
-	    !(bp->flags & USING_SINGLE_MSIX_FLAG))
-		bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
-				     CNIC_SUPPORT(bp) + 1);
-	else
+	    !(bp->flags & USING_SINGLE_MSIX_FLAG)) {
+		int nvecs = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_SUPPORT(bp);
+
+		/* vfs don't have a default status block */
+		if (IS_PF(bp))
+			nvecs++;
+
+		bnx2x_free_msix_irqs(bp, nvecs);
+	} else {
 		free_irq(bp->dev->irq, bp->dev);
+	}
 }
 
 int bnx2x_enable_msix(struct bnx2x *bp)
@@ -1519,12 +1529,15 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp)
 {
 	int i, rc, offset = 0;
 
-	rc = request_irq(bp->msix_table[offset++].vector,
-			 bnx2x_msix_sp_int, 0,
-			 bp->dev->name, bp->dev);
-	if (rc) {
-		BNX2X_ERR("request sp irq failed\n");
-		return -EBUSY;
+	/* no default status block for vf */
+	if (IS_PF(bp)) {
+		rc = request_irq(bp->msix_table[offset++].vector,
+				 bnx2x_msix_sp_int, 0,
+				 bp->dev->name, bp->dev);
+		if (rc) {
+			BNX2X_ERR("request sp irq failed\n");
+			return -EBUSY;
+		}
 	}
 
 	if (CNIC_SUPPORT(bp))
@@ -1548,12 +1561,20 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp)
 	}
 
 	i = BNX2X_NUM_ETH_QUEUES(bp);
-	offset = 1 + CNIC_SUPPORT(bp);
-	netdev_info(bp->dev, "using MSI-X  IRQs: sp %d  fp[%d] %d ... fp[%d] %d\n",
-	       bp->msix_table[0].vector,
-	       0, bp->msix_table[offset].vector,
-	       i - 1, bp->msix_table[offset + i - 1].vector);
-
+	if (IS_PF(bp)) {
+		offset = 1 + CNIC_SUPPORT(bp);
+		netdev_info(bp->dev,
+			    "using MSI-X  IRQs: sp %d  fp[%d] %d ... fp[%d] %d\n",
+			    bp->msix_table[0].vector,
+			    0, bp->msix_table[offset].vector,
+			    i - 1, bp->msix_table[offset + i - 1].vector);
+	} else {
+		offset = CNIC_SUPPORT(bp);
+		netdev_info(bp->dev,
+			    "using MSI-X  IRQs: fp[%d] %d ... fp[%d] %d\n",
+			    0, bp->msix_table[offset].vector,
+			    i - 1, bp->msix_table[offset + i - 1].vector);
+	}
 	return 0;
 }
 
@@ -1571,7 +1592,7 @@ int bnx2x_enable_msi(struct bnx2x *bp)
 	return 0;
 }
 
-static int bnx2x_req_irq(struct bnx2x *bp)
+int bnx2x_req_irq(struct bnx2x *bp)
 {
 	unsigned long flags;
 	unsigned int irq;
@@ -1960,27 +1981,206 @@ static void bnx2x_squeeze_objects(struct bnx2x *bp)
 	} while (0)
 #endif /*BNX2X_STOP_ON_ERROR*/
 
-bool bnx2x_test_firmware_version(struct bnx2x *bp, bool is_err)
+static void bnx2x_free_fw_stats_mem(struct bnx2x *bp)
 {
-	/* build FW version dword */
-	u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
-		    (BCM_5710_FW_MINOR_VERSION << 8) +
-		    (BCM_5710_FW_REVISION_VERSION << 16) +
-		    (BCM_5710_FW_ENGINEERING_VERSION << 24);
+	BNX2X_PCI_FREE(bp->fw_stats, bp->fw_stats_mapping,
+		       bp->fw_stats_data_sz + bp->fw_stats_req_sz);
+	return;
+}
+
+static int bnx2x_alloc_fw_stats_mem(struct bnx2x *bp)
+{
+	int num_groups;
+	int is_fcoe_stats = NO_FCOE(bp) ? 0 : 1;
+
+	/* number of queues for statistics is number of eth queues + FCoE */
+	u8 num_queue_stats = BNX2X_NUM_ETH_QUEUES(bp) + is_fcoe_stats;
+
+	/* Total number of FW statistics requests =
+	 * 1 for port stats + 1 for PF stats + potential 2 for FCoE (fcoe proper
+	 * and fcoe l2 queue) stats + num of queues (which includes another 1
+	 * for fcoe l2 queue if applicable)
+	 */
+	bp->fw_stats_num = 2 + is_fcoe_stats + num_queue_stats;
+	/* Request is built from stats_query_header and an array of
+	 * stats_query_cmd_group each of which contains
+	 * STATS_QUERY_CMD_COUNT rules. The real number or requests is
+	 * configured in the stats_query_header.
+	 */
+	num_groups =
+		(((bp->fw_stats_num) / STATS_QUERY_CMD_COUNT) +
+		 (((bp->fw_stats_num) % STATS_QUERY_CMD_COUNT) ?
+		 1 : 0));
 
-	/* read loaded FW from chip */
-	u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+	DP(BNX2X_MSG_SP, "stats fw_stats_num %d, num_groups %d",
+	   bp->fw_stats_num, num_groups);
 
-	DP(NETIF_MSG_IFUP, "loaded fw %x, my fw %x\n", loaded_fw, my_fw);
+	bp->fw_stats_req_sz = sizeof(struct stats_query_header) +
+		num_groups * sizeof(struct stats_query_cmd_group);
 
-	if (loaded_fw != my_fw) {
-		if (is_err)
-			BNX2X_ERR("bnx2x with FW %x was already loaded, which mismatches my %x FW. aborting\n",
+	/* Data for statistics requests + stats_counter
+	 *
+	 * stats_counter holds per-STORM counters that are incremented
+	 * when STORM has finished with the current request.
+	 * memory for FCoE offloaded statistics are counted anyway,
+	 * even if they will not be sent.
+	 * VF stats are not accounted for here as the data of VF stats is stored
+	 * in memory allocated by the VF, not here.
+	 */
+	bp->fw_stats_data_sz = sizeof(struct per_port_stats) +
+		sizeof(struct per_pf_stats) +
+		sizeof(struct fcoe_statistics_params) +
+		sizeof(struct per_queue_stats) * num_queue_stats +
+		sizeof(struct stats_counter);
+
+	BNX2X_PCI_ALLOC(bp->fw_stats, &bp->fw_stats_mapping,
+			bp->fw_stats_data_sz + bp->fw_stats_req_sz);
+
+	/* Set shortcuts */
+	bp->fw_stats_req = (struct bnx2x_fw_stats_req *)bp->fw_stats;
+	bp->fw_stats_req_mapping = bp->fw_stats_mapping;
+
+	bp->fw_stats_data = (struct bnx2x_fw_stats_data *)
+		((u8 *)bp->fw_stats + bp->fw_stats_req_sz);
+
+	bp->fw_stats_data_mapping = bp->fw_stats_mapping +
+		bp->fw_stats_req_sz;
+
+	DP(BNX2X_MSG_SP, "statistics request base address set to %x %x",
+	   U64_HI(bp->fw_stats_req_mapping),
+	   U64_LO(bp->fw_stats_req_mapping));
+
+	DP(BNX2X_MSG_SP, "statistics data base address set to %x %x",
+	   U64_HI(bp->fw_stats_data_mapping),
+	   U64_LO(bp->fw_stats_data_mapping));
+	return 0;
+
+alloc_mem_err:
+	bnx2x_free_fw_stats_mem(bp);
+	BNX2X_ERR("Can't allocate FW stats memory\n");
+	return -ENOMEM;
+}
+
+/* send load requrest to mcp and analyze response */
+static int bnx2x_nic_load_request(struct bnx2x *bp, u32 *load_code)
+{
+	/* init fw_seq */
+	bp->fw_seq =
+		(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
+		 DRV_MSG_SEQ_NUMBER_MASK);
+	BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
+
+	/* Get current FW pulse sequence */
+	bp->fw_drv_pulse_wr_seq =
+		(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_pulse_mb) &
+		 DRV_PULSE_SEQ_MASK);
+	BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
+	/* load request */
+	(*load_code) = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ,
+					DRV_MSG_CODE_LOAD_REQ_WITH_LFA);
+
+	/* if mcp fails to respond we must abort */
+	if (!(*load_code)) {
+		BNX2X_ERR("MCP response failure, aborting\n");
+		return -EBUSY;
+	}
+
+	/* If mcp refused (e.g. other port is in diagnostic mode) we
+	 * must abort
+	 */
+	if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) {
+		BNX2X_ERR("MCP refused load request, aborting\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/* check whether another PF has already loaded FW to chip. In
+ * virtualized environments a pf from another VM may have already
+ * initialized the device including loading FW
+ */
+int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code)
+{
+	/* is another pf loaded on this engine? */
+	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
+	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
+
+		/* build my FW version dword */
+		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
+			(BCM_5710_FW_MINOR_VERSION << 8) +
+			(BCM_5710_FW_REVISION_VERSION << 16) +
+			(BCM_5710_FW_ENGINEERING_VERSION << 24);
+
+		/* read loaded FW from chip */
+		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+		DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x",
+		   loaded_fw, my_fw);
+
+		/* abort nic load if version mismatch */
+		if (my_fw != loaded_fw) {
+			BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. aborting",
 				  loaded_fw, my_fw);
-		return false;
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+/* returns the "mcp load_code" according to global load_count array */
+static int bnx2x_nic_load_no_mcp(struct bnx2x *bp, int port)
+{
+	int path = BP_PATH(bp);
+	DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d]      %d, %d, %d\n",
+	   path, load_count[path][0], load_count[path][1],
+	   load_count[path][2]);
+	load_count[path][0]++;
+	load_count[path][1 + port]++;
+	DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d]  %d, %d, %d\n",
+	   path, load_count[path][0], load_count[path][1],
+	   load_count[path][2]);
+	if (load_count[path][0] == 1)
+		return FW_MSG_CODE_DRV_LOAD_COMMON;
+	else if (load_count[path][1 + port] == 1)
+		return FW_MSG_CODE_DRV_LOAD_PORT;
+	else
+		return FW_MSG_CODE_DRV_LOAD_FUNCTION;
+}
+
+/* mark PMF if applicable */
+static void bnx2x_nic_load_pmf(struct bnx2x *bp, u32 load_code)
+{
+	if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
+	    (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
+	    (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
+		bp->port.pmf = 1;
+		/* We need the barrier to ensure the ordering between the
+		 * writing to bp->port.pmf here and reading it from the
+		 * bnx2x_periodic_task().
+		 */
+		smp_mb();
+	} else {
+		bp->port.pmf = 0;
+	}
+
+	DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
+}
+
+static void bnx2x_nic_load_afex_dcc(struct bnx2x *bp, int load_code)
+{
+	if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
+	     (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
+	    (bp->common.shmem2_base)) {
+		if (SHMEM2_HAS(bp, dcc_support))
+			SHMEM2_WR(bp, dcc_support,
+				  (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
+				   SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
+		if (SHMEM2_HAS(bp, afex_driver_support))
+			SHMEM2_WR(bp, afex_driver_support,
+				  SHMEM_AFEX_SUPPORTED_VERSION_ONE);
 	}
 
-	return true;
+	/* Set AFEX default VLAN tag to an invalid value */
+	bp->afex_def_vlan_tag = -1;
 }
 
 /**
@@ -2083,10 +2283,12 @@ int bnx2x_load_cnic(struct bnx2x *bp)
 
 	mutex_init(&bp->cnic_mutex);
 
-	rc = bnx2x_alloc_mem_cnic(bp);
-	if (rc) {
-		BNX2X_ERR("Unable to allocate bp memory for cnic\n");
-		LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0);
+	if (IS_PF(bp)) {
+		rc = bnx2x_alloc_mem_cnic(bp);
+		if (rc) {
+			BNX2X_ERR("Unable to allocate bp memory for cnic\n");
+			LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0);
+		}
 	}
 
 	rc = bnx2x_alloc_fp_mem_cnic(bp);
@@ -2113,14 +2315,16 @@ int bnx2x_load_cnic(struct bnx2x *bp)
 
 	bnx2x_nic_init_cnic(bp);
 
-	/* Enable Timer scan */
-	REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
-
-	for_each_cnic_queue(bp, i) {
-		rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
-		if (rc) {
-			BNX2X_ERR("Queue setup failed\n");
-			LOAD_ERROR_EXIT(bp, load_error_cnic2);
+	if (IS_PF(bp)) {
+		/* Enable Timer scan */
+		REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
+
+		for_each_cnic_queue(bp, i) {
+			rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
+			if (rc) {
+				BNX2X_ERR("Queue setup failed\n");
+				LOAD_ERROR_EXIT(bp, load_error_cnic2);
+			}
 		}
 	}
 
@@ -2166,8 +2370,7 @@ load_error_cnic0:
 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 {
 	int port = BP_PORT(bp);
-	u32 load_code;
-	int i, rc;
+	int i, rc = 0, load_code = 0;
 
 	DP(NETIF_MSG_IFUP, "Starting NIC load\n");
 	DP(NETIF_MSG_IFUP,
@@ -2189,8 +2392,9 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		&bp->last_reported_link.link_report_flags);
 	bnx2x_release_phy_lock(bp);
 
-	/* must be called before memory allocation and HW init */
-	bnx2x_ilt_set_info(bp);
+	if (IS_PF(bp))
+		/* must be called before memory allocation and HW init */
+		bnx2x_ilt_set_info(bp);
 
 	/*
 	 * Zero fastpath structures preserving invariants like napi, which are
@@ -2209,8 +2413,29 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	/* Set the receive queues buffer size */
 	bnx2x_set_rx_buf_size(bp);
 
-	if (bnx2x_alloc_mem(bp))
-		return -ENOMEM;
+	if (IS_PF(bp)) {
+		rc = bnx2x_alloc_mem(bp);
+		if (rc) {
+			BNX2X_ERR("Unable to allocate bp memory\n");
+			return rc;
+		}
+	}
+
+	/* Allocated memory for FW statistics  */
+	if (bnx2x_alloc_fw_stats_mem(bp))
+		LOAD_ERROR_EXIT(bp, load_error0);
+
+	/* fastpath: */
+
+	/* need to be done after alloc mem, since it's self adjusting to amount
+	 * of memory available for RSS queues
+	 */
+	rc = bnx2x_alloc_fp_mem(bp);
+	if (rc) {
+		BNX2X_ERR("Unable to allocate memory for fps\n");
+		LOAD_ERROR_EXIT(bp, load_error0);
+	}
+
 
 	/* As long as bnx2x_alloc_mem() may possibly update
 	 * bp->num_queues, bnx2x_set_real_num_queues() should always
@@ -2233,97 +2458,52 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	DP(NETIF_MSG_IFUP, "napi added\n");
 	bnx2x_napi_enable(bp);
 
-	/* set pf load just before approaching the MCP */
-	bnx2x_set_pf_load(bp);
+	if (IS_PF(bp)) {
+		/* set pf load just before approaching the MCP */
+		bnx2x_set_pf_load(bp);
 
-	/* Send LOAD_REQUEST command to MCP
-	 * Returns the type of LOAD command:
-	 * if it is the first port to be initialized
-	 * common blocks should be initialized, otherwise - not
-	 */
-	if (!BP_NOMCP(bp)) {
-		/* init fw_seq */
-		bp->fw_seq =
-			(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
-			 DRV_MSG_SEQ_NUMBER_MASK);
-		BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
-
-		/* Get current FW pulse sequence */
-		bp->fw_drv_pulse_wr_seq =
-			(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_pulse_mb) &
-			 DRV_PULSE_SEQ_MASK);
-		BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
-
-		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
-		if (!load_code) {
-			BNX2X_ERR("MCP response failure, aborting\n");
-			rc = -EBUSY;
-			LOAD_ERROR_EXIT(bp, load_error1);
-		}
-		if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
-			BNX2X_ERR("Driver load refused\n");
-			rc = -EBUSY; /* other port in diagnostic mode */
-			LOAD_ERROR_EXIT(bp, load_error1);
-		}
-		if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
-		    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
-			/* abort nic load if version mismatch */
-			if (!bnx2x_test_firmware_version(bp, true)) {
-				rc = -EBUSY;
+		/* if mcp exists send load request and analyze response */
+		if (!BP_NOMCP(bp)) {
+
+			/* attempt to load pf */
+			rc = bnx2x_nic_load_request(bp, &load_code);
+			if (rc)
+				LOAD_ERROR_EXIT(bp, load_error1);
+
+			/* what did mcp say? */
+			rc = bnx2x_nic_load_analyze_req(bp, load_code);
+			if (rc) {
+				bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
 				LOAD_ERROR_EXIT(bp, load_error2);
 			}
-		}
+		} else {
+			load_code = bnx2x_nic_load_no_mcp(bp, port);
 
-	} else {
-		int path = BP_PATH(bp);
-
-		DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d]      %d, %d, %d\n",
-		   path, load_count[path][0], load_count[path][1],
-		   load_count[path][2]);
-		load_count[path][0]++;
-		load_count[path][1 + port]++;
-		DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d]  %d, %d, %d\n",
-		   path, load_count[path][0], load_count[path][1],
-		   load_count[path][2]);
-		if (load_count[path][0] == 1)
-			load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
-		else if (load_count[path][1 + port] == 1)
-			load_code = FW_MSG_CODE_DRV_LOAD_PORT;
-		else
-			load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
-	}
 
-	if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
-	    (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
-	    (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
-		bp->port.pmf = 1;
-		/*
-		 * We need the barrier to ensure the ordering between the
-		 * writing to bp->port.pmf here and reading it from the
-		 * bnx2x_periodic_task().
-		 */
-		smp_mb();
-	} else
-		bp->port.pmf = 0;
+		}
 
-	DP(NETIF_MSG_IFUP, "pmf %d\n", bp->port.pmf);
+		/* mark pmf if applicable */
+		bnx2x_nic_load_pmf(bp, load_code);
 
-	/* Init Function state controlling object */
-	bnx2x__init_func_obj(bp);
+		/* Init Function state controlling object */
+		bnx2x__init_func_obj(bp);
+
+		/* Initialize HW */
+		rc = bnx2x_init_hw(bp, load_code);
+		if (rc) {
+			BNX2X_ERR("HW init failed, aborting\n");
+			bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
+			LOAD_ERROR_EXIT(bp, load_error2);
+		}
 
-	/* Initialize HW */
-	rc = bnx2x_init_hw(bp, load_code);
-	if (rc) {
-		BNX2X_ERR("HW init failed, aborting\n");
-		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
-		LOAD_ERROR_EXIT(bp, load_error2);
 	}
 
 	/* Connect to IRQs */
 	rc = bnx2x_setup_irqs(bp);
 	if (rc) {
-		BNX2X_ERR("IRQs setup failed\n");
-		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
+		BNX2X_ERR("setup irqs failed\n");
+		if (IS_PF(bp))
+			bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
 		LOAD_ERROR_EXIT(bp, load_error2);
 	}
 
@@ -2331,78 +2511,75 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	bnx2x_nic_init(bp, load_code);
 
 	/* Init per-function objects */
-	bnx2x_init_bp_objs(bp);
-
-	if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
-	    (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
-	    (bp->common.shmem2_base)) {
-		if (SHMEM2_HAS(bp, dcc_support))
-			SHMEM2_WR(bp, dcc_support,
-				  (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
-				   SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
-		if (SHMEM2_HAS(bp, afex_driver_support))
-			SHMEM2_WR(bp, afex_driver_support,
-				  SHMEM_AFEX_SUPPORTED_VERSION_ONE);
-	}
+	if (IS_PF(bp)) {
+		bnx2x_init_bp_objs(bp);
 
-	/* Set AFEX default VLAN tag to an invalid value */
-	bp->afex_def_vlan_tag = -1;
 
-	bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
-	rc = bnx2x_func_start(bp);
-	if (rc) {
-		BNX2X_ERR("Function start failed!\n");
-		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
-		LOAD_ERROR_EXIT(bp, load_error3);
-	}
+		/* Set AFEX default VLAN tag to an invalid value */
+		bp->afex_def_vlan_tag = -1;
+		bnx2x_nic_load_afex_dcc(bp, load_code);
+		bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
+		rc = bnx2x_func_start(bp);
+		if (rc) {
+			BNX2X_ERR("Function start failed!\n");
+			bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
 
-	/* Send LOAD_DONE command to MCP */
-	if (!BP_NOMCP(bp)) {
-		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
-		if (!load_code) {
-			BNX2X_ERR("MCP response failure, aborting\n");
-			rc = -EBUSY;
 			LOAD_ERROR_EXIT(bp, load_error3);
 		}
-	}
 
-	rc = bnx2x_setup_leading(bp);
-	if (rc) {
-		BNX2X_ERR("Setup leading failed!\n");
-		LOAD_ERROR_EXIT(bp, load_error3);
-	}
+		/* Send LOAD_DONE command to MCP */
+		if (!BP_NOMCP(bp)) {
+			load_code = bnx2x_fw_command(bp,
+						     DRV_MSG_CODE_LOAD_DONE, 0);
+			if (!load_code) {
+				BNX2X_ERR("MCP response failure, aborting\n");
+				rc = -EBUSY;
+				LOAD_ERROR_EXIT(bp, load_error3);
+			}
+		}
 
-	for_each_nondefault_eth_queue(bp, i) {
-		rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
+		rc = bnx2x_setup_leading(bp);
 		if (rc) {
-			BNX2X_ERR("Queue setup failed\n");
+			BNX2X_ERR("Setup leading failed!\n");
+			LOAD_ERROR_EXIT(bp, load_error3);
+		}
+
+		for_each_nondefault_eth_queue(bp, i) {
+			rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
+			if (rc) {
+				BNX2X_ERR("Queue setup failed\n");
+				LOAD_ERROR_EXIT(bp, load_error3);
+			}
+		}
+		rc = bnx2x_init_rss_pf(bp);
+		if (rc) {
+			BNX2X_ERR("PF RSS init failed\n");
 			LOAD_ERROR_EXIT(bp, load_error3);
 		}
-	}
 
-	rc = bnx2x_init_rss_pf(bp);
-	if (rc) {
-		BNX2X_ERR("PF RSS init failed\n");
-		LOAD_ERROR_EXIT(bp, load_error3);
 	}
 
 	/* Now when Clients are configured we are ready to work */
 	bp->state = BNX2X_STATE_OPEN;
 
 	/* Configure a ucast MAC */
-	rc = bnx2x_set_eth_mac(bp, true);
+	if (IS_PF(bp))
+		rc = bnx2x_set_eth_mac(bp, true);
 	if (rc) {
 		BNX2X_ERR("Setting Ethernet MAC failed\n");
 		LOAD_ERROR_EXIT(bp, load_error3);
 	}
 
-	if (bp->pending_max) {
+	if (IS_PF(bp) && bp->pending_max) {
 		bnx2x_update_max_mf_config(bp, bp->pending_max);
 		bp->pending_max = 0;
 	}
 
-	if (bp->port.pmf)
-		bnx2x_initial_phy_init(bp, load_mode);
+	if (bp->port.pmf) {
+		rc = bnx2x_initial_phy_init(bp, load_mode);
+		if (rc)
+			LOAD_ERROR_EXIT(bp, load_error3);
+	}
 
 	/* Start fast path */
 
@@ -2443,8 +2620,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	if (CNIC_ENABLED(bp))
 		bnx2x_load_cnic(bp);
 
-	/* mark driver is loaded in shmem2 */
-	if (SHMEM2_HAS(bp, drv_capabilities_flag)) {
+	if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
+		/* mark driver is loaded in shmem2 */
 		u32 val;
 		val = SHMEM2_RD(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)]);
 		SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)],
@@ -2453,7 +2630,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	}
 
 	/* Wait for all pending SP commands to complete */
-	if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
+	if (IS_PF(bp) && !bnx2x_wait_sp_comp(bp, ~0x0UL)) {
 		BNX2X_ERR("Timeout waiting for SP elements to complete\n");
 		bnx2x_nic_unload(bp, UNLOAD_CLOSE, false);
 		return -EBUSY;
@@ -2469,10 +2646,12 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 
 #ifndef BNX2X_STOP_ON_ERROR
 load_error3:
-	bnx2x_int_disable_sync(bp, 1);
+	if (IS_PF(bp)) {
+		bnx2x_int_disable_sync(bp, 1);
 
-	/* Clean queueable objects */
-	bnx2x_squeeze_objects(bp);
+		/* Clean queueable objects */
+		bnx2x_squeeze_objects(bp);
+	}
 
 	/* Free SKBs, SGEs, TPA pool and driver internals */
 	bnx2x_free_skbs(bp);
@@ -2482,7 +2661,7 @@ load_error3:
 	/* Release IRQs */
 	bnx2x_free_irq(bp);
 load_error2:
-	if (!BP_NOMCP(bp)) {
+	if (IS_PF(bp) && !BP_NOMCP(bp)) {
 		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
 		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
 	}
@@ -2490,15 +2669,34 @@ load_error2:
 	bp->port.pmf = 0;
 load_error1:
 	bnx2x_napi_disable(bp);
+
 	/* clear pf_load status, as it was already set */
-	bnx2x_clear_pf_load(bp);
+	if (IS_PF(bp))
+		bnx2x_clear_pf_load(bp);
 load_error0:
+	bnx2x_free_fp_mem(bp);
+	bnx2x_free_fw_stats_mem(bp);
 	bnx2x_free_mem(bp);
 
 	return rc;
 #endif /* ! BNX2X_STOP_ON_ERROR */
 }
 
+static int bnx2x_drain_tx_queues(struct bnx2x *bp)
+{
+	u8 rc = 0, cos, i;
+
+	/* Wait until tx fastpath tasks complete */
+	for_each_tx_queue(bp, i) {
+		struct bnx2x_fastpath *fp = &bp->fp[i];
+		for_each_cos_in_tx_queue(fp, cos)
+			rc = bnx2x_clean_tx_queue(bp, fp->txdata_ptr[cos]);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
 /* must be called with rtnl_lock */
 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 {
@@ -2508,15 +2706,16 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	DP(NETIF_MSG_IFUP, "Starting NIC unload\n");
 
 	/* mark driver is unloaded in shmem2 */
-	if (SHMEM2_HAS(bp, drv_capabilities_flag)) {
+	if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
 		u32 val;
 		val = SHMEM2_RD(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)]);
 		SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)],
 			  val & ~DRV_FLAGS_CAPABILITIES_LOADED_L2);
 	}
 
-	if ((bp->state == BNX2X_STATE_CLOSED) ||
-	    (bp->state == BNX2X_STATE_ERROR)) {
+	if (IS_PF(bp) &&
+	    (bp->state == BNX2X_STATE_CLOSED ||
+	     bp->state == BNX2X_STATE_ERROR)) {
 		/* We can get here if the driver has been unloaded
 		 * during parity error recovery and is either waiting for a
 		 * leader to complete or for other functions to unload and
@@ -2553,13 +2752,19 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 
 	del_timer_sync(&bp->timer);
 
-	/* Set ALWAYS_ALIVE bit in shmem */
-	bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
+	if (IS_PF(bp)) {
 
-	bnx2x_drv_pulse(bp);
+		/* Set ALWAYS_ALIVE bit in shmem */
+		bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
 
-	bnx2x_stats_handle(bp, STATS_EVENT_STOP);
-	bnx2x_save_statistics(bp);
+		bnx2x_drv_pulse(bp);
+
+		bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+		bnx2x_save_statistics(bp);
+	}
+
+	/* wait till consumers catch up with producers in all queues */
+	bnx2x_drain_tx_queues(bp);
 
 	/* Cleanup the chip if needed */
 	if (unload_mode != UNLOAD_RECOVERY)
@@ -2595,7 +2800,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	 * At this stage no more interrupts will arrive so we may safly clean
 	 * the queueable objects here in case they failed to get cleaned so far.
 	 */
-	bnx2x_squeeze_objects(bp);
+	if (IS_PF(bp))
+		bnx2x_squeeze_objects(bp);
 
 	/* There should be no more pending SP commands at this stage */
 	bp->sp_state = 0;
@@ -2609,19 +2815,22 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	for_each_rx_queue(bp, i)
 		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
 
-	if (CNIC_LOADED(bp)) {
+	bnx2x_free_fp_mem(bp);
+	if (CNIC_LOADED(bp))
 		bnx2x_free_fp_mem_cnic(bp);
-		bnx2x_free_mem_cnic(bp);
-	}
-	bnx2x_free_mem(bp);
 
+	if (IS_PF(bp)) {
+		bnx2x_free_mem(bp);
+		if (CNIC_LOADED(bp))
+			bnx2x_free_mem_cnic(bp);
+	}
 	bp->state = BNX2X_STATE_CLOSED;
 	bp->cnic_loaded = false;
 
 	/* Check if there are pending parity attentions. If there are - set
 	 * RECOVERY_IN_PROGRESS.
 	 */
-	if (bnx2x_chk_parity_attn(bp, &global, false)) {
+	if (IS_PF(bp) && bnx2x_chk_parity_attn(bp, &global, false)) {
 		bnx2x_set_reset_in_progress(bp);
 
 		/* Set RESET_IS_GLOBAL if needed */
@@ -2633,7 +2842,9 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	/* The last driver must disable a "close the gate" if there is no
 	 * parity attention or "process kill" pending.
 	 */
-	if (!bnx2x_clear_pf_load(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
+	if (IS_PF(bp) &&
+	    !bnx2x_clear_pf_load(bp) &&
+	    bnx2x_reset_is_done(bp, BP_PATH(bp)))
 		bnx2x_disable_close_the_gate(bp);
 
 	DP(NETIF_MSG_IFUP, "Ending NIC unload\n");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index c5813c6..2e32a21 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1128,11 +1128,18 @@ static inline u8 bnx2x_fp_qzone_id(struct bnx2x_fastpath *fp)
 static inline u32 bnx2x_rx_ustorm_prods_offset(struct bnx2x_fastpath *fp)
 {
 	struct bnx2x *bp = fp->bp;
-
-	if (!CHIP_IS_E1x(bp))
-		return USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id);
+	u32 offset = BAR_USTRORM_INTMEM;
+
+	if (IS_VF(bp))
+		return PXP_VF_ADDR_USDM_QUEUES_START +
+			bp->acquire_resp.resc.hw_qid[fp->index] *
+			sizeof(struct ustorm_queue_zone_data);
+	else if (!CHIP_IS_E1x(bp))
+		offset += USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id);
 	else
-		return USTORM_RX_PRODS_E1X_OFFSET(BP_PORT(bp), fp->cl_id);
+		offset += USTORM_RX_PRODS_E1X_OFFSET(BP_PORT(bp), fp->cl_id);
+
+	return offset;
 }
 
 static inline void bnx2x_init_txdata(struct bnx2x *bp,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index c40c025..ce6fb27 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -890,7 +890,7 @@ static void bnx2x_set_msglevel(struct net_device *dev, u32 level)
 
 	if (capable(CAP_NET_ADMIN)) {
 		/* dump MCP trace */
-		if (level & BNX2X_MSG_MCP)
+		if (IS_PF(bp) && (level & BNX2X_MSG_MCP))
 			bnx2x_fw_dump_lvl(bp, KERN_INFO);
 		bp->msg_enable = level;
 	}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 0ee0572..1a847a4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2458,19 +2458,50 @@ void bnx2x__link_status_update(struct bnx2x *bp)
 		return;
 
 	/* read updated dcb configuration */
-	bnx2x_dcbx_pmf_update(bp);
-
-	bnx2x_link_status_update(&bp->link_params, &bp->link_vars);
+	if (IS_PF(bp)) {
+		bnx2x_dcbx_pmf_update(bp);
+		bnx2x_link_status_update(&bp->link_params, &bp->link_vars);
+		if (bp->link_vars.link_up)
+			bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
+		else
+			bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+			/* indicate link status */
+		bnx2x_link_report(bp);
 
-	if (bp->link_vars.link_up)
+	} else { /* VF */
+		bp->port.supported[0] |= (SUPPORTED_10baseT_Half |
+					  SUPPORTED_10baseT_Full |
+					  SUPPORTED_100baseT_Half |
+					  SUPPORTED_100baseT_Full |
+					  SUPPORTED_1000baseT_Full |
+					  SUPPORTED_2500baseX_Full |
+					  SUPPORTED_10000baseT_Full |
+					  SUPPORTED_TP |
+					  SUPPORTED_FIBRE |
+					  SUPPORTED_Autoneg |
+					  SUPPORTED_Pause |
+					  SUPPORTED_Asym_Pause);
+		bp->port.advertising[0] = bp->port.supported[0];
+
+		bp->link_params.bp = bp;
+		bp->link_params.port = BP_PORT(bp);
+		bp->link_params.req_duplex[0] = DUPLEX_FULL;
+		bp->link_params.req_flow_ctrl[0] = BNX2X_FLOW_CTRL_NONE;
+		bp->link_params.req_line_speed[0] = SPEED_10000;
+		bp->link_params.speed_cap_mask[0] = 0x7f0000;
+		bp->link_params.switch_cfg = SWITCH_CFG_10G;
+		bp->link_vars.mac_type = MAC_TYPE_BMAC;
+		bp->link_vars.line_speed = SPEED_10000;
+		bp->link_vars.link_status = (LINK_STATUS_LINK_UP |
+					     LINK_STATUS_SPEED_AND_DUPLEX_10GTFD);
+		bp->link_vars.link_up = 1;
+		bp->link_vars.duplex = DUPLEX_FULL;
+		bp->link_vars.flow_ctrl = BNX2X_FLOW_CTRL_NONE;
+		__bnx2x_link_report(bp);
 		bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
-	else
-		bnx2x_stats_handle(bp, STATS_EVENT_STOP);
 
-	/* indicate link status */
-	bnx2x_link_report(bp);
+	}
 }
-
 static int bnx2x_afex_func_update(struct bnx2x *bp, u16 vifid,
 				  u16 vlan_val, u8 allowed_prio)
 {
@@ -5648,7 +5679,7 @@ static u8 bnx2x_fp_cl_id(struct bnx2x_fastpath *fp)
 		return bnx2x_fp_igu_sb_id(fp);
 }
 
-static void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx)
+void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx)
 {
 	struct bnx2x_fastpath *fp = &bp->fp[fp_idx];
 	u8 cos;
@@ -5683,6 +5714,15 @@ static void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx)
 		cids[cos] = fp->txdata_ptr[cos]->cid;
 	}
 
+	/* nothing more for vf to do here */
+	if (IS_VF(bp))
+		return;
+
+	bnx2x_init_sb(bp, fp->status_blk_mapping, BNX2X_VF_ID_INVALID, false,
+		      fp->fw_sb_id, fp->igu_sb_id);
+
+	bnx2x_update_fpsb_idx(fp);
+
 	bnx2x_init_queue_obj(bp, &bnx2x_sp_obj(bp, fp).q_obj, fp->cl_id, cids,
 			     fp->max_cos, BP_FUNC(bp), bnx2x_sp(bp, q_rdata),
 			     bnx2x_sp_mapping(bp, q_rdata), q_type);
@@ -5692,13 +5732,10 @@ static void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx)
 	 */
 	bnx2x_init_vlan_mac_fp_objs(fp, BNX2X_OBJ_TYPE_RX_TX);
 
-	DP(NETIF_MSG_IFUP, "queue[%d]:  bnx2x_init_sb(%p,%p)  cl_id %d  fw_sb %d  igu_sb %d\n",
-		   fp_idx, bp, fp->status_blk.e2_sb, fp->cl_id, fp->fw_sb_id,
-		   fp->igu_sb_id);
-	bnx2x_init_sb(bp, fp->status_blk_mapping, BNX2X_VF_ID_INVALID, false,
-		      fp->fw_sb_id, fp->igu_sb_id);
-
-	bnx2x_update_fpsb_idx(fp);
+	DP(NETIF_MSG_IFUP,
+	   "queue[%d]:  bnx2x_init_sb(%p,%p)  cl_id %d  fw_sb %d  igu_sb %d\n",
+	   fp_idx, bp, fp->status_blk.e2_sb, fp->cl_id, fp->fw_sb_id,
+	   fp->igu_sb_id);
 }
 
 static void bnx2x_init_tx_ring_one(struct bnx2x_fp_txdata *txdata)
@@ -5735,6 +5772,7 @@ static void bnx2x_init_tx_rings_cnic(struct bnx2x *bp)
 	for_each_tx_queue_cnic(bp, i)
 		bnx2x_init_tx_ring_one(bp->fp[i].txdata_ptr[0]);
 }
+
 static void bnx2x_init_tx_rings(struct bnx2x *bp)
 {
 	int i;
@@ -5770,17 +5808,22 @@ void bnx2x_nic_init(struct bnx2x *bp, u32 load_code)
 
 	for_each_eth_queue(bp, i)
 		bnx2x_init_eth_fp(bp, i);
+
+	/* ensure status block indices were read */
+	rmb();
+	bnx2x_init_rx_rings(bp);
+	bnx2x_init_tx_rings(bp);
+
+	if (IS_VF(bp))
+		return;
+
 	/* Initialize MOD_ABS interrupts */
 	bnx2x_init_mod_abs_int(bp, &bp->link_vars, bp->common.chip_id,
 			       bp->common.shmem_base, bp->common.shmem2_base,
 			       BP_PORT(bp));
-	/* ensure status block indices were read */
-	rmb();
 
 	bnx2x_init_def_sb(bp);
 	bnx2x_update_dsb_idx(bp);
-	bnx2x_init_rx_rings(bp);
-	bnx2x_init_tx_rings(bp);
 	bnx2x_init_sp_ring(bp);
 	bnx2x_init_eq_ring(bp);
 	bnx2x_init_internal(bp, load_code);
@@ -9617,7 +9660,7 @@ static int __devinit bnx2x_prev_unload_uncommon(struct bnx2x *bp)
 	 * the one required, then FLR will be sufficient to clean any residue
 	 * left by previous driver
 	 */
-	rc = bnx2x_test_firmware_version(bp, false);
+	rc = bnx2x_nic_load_analyze_req(bp, FW_MSG_CODE_DRV_LOAD_FUNCTION);
 
 	if (!rc) {
 		/* fw version is good */
@@ -11151,17 +11194,21 @@ static int bnx2x_open(struct net_device *dev)
 
 	bnx2x_set_power_state(bp, PCI_D0);
 
-	other_load_status = bnx2x_get_load_status(bp, other_engine);
-	load_status = bnx2x_get_load_status(bp, BP_PATH(bp));
+	if (IS_PF(bp)) {
+		other_load_status = bnx2x_get_load_status(bp, other_engine);
+		load_status = bnx2x_get_load_status(bp, BP_PATH(bp));
+	}
 
 	/*
 	 * If parity had happen during the unload, then attentions
 	 * and/or RECOVERY_IN_PROGRES may still be set. In this case we
 	 * want the first function loaded on the current engine to
 	 * complete the recovery.
+	 * Parity recovery is only relevant for PF driver.
 	 */
-	if (!bnx2x_reset_is_done(bp, BP_PATH(bp)) ||
-	    bnx2x_chk_parity_attn(bp, &global, true))
+	if (IS_PF(bp) &&
+	    (!bnx2x_reset_is_done(bp, BP_PATH(bp)) ||
+	    bnx2x_chk_parity_attn(bp, &global, true)))
 		do {
 			/*
 			 * If there are attentions and they are in a global
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 4c51f43..e92577a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -6542,6 +6542,13 @@
 #define PXP_VF_ADDR_IGU_SIZE		(0x3000)
 #define PXP_VF_ADDR_IGU_END\
 	((PXP_VF_ADDR_IGU_START) + (PXP_VF_ADDR_IGU_SIZE) - 1)
+
+#define PXP_VF_ADDR_USDM_QUEUES_START		0x3000
+#define PXP_VF_ADDR_USDM_QUEUES_SIZE\
+	(PXP_VF_ADRR_NUM_QUEUES * PXP_ADDR_QUEUE_SIZE)
+#define PXP_VF_ADDR_USDM_QUEUES_END\
+	((PXP_VF_ADDR_USDM_QUEUES_START) + (PXP_VF_ADDR_USDM_QUEUES_SIZE) - 1)
+
 #define PXP_VF_ADDR_CSDM_GLOBAL_START		0x7600
 #define PXP_VF_ADDR_CSDM_GLOBAL_SIZE		(PXP_ADDR_REG_SIZE)
 #define PXP_VF_ADDR_CSDM_GLOBAL_END\
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 11/22] bnx2x: Infrastructure for VF <-> PF request on PF side
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Support interrupt from device which indicates VF has placed
A request on the VF <-> PF channel.
The PF driver issues a DMAE to retrieve the request from the VM
memory (the Ghost Physical Address of the request is contained
in the interrupt. The PF driver uses the GPA in the DMAE request,
which is translated by the IOMMU to the correct physical address).
The request which arrives is examined to recognize the sending VF.
The PF driver allocates a workitem to handle the VF Operation (vfop).

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    6 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |  203 ++++++++++++++----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  247 ++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |  102 +++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |  158 +++++++++++++
 5 files changed, 671 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index d0d1467..52012f2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1380,6 +1380,7 @@ struct bnx2x {
 	int			mrrs;
 
 	struct delayed_work	sp_task;
+	atomic_t		interrupt_occurred;
 	struct delayed_work	sp_rtnl_task;
 
 	struct delayed_work	period_task;
@@ -1871,6 +1872,11 @@ u32 bnx2x_dmae_opcode_clr_src_reset(u32 opcode);
 u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
 		      bool with_comp, u8 comp_type);
 
+void bnx2x_prep_dmae_with_comp(struct bnx2x *bp, struct dmae_command *dmae,
+			       u8 src_type, u8 dst_type);
+int bnx2x_issue_dmae_with_comp(struct bnx2x *bp, struct dmae_command *dmae);
+void bnx2x_dp_dmae(struct bnx2x *bp, struct dmae_command *dmae, int msglvl);
+
 u8 bnx2x_is_pcie_pending(struct pci_dev *dev);
 
 void bnx2x_calc_fc_adv(struct bnx2x *bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a955b1d..cd80552 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -345,6 +345,66 @@ static u32 bnx2x_reg_rd_ind(struct bnx2x *bp, u32 addr)
 #define DMAE_DP_DST_PCI		"pci dst_addr [%x:%08x]"
 #define DMAE_DP_DST_NONE	"dst_addr [none]"
 
+void bnx2x_dp_dmae(struct bnx2x *bp, struct dmae_command *dmae, int msglvl)
+{
+	u32 src_type = dmae->opcode & DMAE_COMMAND_SRC;
+
+	switch (dmae->opcode & DMAE_COMMAND_DST) {
+	case DMAE_CMD_DST_PCI:
+		if (src_type == DMAE_CMD_SRC_PCI)
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src [%x:%08x], len [%d*4], dst [%x:%08x]\n"
+			   "comp_addr [%x:%08x], comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo,
+			   dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo,
+			   dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		else
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src [%08x], len [%d*4], dst [%x:%08x]\n"
+			   "comp_addr [%x:%08x], comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_lo >> 2,
+			   dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo,
+			   dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		break;
+	case DMAE_CMD_DST_GRC:
+		if (src_type == DMAE_CMD_SRC_PCI)
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src [%x:%08x], len [%d*4], dst_addr [%08x]\n"
+			   "comp_addr [%x:%08x], comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo,
+			   dmae->len, dmae->dst_addr_lo >> 2,
+			   dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		else
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src [%08x], len [%d*4], dst [%08x]\n"
+			   "comp_addr [%x:%08x], comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_lo >> 2,
+			   dmae->len, dmae->dst_addr_lo >> 2,
+			   dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		break;
+	default:
+		if (src_type == DMAE_CMD_SRC_PCI)
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src_addr [%x:%08x]  len [%d * 4]  dst_addr [none]\n"
+			   "comp_addr [%x:%08x]  comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo,
+			   dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		else
+			DP(msglvl, "DMAE: opcode 0x%08x\n"
+			   "src_addr [%08x]  len [%d * 4]  dst_addr [none]\n"
+			   "comp_addr [%x:%08x]  comp_val 0x%08x\n",
+			   dmae->opcode, dmae->src_addr_lo >> 2,
+			   dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo,
+			   dmae->comp_val);
+		break;
+	}
+
+}
 
 /* copy command into DMAE command memory and set DMAE command go */
 void bnx2x_post_dmae(struct bnx2x *bp, struct dmae_command *dmae, int idx)
@@ -395,7 +455,7 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
 	return opcode;
 }
 
-static void bnx2x_prep_dmae_with_comp(struct bnx2x *bp,
+void bnx2x_prep_dmae_with_comp(struct bnx2x *bp,
 				      struct dmae_command *dmae,
 				      u8 src_type, u8 dst_type)
 {
@@ -411,9 +471,8 @@ static void bnx2x_prep_dmae_with_comp(struct bnx2x *bp,
 	dmae->comp_val = DMAE_COMP_VAL;
 }
 
-/* issue a dmae command over the init-channel and wailt for completion */
-static int bnx2x_issue_dmae_with_comp(struct bnx2x *bp,
-				      struct dmae_command *dmae)
+/* issue a dmae command over the init-channel and wait for completion */
+int bnx2x_issue_dmae_with_comp(struct bnx2x *bp, struct dmae_command *dmae)
 {
 	u32 *wb_comp = bnx2x_sp(bp, wb_comp);
 	int cnt = CHIP_REV_IS_SLOW(bp) ? (400000) : 4000;
@@ -1598,6 +1657,24 @@ static bool bnx2x_trylock_leader_lock(struct bnx2x *bp)
 
 static void bnx2x_cnic_cfc_comp(struct bnx2x *bp, int cid, u8 err);
 
+/* schedule the sp task and mark that interrupt occurred (runs from ISR) */
+static int bnx2x_schedule_sp_task(struct bnx2x *bp)
+{
+	/* Set the interrupt occurred bit for the sp-task to recognize it
+	 * must ack the interrupt and transition according to the IGU
+	 * state machine.
+	 */
+	atomic_set(&bp->interrupt_occurred, 1);
+
+	/* The sp_task must execute only after this bit
+	 * is set, otherwise we will get out of sync and miss all
+	 * further interrupts. Hence, the barrier.
+	 */
+	smp_wmb();
+
+	/* schedule sp_task to workqueue */
+	return queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
+}
 
 void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 {
@@ -1612,6 +1689,13 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 	   fp->index, cid, command, bp->state,
 	   rr_cqe->ramrod_cqe.ramrod_type);
 
+	/* If cid is within VF range, replace the slowpath object with the
+	 * one corresponding to this VF
+	 */
+	if (cid >= BNX2X_FIRST_VF_CID  &&
+	    cid < BNX2X_FIRST_VF_CID + BNX2X_VF_CIDS)
+		bnx2x_iov_set_queue_sp_obj(bp, cid, &q_obj);
+
 	switch (command) {
 	case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE):
 		DP(BNX2X_MSG_SP, "got UPDATE ramrod. CID %d\n", cid);
@@ -1663,6 +1747,8 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 #else
 		return;
 #endif
+	/* SRIOV: reschedule any 'in_progress' operations */
+	bnx2x_iov_sp_event(bp, cid, true);
 
 	smp_mb__before_atomic_inc();
 	atomic_inc(&bp->cq_spq_left);
@@ -1688,8 +1774,8 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 		clear_bit(BNX2X_AFEX_FCOE_Q_UPDATE_PENDING, &bp->sp_state);
 		smp_mb__after_clear_bit();
 
-		/* schedule workqueue to send ack to MCP */
-		queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
+		/* schedule the sp task as mcp ack is required */
+		bnx2x_schedule_sp_task(bp);
 	}
 
 	return;
@@ -1749,7 +1835,11 @@ irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 	}
 
 	if (unlikely(status & 0x1)) {
-		queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
+
+		/* schedule sp task to perform default status block work, ack
+		 * attentions and enable interrupts.
+		 */
+		bnx2x_schedule_sp_task(bp);
 
 		status &= ~0x1;
 		if (!status)
@@ -4812,7 +4902,7 @@ static void bnx2x_eq_int(struct bnx2x *bp)
 	u8 echo;
 	u32 cid;
 	u8 opcode;
-	int spqe_cnt = 0;
+	int rc, spqe_cnt = 0;
 	struct bnx2x_queue_sp_obj *q_obj;
 	struct bnx2x_func_sp_obj *f_obj = &bp->func_obj;
 	struct bnx2x_raw_obj *rss_raw = &bp->rss_conf_obj.raw;
@@ -4843,12 +4933,23 @@ static void bnx2x_eq_int(struct bnx2x *bp)
 
 		elem = &bp->eq_ring[EQ_DESC(sw_cons)];
 
+		rc = bnx2x_iov_eq_sp_event(bp, elem);
+		if (!rc) {
+			DP(BNX2X_MSG_IOV, "bnx2x_iov_eq_sp_event returned %d\n",
+			   rc);
+			goto next_spqe;
+		}
 		cid = SW_CID(elem->message.data.cfc_del_event.cid);
 		opcode = elem->message.opcode;
 
 
 		/* handle eq element */
 		switch (opcode) {
+		case EVENT_RING_OPCODE_VF_PF_CHANNEL:
+			DP(BNX2X_MSG_IOV, "vf pf channel element on eq");
+			bnx2x_vf_mbx(bp, &elem->message.data.vf_pf_event);
+			continue;
+
 		case EVENT_RING_OPCODE_STAT_QUERY:
 			DP(BNX2X_MSG_SP | BNX2X_MSG_STATS,
 			   "got statistics comp event %d\n",
@@ -5014,50 +5115,67 @@ next_spqe:
 static void bnx2x_sp_task(struct work_struct *work)
 {
 	struct bnx2x *bp = container_of(work, struct bnx2x, sp_task.work);
-	u16 status;
 
-	status = bnx2x_update_dsb_idx(bp);
-/*	if (status == 0)				     */
-/*		BNX2X_ERR("spurious slowpath interrupt!\n"); */
+	DP(BNX2X_MSG_SP, "sp task invoked\n");
 
-	DP(BNX2X_MSG_SP, "got a slowpath interrupt (status 0x%x)\n", status);
+	/* make sure the atomic interupt_occurred has bee written */
+	smp_rmb();
+	if (atomic_read(&bp->interrupt_occurred)) {
 
-	/* HW attentions */
-	if (status & BNX2X_DEF_SB_ATT_IDX) {
-		bnx2x_attn_int(bp);
-		status &= ~BNX2X_DEF_SB_ATT_IDX;
-	}
+		/* what work needs to be performed? */
+		u16 status = bnx2x_update_dsb_idx(bp);
+		DP(BNX2X_MSG_SP, "status %x\n", status);
 
-	/* SP events: STAT_QUERY and others */
-	if (status & BNX2X_DEF_SB_IDX) {
-		struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
+		DP(BNX2X_MSG_SP, "setting interrupt_occurred to 0");
+		atomic_set(&bp->interrupt_occurred, 0);
+
+		/* HW attentions */
+		if (status & BNX2X_DEF_SB_ATT_IDX) {
+			bnx2x_attn_int(bp);
+			status &= ~BNX2X_DEF_SB_ATT_IDX;
+		}
+
+		/* SP events: STAT_QUERY and others */
+		if (status & BNX2X_DEF_SB_IDX) {
+			struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
 
 		if (FCOE_INIT(bp) &&
-		    (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
-			/*
-			 * Prevent local bottom-halves from running as
-			 * we are going to change the local NAPI list.
-			 */
-			local_bh_disable();
-			napi_schedule(&bnx2x_fcoe(bp, napi));
-			local_bh_enable();
+			    (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+				/*
+				 * Prevent local bottom-halves from running as
+				 * we are going to change the local NAPI list.
+				 */
+				local_bh_disable();
+				napi_schedule(&bnx2x_fcoe(bp, napi));
+				local_bh_enable();
+			}
+
+			/* Handle EQ completions */
+			bnx2x_eq_int(bp);
+
+			bnx2x_ack_sb(bp, bp->igu_dsb_id, USTORM_ID,
+				     le16_to_cpu(bp->def_idx), IGU_INT_NOP, 1);
+
+			status &= ~BNX2X_DEF_SB_IDX;
 		}
 
-		/* Handle EQ completions */
-		bnx2x_eq_int(bp);
+		/* if status is non zero then perhaps something went wrong */
+		if (unlikely(status))
+			DP(BNX2X_MSG_SP,
+			   "got an unknown interrupt! (status 0x%x)\n", status);
 
-		bnx2x_ack_sb(bp, bp->igu_dsb_id, USTORM_ID,
-			le16_to_cpu(bp->def_idx), IGU_INT_NOP, 1);
+		/* ack status block only if something was actually handled */
+		bnx2x_ack_sb(bp, bp->igu_dsb_id, ATTENTION_ID,
+			     le16_to_cpu(bp->def_att_idx), IGU_INT_ENABLE, 1);
 
-		status &= ~BNX2X_DEF_SB_IDX;
 	}
 
-	if (unlikely(status))
-		DP(BNX2X_MSG_SP, "got an unknown interrupt! (status 0x%x)\n",
-		   status);
-
-	bnx2x_ack_sb(bp, bp->igu_dsb_id, ATTENTION_ID,
-	     le16_to_cpu(bp->def_att_idx), IGU_INT_ENABLE, 1);
+	/* must be called after the EQ processing (since eq leads to sriov
+	 * ramrod completion flows).
+	 * This flow may have been scheduled by the arrival of a ramrod
+	 * completion, or by the sriov code rescheduling itself.
+	 */
+	bnx2x_iov_sp_task(bp);
 
 	/* afex - poll to check if VIFSET_ACK should be sent to MFW */
 	if (test_and_clear_bit(BNX2X_AFEX_PENDING_VIFSET_MCP_ACK,
@@ -5090,7 +5208,10 @@ irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
 		rcu_read_unlock();
 	}
 
-	queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
+	/* schedule sp task to perform default status block work, ack
+	 * attentions and enable interrupts.
+	 */
+	bnx2x_schedule_sp_task(bp);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index a5675b8..93497e5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -509,6 +509,16 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	}
 }
 
+void __devexit bnx2x_iov_remove_one(struct bnx2x *bp)
+{
+	/* if SRIOV is not enabled there's nothing to do */
+	if (!IS_SRIOV(bp))
+		return;
+
+	/* free vf database */
+	__bnx2x_iov_free_vfdb(bp);
+}
+
 void bnx2x_iov_free_mem(struct bnx2x *bp)
 {
 	int i;
@@ -680,12 +690,241 @@ int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line)
 	}
 	return line+i;
 }
-void __devexit bnx2x_iov_remove_one(struct bnx2x *bp)
+
+static u8 bnx2x_iov_is_vf_cid(struct bnx2x *bp, u16 cid)
 {
-	/* if SRIOV is not enabled there's nothing to do */
+	return ((cid >= BNX2X_FIRST_VF_CID) &&
+		((cid - BNX2X_FIRST_VF_CID) < BNX2X_VF_CIDS));
+}
+
+static
+void bnx2x_vf_handle_classification_eqe(struct bnx2x *bp,
+					struct bnx2x_vf_queue *vfq,
+					union event_ring_elem *elem)
+{
+	unsigned long ramrod_flags = 0;
+	int rc = 0;
+
+	/* Always push next commands out, don't wait here */
+	set_bit(RAMROD_CONT, &ramrod_flags);
+
+	switch (elem->message.data.eth_event.echo >> BNX2X_SWCID_SHIFT) {
+	case BNX2X_FILTER_MAC_PENDING:
+		rc = vfq->mac_obj.complete(bp, &vfq->mac_obj, elem,
+					   &ramrod_flags);
+		break;
+	case BNX2X_FILTER_VLAN_PENDING:
+		rc = vfq->vlan_obj.complete(bp, &vfq->vlan_obj, elem,
+					    &ramrod_flags);
+		break;
+	default:
+		BNX2X_ERR("Unsupported classification command: %d\n",
+			  elem->message.data.eth_event.echo);
+		return;
+	}
+	if (rc < 0)
+		BNX2X_ERR("Failed to schedule new commands: %d\n", rc);
+	else if (rc > 0)
+		DP(BNX2X_MSG_IOV, "Scheduled next pending commands...\n");
+}
+
+static
+void bnx2x_vf_handle_mcast_eqe(struct bnx2x *bp,
+			       struct bnx2x_virtf *vf)
+{
+	struct bnx2x_mcast_ramrod_params rparam = {NULL};
+	int rc;
+
+	rparam.mcast_obj = &vf->mcast_obj;
+
+	vf->mcast_obj.raw.clear_pending(&vf->mcast_obj.raw);
+
+	/* If there are pending mcast commands - send them */
+	if (vf->mcast_obj.check_pending(&vf->mcast_obj)) {
+		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
+		if (rc < 0)
+			BNX2X_ERR("Failed to send pending mcast commands: %d\n",
+				  rc);
+	}
+}
+
+static
+void bnx2x_vf_handle_filters_eqe(struct bnx2x *bp,
+				 struct bnx2x_virtf *vf)
+{
+	smp_mb__before_clear_bit();
+	clear_bit(BNX2X_FILTER_RX_MODE_PENDING, &vf->filter_state);
+	smp_mb__after_clear_bit();
+}
+
+int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem)
+{
+	struct bnx2x_virtf *vf;
+	int qidx = 0, abs_vfid;
+	u8 opcode;
+	u16 cid = 0xffff;
+
+	if (!IS_SRIOV(bp))
+		return 1;
+
+	/* first get the cid - the only events we handle here are cfc-delete
+	 * and set-mac completion
+	 */
+	opcode = elem->message.opcode;
+
+	switch (opcode) {
+	case EVENT_RING_OPCODE_CFC_DEL:
+		cid = SW_CID((__force __le32)
+			     elem->message.data.cfc_del_event.cid);
+		DP(BNX2X_MSG_IOV, "checking cfc-del comp cid=%d\n", cid);
+		break;
+	case EVENT_RING_OPCODE_CLASSIFICATION_RULES:
+	case EVENT_RING_OPCODE_MULTICAST_RULES:
+	case EVENT_RING_OPCODE_FILTERS_RULES:
+		cid = (elem->message.data.eth_event.echo &
+		       BNX2X_SWCID_MASK);
+		DP(BNX2X_MSG_IOV, "checking filtering comp cid=%d\n", cid);
+		break;
+	case EVENT_RING_OPCODE_VF_FLR:
+		abs_vfid = elem->message.data.vf_flr_event.vf_id;
+		DP(BNX2X_MSG_IOV, "Got VF FLR notification abs_vfid=%d\n", abs_vfid);
+		goto get_vf;
+	case EVENT_RING_OPCODE_MALICIOUS_VF:
+		abs_vfid = elem->message.data.malicious_vf_event.vf_id;
+		DP(BNX2X_MSG_IOV, "Got VF MALICIOUS notification abs_vfid=%d\n", abs_vfid);
+		goto get_vf;
+	default:
+		return 1;
+	}
+
+	/* check if the cid is the VF range */
+	if (!bnx2x_iov_is_vf_cid(bp, cid)) {
+		DP(BNX2X_MSG_IOV, "cid is outside vf range: %d\n", cid);
+		return 1;
+	}
+	/* extract vf and rxq index from vf_cid - relies on the following:
+	 * 1. vfid on cid reflects the true abs_vfid
+	 * 2. the max number of VFs (per path) is 64
+	 */
+	qidx = cid & ((1 << BNX2X_VF_CID_WND)-1);
+	abs_vfid = (cid >> BNX2X_VF_CID_WND) & (BNX2X_MAX_NUM_OF_VFS-1);
+get_vf:
+	vf = bnx2x_vf_by_abs_fid(bp, abs_vfid);
+
+	if (!vf) {
+		BNX2X_ERR("EQ completion for unknown VF, cid %d, abs_vfid %d\n",
+			  cid, abs_vfid);
+		return 0;
+	}
+
+	switch (opcode) {
+	case EVENT_RING_OPCODE_CFC_DEL:
+		DP(BNX2X_MSG_IOV, "got VF [%d:%d] cfc delete ramrod\n",
+		   vf->abs_vfid, qidx);
+		vfq_get(vf, qidx)->sp_obj.complete_cmd(bp,
+						       &vfq_get(vf,
+								qidx)->sp_obj,
+						       BNX2X_Q_CMD_CFC_DEL);
+		break;
+	case EVENT_RING_OPCODE_CLASSIFICATION_RULES:
+		DP(BNX2X_MSG_IOV, "got VF [%d:%d] set mac/vlan ramrod\n",
+		   vf->abs_vfid, qidx);
+		bnx2x_vf_handle_classification_eqe(bp, vfq_get(vf, qidx), elem);
+		break;
+	case EVENT_RING_OPCODE_MULTICAST_RULES:
+		DP(BNX2X_MSG_IOV, "got VF [%d:%d] set mcast ramrod\n",
+		   vf->abs_vfid, qidx);
+		bnx2x_vf_handle_mcast_eqe(bp, vf);
+		break;
+	case EVENT_RING_OPCODE_FILTERS_RULES:
+		DP(BNX2X_MSG_IOV, "got VF [%d:%d] set rx-mode ramrod\n",
+		   vf->abs_vfid, qidx);
+		bnx2x_vf_handle_filters_eqe(bp, vf);
+		break;
+	case EVENT_RING_OPCODE_VF_FLR:
+		DP(BNX2X_MSG_IOV, "got VF [%d] FLR notification\n",
+		   vf->abs_vfid);
+		/* Do nothing for now */
+		break;
+	case EVENT_RING_OPCODE_MALICIOUS_VF:
+		DP(BNX2X_MSG_IOV, "got VF [%d] MALICIOUS notification\n",
+		   vf->abs_vfid);
+		/* Do nothing for now */
+		break;
+	}
+	/* SRIOV: reschedule any 'in_progress' operations */
+	bnx2x_iov_sp_event(bp, cid, false);
+
+	return 0;
+}
+
+static struct bnx2x_virtf *bnx2x_vf_by_cid(struct bnx2x *bp, int vf_cid)
+{
+	/* extract the vf from vf_cid - relies on the following:
+	 * 1. vfid on cid reflects the true abs_vfid
+	 * 2. the max number of VFs (per path) is 64
+	 */
+	int abs_vfid = (vf_cid >> BNX2X_VF_CID_WND) & (BNX2X_MAX_NUM_OF_VFS-1);
+	return bnx2x_vf_by_abs_fid(bp, abs_vfid);
+}
+
+void bnx2x_iov_set_queue_sp_obj(struct bnx2x *bp, int vf_cid,
+				struct bnx2x_queue_sp_obj **q_obj)
+{
+	struct bnx2x_virtf *vf;
+
 	if (!IS_SRIOV(bp))
 		return;
 
-	/* free vf database */
-	__bnx2x_iov_free_vfdb(bp);
+	vf = bnx2x_vf_by_cid(bp, vf_cid);
+
+	if (vf) {
+		/**
+		 * extract queue index from vf_cid - relies on the following:
+		 * 1. vfid on cid reflects the true abs_vfid
+		 * 2. the max number of VFs (per path) is 64
+		 */
+		int q_index = vf_cid & ((1 << BNX2X_VF_CID_WND)-1);
+		*q_obj = &bnx2x_vfq(vf, q_index, sp_obj);
+	} else {
+		BNX2X_ERR("No vf matching cid %d\n", vf_cid);
+	}
+}
+
+void bnx2x_iov_sp_event(struct bnx2x *bp, int vf_cid, bool queue_work)
+{
+	struct bnx2x_virtf *vf;
+
+	/* check if the cid is the VF range */
+	if (!IS_SRIOV(bp) || !bnx2x_iov_is_vf_cid(bp, vf_cid))
+		return;
+
+	vf = bnx2x_vf_by_cid(bp, vf_cid);
+	if (vf) {
+
+		/* set in_progress flag */
+		atomic_set(&vf->op_in_progress, 1);
+		if (queue_work)
+			queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
+	}
+}
+void bnx2x_iov_sp_task(struct bnx2x *bp)
+{
+	int i;
+
+	if (!IS_SRIOV(bp))
+		return;
+	/* Iterate over all VFs and invoke state transition for VFs with
+	 * 'in-progress' slow-path operations
+	 */
+	DP(BNX2X_MSG_IOV, "searching for pending vf operations\n");
+	for_each_vf(bp, i) {
+		struct bnx2x_virtf *vf = BP_VF(bp, i);
+
+		if (!list_empty(&vf->op_list_head) &&
+		    atomic_read(&vf->op_in_progress)) {
+			DP(BNX2X_MSG_IOV, "running pending op for vf %d\n", i);
+			bnx2x_vfop_cur(bp, vf)->transition(bp, vf);
+		}
+	}
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index d5e5103..249fa58 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -82,6 +82,84 @@ union bnx2x_vfop_params {
 
 /* forward */
 struct bnx2x_virtf;
+
+/* VFOP definitions */
+typedef void (*vfop_handler_t)(struct bnx2x *bp, struct bnx2x_virtf *vf);
+
+/* VFOP queue filters command additional arguments */
+struct bnx2x_vfop_filter {
+	struct list_head link;
+	int type;
+#define BNX2X_VFOP_FILTER_MAC	1
+#define BNX2X_VFOP_FILTER_VLAN	2
+
+	bool add;
+	u8 *mac;
+	u16 vid;
+};
+
+struct bnx2x_vfop_filters {
+	int add_cnt;
+	struct list_head head;
+	struct bnx2x_vfop_filter filters[];
+};
+
+/* transient list allocated, built and saved until its
+ * passed to the SP-VERBs layer.
+ */
+struct bnx2x_vfop_args_mcast {
+	int mc_num;
+	struct bnx2x_mcast_list_elem *mc;
+};
+
+struct bnx2x_vfop_args_qctor {
+	int	qid;
+	u16	sb_idx;
+};
+
+struct bnx2x_vfop_args_qdtor {
+	int	qid;
+	struct eth_context *cxt;
+};
+
+struct bnx2x_vfop_args_defvlan {
+	int	qid;
+	bool	enable;
+	u16	vid;
+	u8	prio;
+};
+
+struct bnx2x_vfop_args_qx {
+	int	qid;
+	bool	en_add;
+};
+
+struct bnx2x_vfop_args_filters {
+	struct bnx2x_vfop_filters *multi_filter;
+	atomic_t *credit;	/* non NULL means 'don't consume credit' */
+};
+
+union bnx2x_vfop_args {
+	struct bnx2x_vfop_args_mcast	mc_list;
+	struct bnx2x_vfop_args_qctor	qctor;
+	struct bnx2x_vfop_args_qdtor	qdtor;
+	struct bnx2x_vfop_args_defvlan	defvlan;
+	struct bnx2x_vfop_args_qx	qx;
+	struct bnx2x_vfop_args_filters	filters;
+};
+
+struct bnx2x_vfop {
+	struct list_head link;
+	int			rc;		/* return code */
+	int			state;		/* next state */
+	union bnx2x_vfop_args	args;		/* extra arguments */
+	union bnx2x_vfop_params *op_p;		/* ramrod params */
+
+	/* state machine callbacks */
+	vfop_handler_t transition;
+	vfop_handler_t done;
+};
+
 /* vf context */
 struct bnx2x_virtf {
 	u16 cfg_flags;
@@ -284,6 +362,12 @@ struct bnx2x_vfdb {
 	u32 flrd_vfs[FLRD_VFS_DWORDS];
 };
 
+/* queue access */
+static inline struct bnx2x_vf_queue *vfq_get(struct bnx2x_virtf *vf, u8 index)
+{
+	return &(vf->vfqs[index]);
+}
+
 static inline u8 vf_igu_sb(struct bnx2x_virtf *vf, u16 sb_idx)
 {
 	return vf->igu_base_id + sb_idx;
@@ -298,7 +382,22 @@ int bnx2x_iov_alloc_mem(struct bnx2x *bp);
 int bnx2x_iov_nic_init(struct bnx2x *bp);
 void bnx2x_iov_init_dq(struct bnx2x *bp);
 void bnx2x_iov_init_dmae(struct bnx2x *bp);
+void bnx2x_iov_set_queue_sp_obj(struct bnx2x *bp, int vf_cid,
+				struct bnx2x_queue_sp_obj **q_obj);
+void bnx2x_iov_sp_event(struct bnx2x *bp, int vf_cid, bool queue_work);
+int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem);
+void bnx2x_iov_sp_task(struct bnx2x *bp);
+/* global vf mailbox routines */
+void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event);
 void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
+static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
+						struct bnx2x_virtf *vf)
+{
+	WARN(!mutex_is_locked(&vf->op_mutex), "about to access vf op linked list but mutex was not locked!");
+	WARN_ON(list_empty(&vf->op_list_head));
+	return list_first_entry(&vf->op_list_head, struct bnx2x_vfop, link);
+}
+
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 /* VF FLR helpers */
 int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid);
@@ -308,4 +407,7 @@ void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
 		     u16 type, u16 length);
 void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list);
+
+bool bnx2x_tlv_supported(u16 tlvtype);
+
 #endif /* bnx2x_sriov.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 7259ea5..4d9f629 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -80,6 +80,24 @@ void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list)
 	   tlv->type, tlv->length);
 }
 
+/* test whether we support a tlv type */
+bool bnx2x_tlv_supported(u16 tlvtype)
+{
+	return CHANNEL_TLV_NONE < tlvtype && tlvtype < CHANNEL_TLV_MAX;
+}
+
+static inline int bnx2x_pfvf_status_codes(int rc)
+{
+	switch (rc) {
+	case 0:
+		return PFVF_STATUS_SUCCESS;
+	case -ENOMEM:
+		return PFVF_STATUS_NO_RESOURCE;
+	default:
+		return PFVF_STATUS_FAILURE;
+	}
+}
+
 /* General service functions */
 static void storm_memset_vf_mbx_ack(struct bnx2x *bp, u16 abs_fid)
 {
@@ -116,3 +134,143 @@ void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid)
 	/* enable the VF access to the mailbox */
 	bnx2x_vf_enable_access(bp, abs_vfid);
 }
+
+/* this works only on !E1h */
+static int bnx2x_copy32_vf_dmae(struct bnx2x *bp, u8 from_vf,
+				dma_addr_t pf_addr, u8 vfid, u32 vf_addr_hi,
+				u32 vf_addr_lo, u32 len32)
+{
+	struct dmae_command dmae;
+
+	if (CHIP_IS_E1x(bp)) {
+		BNX2X_ERR("Chip revision does not support VFs\n");
+		return DMAE_NOT_RDY;
+	}
+
+	if (!bp->dmae_ready) {
+		BNX2X_ERR("DMAE is not ready, can not copy\n");
+		return DMAE_NOT_RDY;
+	}
+
+	/* set opcode and fixed command fields */
+	bnx2x_prep_dmae_with_comp(bp, &dmae, DMAE_SRC_PCI, DMAE_DST_PCI);
+
+	if (from_vf) {
+		dmae.opcode_iov = (vfid << DMAE_COMMAND_SRC_VFID_SHIFT) |
+			(DMAE_SRC_VF << DMAE_COMMAND_SRC_VFPF_SHIFT) |
+			(DMAE_DST_PF << DMAE_COMMAND_DST_VFPF_SHIFT);
+
+		dmae.opcode |= (DMAE_C_DST << DMAE_COMMAND_C_FUNC_SHIFT);
+
+		dmae.src_addr_lo = vf_addr_lo;
+		dmae.src_addr_hi = vf_addr_hi;
+		dmae.dst_addr_lo = U64_LO(pf_addr);
+		dmae.dst_addr_hi = U64_HI(pf_addr);
+	} else {
+		dmae.opcode_iov = (vfid << DMAE_COMMAND_DST_VFID_SHIFT) |
+			(DMAE_DST_VF << DMAE_COMMAND_DST_VFPF_SHIFT) |
+			(DMAE_SRC_PF << DMAE_COMMAND_SRC_VFPF_SHIFT);
+
+		dmae.opcode |= (DMAE_C_SRC << DMAE_COMMAND_C_FUNC_SHIFT);
+
+		dmae.src_addr_lo = U64_LO(pf_addr);
+		dmae.src_addr_hi = U64_HI(pf_addr);
+		dmae.dst_addr_lo = vf_addr_lo;
+		dmae.dst_addr_hi = vf_addr_hi;
+	}
+	dmae.len = len32;
+
+	bnx2x_dp_dmae(bp, &dmae, BNX2X_MSG_DMAE);
+
+	/* issue the command and wait for completion */
+	return bnx2x_issue_dmae_with_comp(bp, &dmae);
+}
+/* dispatch request */
+static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				  struct bnx2x_vf_mbx *mbx)
+{
+	int i;
+
+	/* check if tlv type is known */
+	if (bnx2x_tlv_supported(mbx->first_tlv.tl.type)) {
+
+		/* switch on the opcode */
+		switch (mbx->first_tlv.tl.type) {
+		}
+
+	/* unknown TLV - this may belong to a VF driver from the future - a
+	 * version written after this PF driver was written, which supports
+	 * features unknown as of yet. Too bad since we don't support them.
+	 * Or this may be because someone wrote a crappy VF driver and is
+	 * sending garbage over the channel.
+	 */
+	} else {
+		BNX2X_ERR("unknown TLV. type %d length %d. first 20 bytes of mailbox buffer:\n",
+			  mbx->first_tlv.tl.type, mbx->first_tlv.tl.length);
+		for (i = 0; i < 20; i++)
+			DP_CONT(BNX2X_MSG_IOV, "%x ",
+				mbx->msg->req.tlv_buf_size.tlv_buffer[i]);
+	}
+}
+
+/* handle new vf-pf message */
+void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event)
+{
+	struct bnx2x_virtf *vf;
+	struct bnx2x_vf_mbx *mbx;
+	u8 vf_idx;
+	int rc;
+
+	DP(BNX2X_MSG_IOV,
+	   "vf pf event received: vfid %d, address_hi %x, address lo %x",
+	   vfpf_event->vf_id, vfpf_event->msg_addr_hi, vfpf_event->msg_addr_lo);
+	/* Sanity checks consider removing later */
+
+	/* check if the vf_id is valid */
+	if (vfpf_event->vf_id - BP_VFDB(bp)->sriov.first_vf_in_pf >
+	    BNX2X_NR_VIRTFN(bp)) {
+		BNX2X_ERR("Illegal vf_id %d max allowed: %d\n",
+			  vfpf_event->vf_id, BNX2X_NR_VIRTFN(bp));
+		goto mbx_done;
+	}
+	vf_idx = bnx2x_vf_idx_by_abs_fid(bp, vfpf_event->vf_id);
+
+	mbx = BP_VF_MBX(bp, vf_idx);
+
+	/* verify an event is not currently being processed -
+	 * debug failsafe only
+	 */
+	if (mbx->flags & VF_MSG_INPROCESS) {
+		BNX2X_ERR("Previous message is still being processed, vf_id %d\n",
+			  vfpf_event->vf_id);
+		goto mbx_done;
+	}
+
+	vf = BP_VF(bp, vf_idx);
+
+	/* save the VF message address */
+	mbx->vf_addr_hi = vfpf_event->msg_addr_hi;
+	mbx->vf_addr_lo = vfpf_event->msg_addr_lo;
+	DP(BNX2X_MSG_IOV, "mailbox vf address hi 0x%x, lo 0x%x, offset 0x%x",
+	   mbx->vf_addr_hi, mbx->vf_addr_lo, mbx->first_tlv.resp_msg_offset);
+
+	/* dmae to get the VF request */
+	rc = bnx2x_copy32_vf_dmae(bp, true, mbx->msg_mapping, vf->abs_vfid,
+				  mbx->vf_addr_hi, mbx->vf_addr_lo,
+				  sizeof(union vfpf_tlvs)/4);
+	if (rc) {
+		BNX2X_ERR("Failed to copy request VF %d\n", vf->abs_vfid);
+		goto mbx_error;
+	}
+
+	/* process the VF message header */
+	mbx->first_tlv = mbx->msg->req.first_tlv;
+
+	/* dispatch the request (will prepare the response) */
+	bnx2x_vf_mbx_request(bp, vf, mbx);
+	goto mbx_done;
+
+mbx_error:
+mbx_done:
+	return;
+}
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 02/22] bnx2x: VF <-> PF channel 'acquire' at vf probe
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Add the 'acquire' request to VF <-> PF channel and use it at
VF probe. In the acquire request the VF driver lists the resources
it would like to have. In the response the PF either ratifies the
request, or denies it and supplies the maximum values supported.
The VF may then attempt another acquire request.
This patch adds the bnx2x_vfpf.c file which contains the
implementation of the VF to PF hardware channel.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/Makefile      |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |   13 ++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |  189 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    5 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    5 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   81 +++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h  |  114 ++++++++++++-
 7 files changed, 407 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

diff --git a/drivers/net/ethernet/broadcom/bnx2x/Makefile b/drivers/net/ethernet/broadcom/bnx2x/Makefile
index 48fbdd4..d862ea6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/Makefile
+++ b/drivers/net/ethernet/broadcom/bnx2x/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_BNX2X) += bnx2x.o
 
-bnx2x-objs := bnx2x_main.o bnx2x_link.o bnx2x_cmn.o bnx2x_ethtool.o bnx2x_stats.o bnx2x_dcb.o bnx2x_sp.o
+bnx2x-objs := bnx2x_main.o bnx2x_link.o bnx2x_cmn.o bnx2x_ethtool.o bnx2x_stats.o bnx2x_dcb.o bnx2x_sp.o bnx2x_vfpf.o
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 40db837..3de66ef 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -49,6 +49,7 @@
 #include "bnx2x_sp.h"
 #include "bnx2x_dcb.h"
 #include "bnx2x_stats.h"
+#include "bnx2x_vfpf.h"
 
 enum bnx2x_int_mode {
 	BNX2X_INT_MODE_MSIX,
@@ -1246,6 +1247,9 @@ struct bnx2x {
 	struct bnx2x_vf_mbx_msg	*vf2pf_mbox;
 	dma_addr_t		vf2pf_mbox_mapping;
 
+	/* we set aside a copy of the acquire response */
+	struct pfvf_acquire_resp_tlv acquire_resp;
+
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
 
@@ -2208,6 +2212,15 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 #define BNX2X_VPD_LEN			128
 #define VENDOR_ID_LEN			4
 
+#define VF_ACQUIRE_THRESH 3
+#define VF_ACQUIRE_MAC_FILTERS 1
+#define VF_ACQUIRE_MC_FILTERS 10
+
+#define GOOD_ME_REG(me_reg) (((me_reg) & ME_REG_VF_VALID) && \
+			    (!((me_reg) & ME_REG_VF_ERR)))
+int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id);
+int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping);
+int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
 /* Congestion management fairness mode */
 #define CMNG_FNS_NONE		0
 #define CMNG_FNS_MINMAX		1
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 78392de..89092b0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12149,6 +12149,12 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 		goto init_one_exit;
 	}
 
+	if (IS_VF(bp)) {
+		rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
+		if (rc)
+			goto init_one_exit;
+	}
+
 	/* calc qm_cid_count */
 	bp->qm_cid_count = bnx2x_set_qm_cid_count(bp);
 	BNX2X_DEV_INFO("qm_cid_count %d\n", bp->qm_cid_count);
@@ -13077,3 +13083,186 @@ struct cnic_eth_dev *bnx2x_cnic_probe(struct net_device *dev)
 EXPORT_SYMBOL(bnx2x_cnic_probe);
 
 
+int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping)
+{
+	struct cstorm_vf_zone_data __iomem *zone_data =
+		REG_ADDR(bp, PXP_VF_ADDR_CSDM_GLOBAL_START);
+	int tout = 600, interval = 100; /* wait for 60 seconds */
+
+	if (*done) {
+		BNX2X_ERR("done was non zero before message to pf was sent");
+		WARN_ON(true);
+		return -EINVAL;
+	}
+
+	/* Write message address */
+	writel(U64_LO(msg_mapping),
+	       &zone_data->non_trigger.vf_pf_channel.msg_addr_lo);
+	writel(U64_HI(msg_mapping),
+	       &zone_data->non_trigger.vf_pf_channel.msg_addr_hi);
+
+	/* Trigger the PF FW */
+	writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+
+	/* Wait for PF to complete */
+	while ((tout >= 0) && (!*done)) {
+		msleep(interval);
+		tout -= 1;
+
+		/* progress indicator - HV can take its own sweet time in
+		 * answering VFs...
+		 */
+		DP_CONT(BNX2X_MSG_IOV, ".");
+	}
+
+	if (!*done) {
+		BNX2X_ERR("PF response has timed out\n");
+		return -EAGAIN;
+	}
+	DP(BNX2X_MSG_SP, "Got a response from PF\n");
+	return 0;
+}
+
+int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id)
+{
+	u32 me_reg;
+	int tout = 10, interval = 100; /* Wait for 1 sec */
+
+	do {
+		/* pxp traps vf read of doorbells and returns me reg value */
+		me_reg = readl(bp->doorbells);
+		if (GOOD_ME_REG(me_reg))
+			break;
+
+		msleep(interval);
+
+		BNX2X_ERR("Invalid ME register value: 0x%08x\n. Is pf driver up?",
+			  me_reg);
+	} while (tout-- > 0);
+
+	if (!GOOD_ME_REG(me_reg)) {
+		BNX2X_ERR("Invalid ME register value: 0x%08x\n", me_reg);
+		return -EINVAL;
+	}
+
+	BNX2X_ERR("valid ME register value: 0x%08x\n", me_reg);
+
+	*vf_id = (me_reg & ME_REG_VF_NUM_MASK) >> ME_REG_VF_NUM_SHIFT;
+
+	return 0;
+}
+
+#define VF_ACQUIRE_THRESH 3
+#define VF_ACQUIRE_MAC_FILTERS 1
+#define VF_ACQUIRE_MC_FILTERS 10
+
+int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count,
+				       u8 rx_count)
+{
+	int rc = 0, attempts = 0;
+	struct vfpf_acquire_tlv *req = &bp->vf2pf_mbox->req.acquire;
+	struct pfvf_acquire_resp_tlv *resp = &bp->vf2pf_mbox->resp.acquire_resp;
+	u32 vf_id;
+	bool resources_acquired = false;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_ACQUIRE, sizeof(*req));
+
+	if (bnx2x_get_vf_id(bp, &vf_id))
+		return -EAGAIN;
+
+	req->vfdev_info.vf_id = vf_id;
+	req->vfdev_info.vf_os = 0;
+
+	req->resc_request.num_rxqs = rx_count;
+	req->resc_request.num_txqs = tx_count;
+	req->resc_request.num_sbs = bp->igu_sb_cnt;
+	req->resc_request.num_mac_filters = VF_ACQUIRE_MAC_FILTERS;
+	req->resc_request.num_mc_filters = VF_ACQUIRE_MC_FILTERS;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	while (!resources_acquired) {
+		DP(BNX2X_MSG_SP, "attempting to acquire resources");
+
+		/* send acquire request */
+		rc = bnx2x_send_msg2pf(bp,
+				       &resp->hdr.status,
+				       bp->vf2pf_mbox_mapping);
+
+		/* copy acquire response from buffer to bp */
+		memcpy(&bp->acquire_resp, resp, sizeof(bp->acquire_resp));
+
+		attempts++;
+
+		/* PF timeout */
+		if (rc)
+			return rc;
+
+		/* PF agrees to allocate our resources */
+		if (bp->acquire_resp.hdr.status == PFVF_STATUS_SUCCESS) {
+			DP(BNX2X_MSG_SP, "resources acquired");
+			resources_acquired = true;
+
+		/* PF refuses to allocate our resources */
+		} else if (bp->acquire_resp.hdr.status ==
+			   PFVF_STATUS_NO_RESOURCE &&
+			   attempts < VF_ACQUIRE_THRESH) {
+			DP(BNX2X_MSG_SP,
+			   "PF unwilling to fulfill resource request. Try PF recommended amount");
+
+			/* humble our request */
+			req->resc_request.num_txqs =
+				bp->acquire_resp.resc.num_txqs;
+			req->resc_request.num_rxqs =
+				bp->acquire_resp.resc.num_rxqs;
+			req->resc_request.num_sbs =
+				bp->acquire_resp.resc.num_sbs;
+			req->resc_request.num_mac_filters =
+				bp->acquire_resp.resc.num_mac_filters;
+			req->resc_request.num_vlan_filters =
+				bp->acquire_resp.resc.num_vlan_filters;
+			req->resc_request.num_mc_filters =
+				bp->acquire_resp.resc.num_mc_filters;
+
+			/* Clear response buffer */
+			memset(&bp->vf2pf_mbox->resp, 0,
+			       sizeof(union pfvf_tlvs));
+
+		/* PF reports error */
+		} else {
+			BNX2X_ERR("Failed to get the requested amount of resources: %d. Breaking...\n",
+				  bp->acquire_resp.hdr.status);
+			return -EAGAIN;
+		}
+	}
+
+	/* get HW info */
+	bp->common.chip_id |= (bp->acquire_resp.pfdev_info.chip_num & 0xffff);
+	bp->link_params.chip_id = bp->common.chip_id;
+	bp->db_size = bp->acquire_resp.pfdev_info.db_size;
+	bp->common.int_block = INT_BLOCK_IGU;
+	bp->common.chip_port_mode = CHIP_2_PORT_MODE;
+	bp->igu_dsb_id = -1;
+	bp->mf_ov = 0;
+	bp->mf_mode = 0;
+	bp->common.flash_size = 0;
+	bp->flags |=
+		NO_WOL_FLAG | NO_ISCSI_OOO_FLAG | NO_ISCSI_FLAG | NO_FCOE_FLAG;
+	bp->igu_sb_cnt = 1;
+	bp->igu_base_sb = bp->acquire_resp.resc.hw_sbs[0].hw_sb_id;
+	strlcpy(bp->fw_ver, bp->acquire_resp.pfdev_info.fw_ver,
+		sizeof(bp->fw_ver));
+
+	if (is_valid_ether_addr(bp->acquire_resp.resc.current_mac_addr))
+		memcpy(bp->dev->dev_addr,
+		       bp->acquire_resp.resc.current_mac_addr,
+		       ETH_ALEN);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index be8c2c4..4c51f43 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -6542,6 +6542,11 @@
 #define PXP_VF_ADDR_IGU_SIZE		(0x3000)
 #define PXP_VF_ADDR_IGU_END\
 	((PXP_VF_ADDR_IGU_START) + (PXP_VF_ADDR_IGU_SIZE) - 1)
+#define PXP_VF_ADDR_CSDM_GLOBAL_START		0x7600
+#define PXP_VF_ADDR_CSDM_GLOBAL_SIZE		(PXP_ADDR_REG_SIZE)
+#define PXP_VF_ADDR_CSDM_GLOBAL_END\
+	((PXP_VF_ADDR_CSDM_GLOBAL_START) + (PXP_VF_ADDR_CSDM_GLOBAL_SIZE) - 1)
+
 #define PXP_VF_ADDR_DB_START				0x7c00
 #define PXP_VF_ADDR_DB_SIZE					(0x200)
 #define PXP_VF_ADDR_DB_END\
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 181f30f..a23b69f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -24,4 +24,9 @@ struct bnx2x_vf_mbx_msg {
 	union pfvf_tlvs resp;
 };
 
+void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
+		   u16 length);
+void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
+		     u16 type, u16 length);
+void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list);
 #endif /* bnx2x_sriov.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
new file mode 100644
index 0000000..5bc9016
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -0,0 +1,81 @@
+/* bnx2x_vfpf.c: Broadcom Everest network driver.
+ *
+ * Copyright 2009-2012 Broadcom Corporation
+ *
+ * Unless you and Broadcom execute a separate written software license
+ * agreement governing use of this software, this software is licensed to you
+ * under the terms of the GNU General Public License version 2, available
+ * at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html (the "GPL").
+ *
+ * Notwithstanding the above, under no circumstances may you combine this
+ * software in any way with any other Broadcom software provided under a
+ * license other than the GPL, without Broadcom's express prior written
+ * consent.
+ *
+ * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Written by: Shmulik Ravid <shmulikr@broadcom.com>
+ *
+ */
+
+/**
+ * VF MBX (aka vf-pf channel)
+ */
+#include "bnx2x.h"
+#include "bnx2x_sriov.h"
+/* place a given tlv on the tlv buffer at a given offset */
+void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
+		   u16 length)
+{
+	struct channel_tlv *tl =
+		(struct channel_tlv *)(tlvs_list + offset);
+	tl->type = type;
+	tl->length = length;
+}
+
+/* Clear the mailbox and init the header of the first tlv */
+void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
+		     u16 type, u16 length)
+{
+	DP(BNX2X_MSG_IOV, "preparing to send %d tlv over vf pf channel",
+	   type);
+
+	/* Clear mailbox */
+	memset(bp->vf2pf_mbox, 0, sizeof(struct bnx2x_vf_mbx_msg));
+
+	/* init type and length */
+	bnx2x_add_tlv(bp, &first_tlv->tl, 0, type, length);
+
+	/* init first tlv header */
+	first_tlv->resp_msg_offset = sizeof(bp->vf2pf_mbox->req);
+}
+
+/* list the types and lengths of the tlvs on the buffer */
+void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list)
+{
+	int i = 1;
+	struct channel_tlv *tlv = (struct channel_tlv *)tlvs_list;
+
+	while (tlv->type != CHANNEL_TLV_LIST_END) {
+
+		/* output tlv */
+		DP(BNX2X_MSG_IOV, "TLV number %d: type %d, length %d\n", i,
+		   tlv->type, tlv->length);
+
+		/* advance to next tlv */
+		tlvs_list += tlv->length;
+
+		/* cast general tlv list pointer to channel tlv header*/
+		tlv = (struct channel_tlv *)tlvs_list;
+
+		i++;
+
+		if (i > 50) {
+			WARN(true, "corrupt tlvs");
+			return;
+		}
+	}
+
+	/* output last tlv */
+	DP(BNX2X_MSG_IOV, "TLV number %d: type %d, length %d\n", i,
+	   tlv->type, tlv->length);
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 82edebb..8f124fe 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -18,20 +18,132 @@
 #ifndef VF_PF_IF_H
 #define VF_PF_IF_H
 
+/* Common definitions for all HVs */
+struct vf_pf_resc_request {
+	u8  num_rxqs;
+	u8  num_txqs;
+	u8  num_sbs;
+	u8  num_mac_filters;
+	u8  num_vlan_filters;
+	u8  num_mc_filters; /* No limit  so superfluous */
+};
+
+struct hw_sb_info {
+	u8 hw_sb_id;	/* aka absolute igu id, used to ack the sb */
+	u8 sb_qid;	/* used to update DHC for sb */
+};
 /* HW VF-PF channel definitions
  * A.K.A VF-PF mailbox
  */
 #define TLV_BUFFER_SIZE			1024
 
+enum {
+	PFVF_STATUS_WAITING = 0,
+	PFVF_STATUS_SUCCESS,
+	PFVF_STATUS_FAILURE,
+	PFVF_STATUS_NOT_SUPPORTED,
+	PFVF_STATUS_NO_RESOURCE
+};
+
+/* vf pf channel tlvs */
+/* general tlv header (used for both vf->pf request and pf->vf response) */
+struct channel_tlv {
+	u16 type;
+	u16 length;
+};
+
+/* header of first vf->pf tlv carries the offset used to calculate response
+ * buffer address
+ */
+struct vfpf_first_tlv {
+	struct channel_tlv tl;
+	u32 resp_msg_offset;
+};
+
+/* header of pf->vf tlvs, carries the status of handling the request */
+struct pfvf_tlv {
+	struct channel_tlv tl;
+	u8 status;
+	u8 padding[3];
+};
+
+/* used to terminate and pad a tlv list */
+struct channel_list_end_tlv {
+	struct channel_tlv tl;
+	u8 padding[4];
+};
+
+/* Acquire */
+struct vfpf_acquire_tlv {
+	struct vfpf_first_tlv first_tlv;
+
+	struct vf_pf_vfdev_info {
+		/* the following fields are for debug purposes */
+		u8  vf_id;		/* ME register value */
+		u8  vf_os;		/* e.g. Linux, W2K8 */
+		u8 padding[2];
+	} vfdev_info;
+
+	struct vf_pf_resc_request resc_request;
+
+	aligned_u64 bulletin_addr;
+};
+
+/* acquire response tlv - carries the allocated resources */
+struct pfvf_acquire_resp_tlv {
+	struct pfvf_tlv hdr;
+	struct pf_vf_pfdev_info {
+		u32 chip_num;
+		u32 pf_cap;
+		#define PFVF_CAP_RSS        0x00000001
+		#define PFVF_CAP_DHC        0x00000002
+		#define PFVF_CAP_TPA        0x00000004
+		char fw_ver[32];
+		u16 db_size;
+		u8  indices_per_sb;
+		u8  padding;
+	} pfdev_info;
+	struct pf_vf_resc {
+		/* in case of status NO_RESOURCE in message hdr, pf will fill
+		 * this struct with suggested amount of resources for next
+		 * acquire request
+		 */
+		#define PFVF_MAX_QUEUES_PER_VF         16
+		#define PFVF_MAX_SBS_PER_VF            16
+		struct hw_sb_info hw_sbs[PFVF_MAX_SBS_PER_VF];
+		u8	hw_qid[PFVF_MAX_QUEUES_PER_VF];
+		u8	num_rxqs;
+		u8	num_txqs;
+		u8	num_sbs;
+		u8	num_mac_filters;
+		u8	num_vlan_filters;
+		u8	num_mc_filters;
+		u8	permanent_mac_addr[ETH_ALEN];
+		u8	current_mac_addr[ETH_ALEN];
+		u8	padding[2];
+	} resc;
+};
+
 struct tlv_buffer_size {
 	u8 tlv_buffer[TLV_BUFFER_SIZE];
 };
 union vfpf_tlvs {
-
+	struct vfpf_first_tlv		first_tlv;
+	struct vfpf_acquire_tlv		acquire;
+	struct channel_list_end_tlv     list_end;
 	struct tlv_buffer_size		tlv_buf_size;
 };
 
 union pfvf_tlvs {
+	struct pfvf_acquire_resp_tlv	acquire_resp;
+	struct channel_list_end_tlv	list_end;
 	struct tlv_buffer_size		tlv_buf_size;
 };
+enum channel_tlvs {
+	   CHANNEL_TLV_NONE, /* ends tlv sequence */
+	   CHANNEL_TLV_ACQUIRE,
+	   CHANNEL_TLV_LIST_END,
+	   CHANNEL_TLV_MAX
+};
+
 #endif /* VF_PF_IF_H */
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 09/22] bnx2x: Allocate VF database in PF when VFs are present
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

When A PF determines that it may have to manage SRIOV VFs it
allocates a database for this purpose. The database is intended to
keep track of the VF state, the resources allocated for each VF
(queues, interrupt vectors, etc), the state of the VF's queues.
When the VF loads the database is updated accordingly.
When A VF closes the database is consulted to determine which
resources need to be released (close queues against device, reclaim
interrupt vectors, etc).

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/Makefile      |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    8 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   32 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    9 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  292 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |  223 ++++++++++++++++
 6 files changed, 562 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c

diff --git a/drivers/net/ethernet/broadcom/bnx2x/Makefile b/drivers/net/ethernet/broadcom/bnx2x/Makefile
index d862ea6..2ef6803 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/Makefile
+++ b/drivers/net/ethernet/broadcom/bnx2x/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_BNX2X) += bnx2x.o
 
-bnx2x-objs := bnx2x_main.o bnx2x_link.o bnx2x_cmn.o bnx2x_ethtool.o bnx2x_stats.o bnx2x_dcb.o bnx2x_sp.o bnx2x_vfpf.o
+bnx2x-objs := bnx2x_main.o bnx2x_link.o bnx2x_cmn.o bnx2x_ethtool.o bnx2x_stats.o bnx2x_dcb.o bnx2x_sp.o bnx2x_vfpf.o bnx2x_sriov.o
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index d1806e3..f38ff4e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -13,9 +13,12 @@
 
 #ifndef BNX2X_H
 #define BNX2X_H
+
+#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/dma-mapping.h>
 #include <linux/types.h>
+#include <linux/pci_regs.h>
 
 /* compilation time flags */
 
@@ -969,6 +972,7 @@ extern struct workqueue_struct *bnx2x_wq;
 #define BNX2X_MAX_NUM_OF_VFS	64
 #define BNX2X_VF_CID_WND	0
 #define BNX2X_CIDS_PER_VF	(1 << BNX2X_VF_CID_WND)
+#define BNX2X_FIRST_VF_CID	256
 #define BNX2X_VF_CIDS		(BNX2X_MAX_NUM_OF_VFS * BNX2X_CIDS_PER_VF)
 #define BNX2X_VF_ID_INVALID	0xFF
 
@@ -1120,6 +1124,7 @@ struct hw_context {
 /* forward */
 struct bnx2x_ilt;
 
+struct bnx2x_vfdb;
 
 enum bnx2x_recovery_state {
 	BNX2X_RECOVERY_DONE,
@@ -1607,6 +1612,9 @@ struct bnx2x {
 	char			fw_ver[32];
 	const struct firmware	*firmware;
 
+	struct bnx2x_vfdb	*vfdb;
+#define IS_SRIOV(bp)		((bp)->vfdb)
+
 	/* DCB support on/off */
 	u16 dcb_state;
 #define BNX2X_DCB_STATE_OFF			0
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index bf575a9..7472e37 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -125,8 +125,6 @@ static int debug;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, " Default debug msglevel");
 
-
-
 struct workqueue_struct *bnx2x_wq;
 
 enum bnx2x_board_type {
@@ -2207,7 +2205,6 @@ u8 bnx2x_link_test(struct bnx2x *bp, u8 is_serdes)
 	return rc;
 }
 
-
 /* Calculates the sum of vn_min_rates.
    It's needed for further normalizing of the min_rates.
    Returns:
@@ -7254,12 +7251,21 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 	ilt = BP_ILT(bp);
 	cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start;
 
+	if (IS_SRIOV(bp))
+		cdu_ilt_start += BNX2X_FIRST_VF_CID/ILT_PAGE_CIDS;
+	cdu_ilt_start = bnx2x_iov_init_ilt(bp, cdu_ilt_start);
+
+	/* since BNX2X_FIRST_VF_CID > 0 the PF L2 cids precedes
+	 * those of the VFs, so start line should be reset
+	 */
+	cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start;
 	for (i = 0; i < L2_ILT_LINES(bp); i++) {
 		ilt->lines[cdu_ilt_start + i].page = bp->context[i].vcxt;
 		ilt->lines[cdu_ilt_start + i].page_mapping =
 			bp->context[i].cxt_mapping;
 		ilt->lines[cdu_ilt_start + i].size = bp->context[i].size;
 	}
+
 	bnx2x_ilt_init_op(bp, INITOP_SET);
 
 	if (!CONFIGURE_NIC_MODE(bp)) {
@@ -7865,6 +7871,8 @@ int bnx2x_set_int_mode(struct bnx2x *bp)
 /* must be called prior to any HW initializations */
 static inline u16 bnx2x_cid_ilt_lines(struct bnx2x *bp)
 {
+	if (IS_SRIOV(bp))
+		return (BNX2X_FIRST_VF_CID + BNX2X_VF_CIDS)/ILT_PAGE_CIDS;
 	return L2_ILT_LINES(bp);
 }
 
@@ -12020,8 +12028,13 @@ void bnx2x__init_func_obj(struct bnx2x *bp)
 static int bnx2x_set_qm_cid_count(struct bnx2x *bp)
 {
 	int cid_count = BNX2X_L2_MAX_CID(bp);
+
+	if (IS_SRIOV(bp))
+		cid_count += BNX2X_VF_CIDS;
+
 	if (CNIC_SUPPORT(bp))
 		cid_count += CNIC_CID_MAX;
+
 	return roundup(cid_count, QM_CID_ROUND);
 }
 
@@ -12232,6 +12245,16 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 			goto init_one_exit;
 	}
 
+	/* Enable SRIOV if capability found in configuration space.
+	 * Once the generic SR-IOV framework makes it in from the
+	 * pci tree this will be revised, to allow dynamic control
+	 * over the number of VFs. Right now, change the num of vfs
+	 * param below to enable SR-IOV.
+	 */
+	rc = bnx2x_iov_init_one(bp, int_mode, 0/*num vfs*/);
+	if (rc)
+		goto init_one_exit;
+
 	/* calc qm_cid_count */
 	bp->qm_cid_count = bnx2x_set_qm_cid_count(bp);
 	BNX2X_DEV_INFO("qm_cid_count %d\n", bp->qm_cid_count);
@@ -12357,6 +12380,9 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 
 	/* Make sure RESET task is not scheduled before continuing */
 	cancel_delayed_work_sync(&bp->sp_rtnl_task);
+
+	bnx2x_iov_remove_one(bp);
+
 	/* send message via vfpf channel to release the resources of this vf */
 	if (IS_VF(bp))
 		bnx2x_vfpf_release(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index e92577a..236079a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -6289,6 +6289,15 @@
 #define PCI_PM_DATA_B					0x414
 #define PCI_ID_VAL1					0x434
 #define PCI_ID_VAL2					0x438
+#define GRC_CONFIG_REG_PF_INIT_VF		0x624
+#define GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK	0xf
+/* First VF_NUM for PF is encoded in this register.
+ * The number of VFs assigned to a PF is assumed to be a multiple of 8.
+ * Software should program these bits based on Total Number of VFs \
+ * programmed for each PF.
+ * Since registers from 0x000-0x7ff are split across functions, each PF will
+ * have the same location for the same 4 bits
+ */
 
 #define PXPCS_TL_CONTROL_5		    0x814
 #define PXPCS_TL_CONTROL_5_UNKNOWNTYPE_ERR_ATTN    (1 << 29) /*WC*/
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
new file mode 100644
index 0000000..cd08ac2
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -0,0 +1,292 @@
+/* bnx2x_sriov.c: Broadcom Everest network driver.
+ *
+ * Copyright 2009-2012 Broadcom Corporation
+ *
+ * Unless you and Broadcom execute a separate written software license
+ * agreement governing use of this software, this software is licensed to you
+ * under the terms of the GNU General Public License version 2, available
+ * at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html (the "GPL").
+ *
+ * Notwithstanding the above, under no circumstances may you combine this
+ * software in any way with any other Broadcom software provided under a
+ * license other than the GPL, without Broadcom's express prior written
+ * consent.
+ *
+ * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Written by: Shmulik Ravid
+ *
+ */
+#include "bnx2x.h"
+#include "bnx2x_init.h"
+#include "bnx2x_sriov.h"
+int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid)
+{
+	int idx;
+	for_each_vf(bp, idx)
+		if (bnx2x_vf(bp, idx, abs_vfid) == abs_vfid)
+			break;
+	return idx;
+}
+
+static
+struct bnx2x_virtf *bnx2x_vf_by_abs_fid(struct bnx2x *bp, u16 abs_vfid)
+{
+	u16 idx =  (u16)bnx2x_vf_idx_by_abs_fid(bp, abs_vfid);
+	return (idx < BNX2X_NR_VIRTFN(bp)) ? BP_VF(bp, idx) : NULL;
+}
+
+static int bnx2x_ari_enabled(struct pci_dev *dev)
+{
+	return dev->bus->self && dev->bus->self->ari_enabled;
+}
+
+static void __devinit
+bnx2x_vf_set_igu_info(struct bnx2x *bp, u8 igu_sb_id, u8 abs_vfid)
+{
+	struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid);
+	if (vf) {
+		if (!vf_sb_count(vf))
+			vf->igu_base_id = igu_sb_id;
+		++vf_sb_count(vf);
+	 }
+}
+
+static void __devinit
+bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
+{
+	int sb_id;
+	u32 val;
+	u8 fid;
+
+	/* IGU in normal mode - read CAM */
+	for (sb_id = 0; sb_id < IGU_REG_MAPPING_MEMORY_SIZE; sb_id++) {
+		val = REG_RD(bp, IGU_REG_MAPPING_MEMORY + sb_id * 4);
+		if (!(val & IGU_REG_MAPPING_MEMORY_VALID))
+			continue;
+		fid = GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID);
+		if (!(fid & IGU_FID_ENCODE_IS_PF))
+			bnx2x_vf_set_igu_info(bp, sb_id,
+					      (fid & IGU_FID_VF_NUM_MASK));
+
+		DP(BNX2X_MSG_IOV, "%s[%d], igu_sb_id=%d, msix=%d\n",
+		   ((fid & IGU_FID_ENCODE_IS_PF) ? "PF" : "VF"),
+		   ((fid & IGU_FID_ENCODE_IS_PF) ? (fid & IGU_FID_PF_NUM_MASK) :
+		   (fid & IGU_FID_VF_NUM_MASK)), sb_id,
+		   GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR));
+	}
+}
+
+static void __bnx2x_iov_free_vfdb(struct bnx2x *bp)
+{
+	if (bp->vfdb) {
+		kfree(bp->vfdb->vfqs);
+		kfree(bp->vfdb->vfs);
+		kfree(bp->vfdb);
+	}
+	bp->vfdb = NULL;
+}
+
+static int bnx2x_sriov_pci_cfg_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+{
+	int pos;
+	struct pci_dev *dev = bp->pdev;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos) {
+		BNX2X_ERR("failed to find SRIOV capability in device\n");
+		return -ENODEV;
+	}
+
+	iov->pos = pos;
+	DP(BNX2X_MSG_IOV, "sriov ext pos %d\n", pos);
+	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &iov->ctrl);
+	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &iov->total);
+	pci_read_config_word(dev, pos + PCI_SRIOV_INITIAL_VF, &iov->initial);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
+	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &iov->pgsz);
+	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+	return 0;
+}
+
+static int bnx2x_sriov_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+{
+	u32 val;
+
+	/* read the SRIOV capability structure
+	 * The fields can be read via configuration read or
+	 * directly from the device (starting at offset PCICFG_OFFSET)
+	 */
+	if (bnx2x_sriov_pci_cfg_info(bp, iov))
+		return -ENODEV;
+
+	/* get the number of SRIOV bars */
+	iov->nres = 0;
+
+	/* read the first_vfid */
+	val = REG_RD(bp, PCICFG_OFFSET + GRC_CONFIG_REG_PF_INIT_VF);
+	iov->first_vf_in_pf = ((val & GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK)
+			       * 8) - (BNX2X_MAX_NUM_OF_VFS * BP_PATH(bp));
+
+	DP(BNX2X_MSG_IOV,
+	   "IOV info[%d]: first vf %d, nres %d, cap 0x%x, ctrl 0x%x, total %d, initial %d, num vfs %d, offset %d, stride %d, page size 0x%x\n",
+	   BP_FUNC(bp),
+	   iov->first_vf_in_pf, iov->nres, iov->cap, iov->ctrl, iov->total,
+	   iov->initial, iov->nr_virtfn, iov->offset, iov->stride, iov->pgsz);
+
+	return 0;
+}
+
+static u8 bnx2x_iov_get_max_queue_count(struct bnx2x *bp)
+{
+	int i;
+	u8 queue_count = 0;
+
+	if (IS_SRIOV(bp))
+		for_each_vf(bp, i)
+			queue_count += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+
+	return queue_count;
+}
+
+/* must be called after PF bars are mapped */
+int __devinit bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
+				 int num_vfs_param)
+{
+	int err, i, qcount;
+	struct bnx2x_sriov *iov;
+	struct pci_dev *dev = bp->pdev;
+
+	bp->vfdb = NULL;
+
+	/* verify sriov capability is present in configuration space */
+	if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV))
+		return 0;
+
+	/* verify is pf */
+	if (IS_VF(bp))
+		return 0;
+
+	/* verify chip revision */
+	if (CHIP_IS_E1x(bp))
+		return 0;
+
+	/* check if SRIOV support is turned off */
+	if (!num_vfs_param)
+		return 0;
+
+	/* SRIOV assumes that num of PF CIDs < BNX2X_FIRST_VF_CID */
+	if (BNX2X_L2_MAX_CID(bp) >= BNX2X_FIRST_VF_CID) {
+		BNX2X_ERR("PF cids %d are overspilling into vf space (starts at %d). Abort SRIOV",
+			  BNX2X_L2_MAX_CID(bp), BNX2X_FIRST_VF_CID);
+		return 0;
+	}
+
+	/* SRIOV can be enabled only with MSIX */
+	if (int_mode_param == BNX2X_INT_MODE_MSI ||
+	    int_mode_param == BNX2X_INT_MODE_INTX)
+		BNX2X_ERR("Forced MSI/INTx mode is incompatible with SRIOV\n");
+
+	err = -EIO;
+	/* verify ari is enabled */
+	if (!bnx2x_ari_enabled(bp->pdev)) {
+		BNX2X_ERR("ARI not supported, SRIOV can not be enabled\n");
+		return err;
+	}
+
+	/* verify igu is in normal mode */
+	if (CHIP_INT_MODE_IS_BC(bp)) {
+		BNX2X_ERR("IGU not normal mode,  SRIOV can not be enabled\n");
+		return err;
+	}
+
+	/* allocate the vfs database */
+	bp->vfdb = kzalloc(sizeof(*(bp->vfdb)), GFP_KERNEL);
+	if (!bp->vfdb) {
+		BNX2X_ERR("failed to allocate vf database\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	/* get the sriov info - Linux already collected all the pertinent
+	 * information, however the sriov structure is for the private use
+	 * of the pci module. Also we want this information regardless
+	 *  of the hyper-visor.
+	 */
+	iov = &(bp->vfdb->sriov);
+	err = bnx2x_sriov_info(bp, iov);
+	if (err)
+		goto failed;
+
+	/* calcuate the actual number of VFs */
+	iov->nr_virtfn = min_t(u16, iov->total, (u16)num_vfs_param);
+
+	/* allcate the vf array */
+	bp->vfdb->vfs = kzalloc(sizeof(struct bnx2x_virtf) *
+				BNX2X_NR_VIRTFN(bp), GFP_KERNEL);
+	if (!bp->vfdb->vfs) {
+		BNX2X_ERR("failed to allocate vf array\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	/* Initial VF init - index and abs_vfid - nr_virtfn must be set */
+	for_each_vf(bp, i) {
+		bnx2x_vf(bp, i, index) = i;
+		bnx2x_vf(bp, i, abs_vfid) = iov->first_vf_in_pf + i;
+		bnx2x_vf(bp, i, state) = VF_FREE;
+		INIT_LIST_HEAD(&bnx2x_vf(bp, i, op_list_head));
+		mutex_init(&bnx2x_vf(bp, i, op_mutex));
+		bnx2x_vf(bp, i, op_current) = CHANNEL_TLV_NONE;
+	}
+
+	/* re-read the IGU CAM for VFs - index and abs_vfid must be set */
+	bnx2x_get_vf_igu_cam_info(bp);
+
+	/* get the total queue count and allocate the global queue arrays */
+	qcount = bnx2x_iov_get_max_queue_count(bp);
+
+	/* allocate the queue arrays for all VFs */
+	bp->vfdb->vfqs = kzalloc(qcount * sizeof(struct bnx2x_vf_queue),
+				 GFP_KERNEL);
+	if (!bp->vfdb->vfqs) {
+		BNX2X_ERR("failed to allocate vf queue array\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	return 0;
+failed:
+	DP(BNX2X_MSG_IOV, "Failed err=%d\n", err);
+	__bnx2x_iov_free_vfdb(bp);
+	return err;
+}
+/* called by bnx2x_init_hw_func, returns the next ilt line */
+int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line)
+{
+	int i;
+	struct bnx2x_ilt *ilt = BP_ILT(bp);
+
+	if (!IS_SRIOV(bp))
+		return line;
+
+	/* set vfs ilt lines */
+	for (i = 0; i < BNX2X_VF_CIDS/ILT_PAGE_CIDS; i++) {
+		struct hw_dma *hw_cxt = BP_VF_CXT_PAGE(bp, i);
+		ilt->lines[line+i].page = hw_cxt->addr;
+		ilt->lines[line+i].page_mapping = hw_cxt->mapping;
+		ilt->lines[line+i].size = hw_cxt->size; /* doesn't matter */
+	}
+	return line+i;
+}
+void __devexit bnx2x_iov_remove_one(struct bnx2x *bp)
+{
+	/* if SRIOV is not enabled there's nothing to do */
+	if (!IS_SRIOV(bp))
+		return;
+
+	/* free vf database */
+	__bnx2x_iov_free_vfdb(bp);
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index a23b69f..04925f0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -19,11 +19,234 @@
 #ifndef BNX2X_SRIOV_H
 #define BNX2X_SRIOV_H
 
+/* The bnx2x device structure holds vfdb structure described below.
+ * The VF array is indexed by the relative vfid.
+ */
+struct bnx2x_sriov {
+	u32 first_vf_in_pf;
+
+	/* standard SRIOV capability fields, mostly for debugging */
+	int pos;		/* capability position */
+	int nres;		/* number of resources */
+	u32 cap;		/* SR-IOV Capabilities */
+	u16 ctrl;		/* SR-IOV Control */
+	u16 total;		/* total VFs associated with the PF */
+	u16 initial;		/* initial VFs associated with the PF */
+	u16 nr_virtfn;		/* number of VFs available */
+	u16 offset;		/* first VF Routing ID offset */
+	u16 stride;		/* following VF stride */
+	u32 pgsz;		/* page size for BAR alignment */
+	u8 link;		/* Function Dependency Link */
+};
+/* bars */
+struct bnx2x_vf_bar {
+	u64 bar;
+	u32 size;
+};
+/* vf queue (used both for rx or tx) */
+struct bnx2x_vf_queue {
+	struct eth_context		*cxt;
+
+	/* MACs object */
+	struct bnx2x_vlan_mac_obj	mac_obj;
+
+	/* VLANs object */
+	struct bnx2x_vlan_mac_obj	vlan_obj;
+	atomic_t vlan_count;		/* 0 means vlan-0 is set  ~ untagged */
+
+	/* Queue Slow-path State object */
+	struct bnx2x_queue_sp_obj	sp_obj;
+
+	u32 cid;
+	u16 index;
+	u16 sb_idx;
+};
+
+/* struct bnx2x_vfop_qctor_params - prepare queue construction parameters:
+ * q-init, q-setup and SB index
+ */
+struct bnx2x_vfop_qctor_params {
+	struct bnx2x_queue_state_params		qstate;
+	struct bnx2x_queue_setup_params		prep_qsetup;
+};
+
+
+/* VFOP parameters (one copy per VF) */
+union bnx2x_vfop_params {
+	struct bnx2x_vlan_mac_ramrod_params	vlan_mac;
+	struct bnx2x_rx_mode_ramrod_params	rx_mode;
+	struct bnx2x_mcast_ramrod_params	mcast;
+	struct bnx2x_config_rss_params		rss;
+	struct bnx2x_vfop_qctor_params		qctor;
+};
+
+/* forward */
+struct bnx2x_virtf;
+/* vf context */
+struct bnx2x_virtf {
+	u16 cfg_flags;
+#define VF_CFG_STATS		0x0001
+#define VF_CFG_FW_FC		0x0002
+#define VF_CFG_TPA		0x0004
+#define VF_CFG_INT_SIMD		0x0008
+#define VF_CACHE_LINE		0x0010
+
+	/* rss params TODO */
+
+	u8 state;
+#define VF_FREE		0	/* VF ready to be acquired holds no resc */
+#define VF_ACQUIRED	1	/* VF aquired, but not initalized */
+#define VF_ENABLED	2	/* VF Enabled */
+#define VF_RESET	3	/* VF FLR'd, pending cleanup */
+
+	/* non 0 during flr cleanup */
+	u8 flr_clnup_stage;
+#define VF_FLR_CLN	1	/* reclaim resources and do 'final cleanup'
+				 * sans the end-wait
+				 */
+#define VF_FLR_ACK	2	/* ACK flr notification */
+#define VF_FLR_EPILOG	3	/* wait for VF remnants to dissipate in the HW
+				 * ~ final cleanup' end wait
+				 */
+
+	/* dma */
+	dma_addr_t fw_stat_map;		/* valid iff VF_CFG_STATS */
+	dma_addr_t spq_map;
+	dma_addr_t bulletin_map;
+
+	/* Allocated resources counters. Before the VF is acquired, the
+	 * counters hold the following values:
+	 *
+	 * - xxq_count = 0 as the queues memory is not allocated yet.
+	 *
+	 * - sb_count  = The number of status blocks configured for this VF in
+	 *		 the IGU CAM. Initially read during probe.
+	 *
+	 * - xx_rules_count = The number of rules statically and equally
+	 *		      allocated for each VF, during PF load.
+	 */
+	struct vf_pf_resc_request	alloc_resc;
+#define vf_rxq_count(vf)		((vf)->alloc_resc.num_rxqs)
+#define vf_txq_count(vf)		((vf)->alloc_resc.num_txqs)
+#define vf_sb_count(vf)			((vf)->alloc_resc.num_sbs)
+#define vf_mac_rules_cnt(vf)		((vf)->alloc_resc.num_mac_filters)
+#define vf_vlan_rules_cnt(vf)		((vf)->alloc_resc.num_vlan_filters)
+#define vf_mc_rules_cnt(vf)		((vf)->alloc_resc.num_mc_filters)
+
+	u8 sb_count;	/* actual number of SBs */
+	u8 igu_base_id;	/* base igu status block id */
+
+
+	struct bnx2x_vf_queue	*vfqs;
+#define bnx2x_vfq(vf, nr, var)	((vf)->vfqs[(nr)].var)
+
+	u8 index;	/* index in the vf array */
+	u8 abs_vfid;
+	u8 sp_cl_id;
+	u32 error;	/* 0 means all's-well */
+
+	/* BDF */
+	unsigned int bus;
+	unsigned int devfn;
+
+	/* bars */
+	struct bnx2x_vf_bar bars[PCI_SRIOV_NUM_BARS];
+
+	/* set-mac ramrod state 1-pending, 0-done */
+	unsigned long	filter_state;
+
+	/* leading rss client id ~~ the client id of the first rxq, must be
+	 * set for each txq.
+	 */
+	int leading_rss;
+
+	/* MCAST object */
+	struct bnx2x_mcast_obj		mcast_obj;
+
+	/* RSS configuration object */
+	struct bnx2x_rss_config_obj     rss_conf_obj;
+
+	/* slow-path operations */
+	atomic_t			op_in_progress;
+	int				op_rc;
+	bool				op_wait_blocking;
+	struct list_head		op_list_head;
+	union bnx2x_vfop_params		op_params;
+	struct mutex			op_mutex; /* one vfop at a time mutex */
+	enum channel_tlvs		op_current;
+};
+
+#define BNX2X_NR_VIRTFN(bp)	((bp)->vfdb->sriov.nr_virtfn)
+
+#define for_each_vf(bp, var) \
+		for ((var) = 0; (var) < BNX2X_NR_VIRTFN(bp); (var)++)
+
 struct bnx2x_vf_mbx_msg {
 	union vfpf_tlvs req;
 	union pfvf_tlvs resp;
 };
 
+struct bnx2x_vf_mbx {
+	struct bnx2x_vf_mbx_msg *msg;
+	dma_addr_t msg_mapping;
+
+	/* VF GPA address */
+	u32 vf_addr_lo;
+	u32 vf_addr_hi;
+
+	struct vfpf_first_tlv first_tlv;	/* saved VF request header */
+
+	u8 flags;
+#define VF_MSG_INPROCESS	0x1	/* failsafe - the FW should prevent
+					 * more then one pending msg
+					 */
+};
+
+struct hw_dma {
+	void *addr;
+	dma_addr_t mapping;
+	size_t size;
+};
+
+struct bnx2x_vfdb {
+
+#define BP_VFDB(bp)		((bp)->vfdb)
+	/* vf array */
+	struct bnx2x_virtf	*vfs;
+#define BP_VF(bp, idx)		(&((bp)->vfdb->vfs[(idx)]))
+#define bnx2x_vf(bp, idx, var)	((bp)->vfdb->vfs[(idx)].var)
+
+	/* queue array - for all vfs */
+	struct bnx2x_vf_queue *vfqs;
+
+	/* vf HW contexts */
+	struct hw_dma		context[BNX2X_VF_CIDS/ILT_PAGE_CIDS];
+#define	BP_VF_CXT_PAGE(bp, i)	(&(bp)->vfdb->context[(i)])
+
+	/* SR-IOV information */
+	struct bnx2x_sriov	sriov;
+	struct hw_dma		mbx_dma;
+#define BP_VF_MBX_DMA(bp)	(&((bp)->vfdb->mbx_dma))
+	struct bnx2x_vf_mbx	mbxs[BNX2X_MAX_NUM_OF_VFS];
+#define BP_VF_MBX(bp, vfid)	(&((bp)->vfdb->mbxs[(vfid)]))
+
+	struct hw_dma		sp_dma;
+#define bnx2x_vf_sp(bp, vf, field) ((bp)->vfdb->sp_dma.addr +		\
+		(vf)->index * sizeof(struct bnx2x_vf_sp) +		\
+		offsetof(struct bnx2x_vf_sp, field))
+#define bnx2x_vf_sp_map(bp, vf, field) ((bp)->vfdb->sp_dma.mapping +	\
+		(vf)->index * sizeof(struct bnx2x_vf_sp) +		\
+		offsetof(struct bnx2x_vf_sp, field))
+
+#define FLRD_VFS_DWORDS (BNX2X_MAX_NUM_OF_VFS / 32)
+	u32 flrd_vfs[FLRD_VFS_DWORDS];
+};
+
+/* global iov routines */
+int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line);
+int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, int num_vfs_param);
+void __devexit bnx2x_iov_remove_one(struct bnx2x *bp);
+int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 		   u16 length);
 void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 05/22] bnx2x: Add init, setup_q, set_mac to VF <-> PF channel
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

'init' - init an acquired VF. Supply allocation GPAs to PF.
'setup_q' - PF to allocate a queue in device on behalf of the VF.
'set_mac' - PF to configure a mac in device on behalf of the VF.
VF driver uses these requests in the VF <-> PF channel in nic_load
flow.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |    6 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |   16 ++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  166 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h |  123 ++++++++++++++++
 4 files changed, 311 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 231fb5e..eedbf4c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -344,6 +344,9 @@ union db_prod {
 #define SGE_PAGE_SIZE		PAGE_SIZE
 #define SGE_PAGE_SHIFT		PAGE_SHIFT
 #define SGE_PAGE_ALIGN(addr)	PAGE_ALIGN((typeof(PAGE_SIZE))(addr))
+#define SGE_PAGES		(SGE_PAGE_SIZE * PAGES_PER_SGE)
+#define TPA_AGG_SIZE		min_t(u32, (min_t(u32, 8, MAX_SKB_FRAGS) * \
+					    SGE_PAGES), 0xffff)
 
 /* SGE ring related macros */
 #define NUM_RX_SGE_PAGES	2
@@ -2222,6 +2225,9 @@ int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id);
 int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping);
 int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
 int __devexit bnx2x_vfpf_release(struct bnx2x *bp);
+int bnx2x_vfpf_init(struct bnx2x *bp);
+int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx);
+int bnx2x_vfpf_set_mac(struct bnx2x *bp);
 int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code);
 /* Congestion management fairness mode */
 #define CMNG_FNS_NONE		0
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8df311f..201e3bd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2436,6 +2436,12 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		LOAD_ERROR_EXIT(bp, load_error0);
 	}
 
+	/* request pf to initialize status blocks */
+	if (IS_VF(bp)) {
+		rc = bnx2x_vfpf_init(bp);
+		if (rc)
+			LOAD_ERROR_EXIT(bp, load_error0);
+	}
 
 	/* As long as bnx2x_alloc_mem() may possibly update
 	 * bp->num_queues, bnx2x_set_real_num_queues() should always
@@ -2557,6 +2563,14 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 			LOAD_ERROR_EXIT(bp, load_error3);
 		}
 
+	} else { /* vf */
+		for_each_eth_queue(bp, i) {
+			rc = bnx2x_vfpf_setup_q(bp, i);
+			if (rc) {
+				BNX2X_ERR("Queue setup failed\n");
+				LOAD_ERROR_EXIT(bp, load_error3);
+			}
+		}
 	}
 
 	/* Now when Clients are configured we are ready to work */
@@ -2565,6 +2579,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	/* Configure a ucast MAC */
 	if (IS_PF(bp))
 		rc = bnx2x_set_eth_mac(bp, true);
+	else /* vf */
+		rc = bnx2x_vfpf_set_mac(bp);
 	if (rc) {
 		BNX2X_ERR("Setting Ethernet MAC failed\n");
 		LOAD_ERROR_EXIT(bp, load_error3);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 1a847a4..278146e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13358,3 +13358,169 @@ int __devexit bnx2x_vfpf_release(struct bnx2x *bp)
 
 	return 0;
 }
+
+/* Tell PF about SB addresses */
+int bnx2x_vfpf_init(struct bnx2x *bp)
+{
+	struct vfpf_init_tlv *req = &bp->vf2pf_mbox->req.init;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int rc, i;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_INIT, sizeof(*req));
+
+	/* status blocks */
+	for_each_queue(bp, i)
+		req->sb_addr[i] = (unsigned long)bnx2x_fp(bp, i,
+							  status_blk_mapping);
+
+	/* statistics - requests only supports single queue for now */
+	req->stats_addr = bp->fw_stats_data_mapping +
+		offsetof(struct bnx2x_fw_stats_data, queue_stats);
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+	if (rc)
+		return rc;
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("INIT VF failed: %d. Breaking...\n",
+			  resp->hdr.status);
+		return -EAGAIN;
+	}
+
+	DP(BNX2X_MSG_SP, "INIT VF Succeeded\n");
+
+	return 0;
+}
+/* ask the pf to open a queue for the vf */
+int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx)
+{
+	struct vfpf_setup_q_tlv *req = &bp->vf2pf_mbox->req.setup_q;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	struct bnx2x_fastpath *fp = &bp->fp[fp_idx];
+	u16 tpa_agg_size = 0, flags = 0;
+	int rc;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SETUP_Q, sizeof(*req));
+
+	/* select tpa mode to request */
+	if (!fp->disable_tpa) {
+		flags |= VFPF_QUEUE_FLG_TPA;
+		flags |= VFPF_QUEUE_FLG_TPA_IPV6;
+		if (fp->mode == TPA_MODE_GRO)
+			flags |= VFPF_QUEUE_FLG_TPA_GRO;
+		tpa_agg_size = TPA_AGG_SIZE;
+	}
+
+	/* calculate queue flags */
+	flags |= VFPF_QUEUE_FLG_STATS;
+	flags |= VFPF_QUEUE_FLG_CACHE_ALIGN;
+	flags |= IS_MF_SD(bp) ? VFPF_QUEUE_FLG_OV : 0;
+
+	flags |= VFPF_QUEUE_FLG_VLAN;
+	DP(NETIF_MSG_IFUP, "vlan removal enabled\n");
+
+	/* Common */
+	req->vf_qid = fp_idx;
+	req->param_valid = VFPF_RXQ_VALID | VFPF_TXQ_VALID;
+
+	/* Rx */
+	req->rxq.rcq_addr = fp->rx_comp_mapping;
+	req->rxq.rcq_np_addr = fp->rx_comp_mapping + BCM_PAGE_SIZE;
+	req->rxq.rxq_addr = fp->rx_desc_mapping;
+	req->rxq.sge_addr = fp->rx_sge_mapping;
+
+	req->rxq.vf_sb = fp_idx;
+	req->rxq.sb_index = HC_INDEX_ETH_RX_CQ_CONS;
+	req->rxq.hc_rate = 1000000/bp->rx_ticks; /* interrupts/sec */
+
+	req->rxq.mtu = bp->dev->mtu;
+	req->rxq.buf_sz = fp->rx_buf_size;
+	req->rxq.sge_buf_sz = BCM_PAGE_SIZE * PAGES_PER_SGE;
+	req->rxq.tpa_agg_sz = tpa_agg_size;
+	req->rxq.max_sge_pkt = SGE_PAGE_ALIGN(bp->dev->mtu) >> SGE_PAGE_SHIFT;
+	req->rxq.max_sge_pkt = ((req->rxq.max_sge_pkt + PAGES_PER_SGE - 1) &
+			  (~(PAGES_PER_SGE-1))) >> PAGES_PER_SGE_SHIFT;
+	req->rxq.flags = flags;
+	req->rxq.drop_flags = 0;
+	req->rxq.cache_line_log = BNX2X_RX_ALIGN_SHIFT;
+	req->rxq.stat_id = -1; /* No stats at the moment */
+
+	/* Tx */
+	req->txq.txq_addr = fp->txdata_ptr[FIRST_TX_COS_INDEX]->tx_desc_mapping;
+	req->txq.vf_sb = fp_idx;
+	req->txq.sb_index = HC_INDEX_ETH_TX_CQ_CONS_COS0;
+	req->txq.hc_rate = 1000000/bp->tx_ticks; /* interrupts/sec */
+	req->txq.flags = flags;
+	req->txq.traffic_type = LLFC_TRAFFIC_TYPE_NW;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+	if (rc)
+		BNX2X_ERR("Sending SETUP_Q message for queue[%d] failed!\n",
+			  fp_idx);
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("Status of SETUP_Q for queue[%d] is %d\n",
+			  fp_idx, resp->hdr.status);
+		return -EINVAL;
+	}
+	return rc;
+}
+
+/* request pf to add a mac for the vf */
+int bnx2x_vfpf_set_mac(struct bnx2x *bp)
+{
+	struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int rc;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SET_Q_FILTERS,
+			sizeof(*req));
+
+	req->flags = VFPF_SET_Q_FILTERS_MAC_VLAN_CHANGED;
+	req->vf_qid = 0;
+	req->n_mac_vlan_filters = 1;
+
+	req->filters[0].flags =
+		VFPF_Q_FILTER_DEST_MAC_VALID | VFPF_Q_FILTER_SET_MAC;
+
+	/* copy mac from device to request */
+	memcpy(req->filters[0].mac, bp->dev->dev_addr, ETH_ALEN);
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	/* send message to pf */
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+	if (rc) {
+		BNX2X_ERR("failed to send message to pf. rc was %d\n", rc);
+		return rc;
+	}
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("vfpf SET MAC failed: %d\n", resp->hdr.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 79eedd3..a4c1ad9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -37,6 +37,22 @@ struct hw_sb_info {
  */
 #define TLV_BUFFER_SIZE			1024
 
+#define VFPF_QUEUE_FLG_TPA		0x0001
+#define VFPF_QUEUE_FLG_TPA_IPV6		0x0002
+#define VFPF_QUEUE_FLG_TPA_GRO		0x0004
+#define VFPF_QUEUE_FLG_CACHE_ALIGN	0x0008
+#define VFPF_QUEUE_FLG_STATS		0x0010
+#define VFPF_QUEUE_FLG_OV		0x0020
+#define VFPF_QUEUE_FLG_VLAN		0x0040
+#define VFPF_QUEUE_FLG_COS		0x0080
+#define VFPF_QUEUE_FLG_HC		0x0100
+#define VFPF_QUEUE_FLG_DHC		0x0200
+
+#define VFPF_QUEUE_DROP_IP_CS_ERR	(1 << 0)
+#define VFPF_QUEUE_DROP_TCP_CS_ERR	(1 << 1)
+#define VFPF_QUEUE_DROP_TTL0		(1 << 2)
+#define VFPF_QUEUE_DROP_UDP_CS_ERR	(1 << 3)
+
 enum {
 	PFVF_STATUS_WAITING = 0,
 	PFVF_STATUS_SUCCESS,
@@ -128,6 +144,107 @@ struct pfvf_acquire_resp_tlv {
 	} resc;
 };
 
+/* Init VF */
+struct vfpf_init_tlv {
+	struct vfpf_first_tlv first_tlv;
+	aligned_u64 sb_addr[PFVF_MAX_SBS_PER_VF]; /* vf_sb based */
+	aligned_u64 spq_addr;
+	aligned_u64 stats_addr;
+};
+
+/* Setup Queue */
+struct vfpf_setup_q_tlv {
+	struct vfpf_first_tlv first_tlv;
+
+	struct vf_pf_rxq_params {
+		/* physical addresses */
+		aligned_u64 rcq_addr;
+		aligned_u64 rcq_np_addr;
+		aligned_u64 rxq_addr;
+		aligned_u64 sge_addr;
+
+		/* sb + hc info */
+		u8  vf_sb;		/* index in hw_sbs[] */
+		u8  sb_index;           /* Index in the SB */
+		u16 hc_rate;		/* desired interrupts per sec. */
+					/* valid iff VFPF_QUEUE_FLG_HC */
+		/* rx buffer info */
+		u16 mtu;
+		u16 buf_sz;
+		u16 flags;              /* VFPF_QUEUE_FLG_X flags */
+		u16 stat_id;		/* valid iff VFPF_QUEUE_FLG_STATS */
+
+		/* valid iff VFPF_QUEUE_FLG_TPA */
+		u16 sge_buf_sz;
+		u16 tpa_agg_sz;
+		u8 max_sge_pkt;
+
+		u8 drop_flags;		/* VFPF_QUEUE_DROP_X, for Linux VMs
+					 * all the flags are turned off
+					 */
+
+		u8 cache_line_log;	/* VFPF_QUEUE_FLG_CACHE_ALIGN */
+		u8 padding;
+	} rxq;
+
+	struct vf_pf_txq_params {
+		/* physical addresses */
+		aligned_u64 txq_addr;
+
+		/* sb + hc info */
+		u8  vf_sb;		/* index in hw_sbs[] */
+		u8  sb_index;		/* Index in the SB */
+		u16 hc_rate;		/* desired interrupts per sec. */
+					/* valid iff VFPF_QUEUE_FLG_HC */
+		u32 flags;		/* VFPF_QUEUE_FLG_X flags */
+		u16 stat_id;		/* valid iff VFPF_QUEUE_FLG_STATS */
+		u8  traffic_type;	/* see in setup_context() */
+		u8  padding;
+	} txq;
+
+	u8 vf_qid;			/* index in hw_qid[] */
+	u8 param_valid;
+	#define VFPF_RXQ_VALID		0x01
+	#define VFPF_TXQ_VALID		0x02
+	u8 padding[2];
+};
+
+/* Set Queue Filters */
+struct vfpf_q_mac_vlan_filter {
+	u32 flags;
+	#define VFPF_Q_FILTER_DEST_MAC_VALID	0x01
+	#define VFPF_Q_FILTER_VLAN_TAG_VALID	0x02
+	#define VFPF_Q_FILTER_SET_MAC		0x100	/* set/clear */
+	u8  mac[ETH_ALEN];
+	u16 vlan_tag;
+};
+
+/* configure queue filters */
+struct vfpf_set_q_filters_tlv {
+	struct vfpf_first_tlv first_tlv;
+
+	u32 flags;
+#define VFPF_SET_Q_FILTERS_MAC_VLAN_CHANGED	0x01
+#define VFPF_SET_Q_FILTERS_MULTICAST_CHANGED	0x02
+#define VFPF_SET_Q_FILTERS_RX_MASK_CHANGED	0x04
+
+	u8 vf_qid;			/* index in hw_qid[] */
+	u8 n_mac_vlan_filters;
+	u8 n_multicast;
+	u8 padding;
+
+#define PFVF_MAX_MAC_FILTERS			16
+#define PFVF_MAX_VLAN_FILTERS			16
+#define PFVF_MAX_FILTERS		(PFVF_MAX_MAC_FILTERS +\
+					 PFVF_MAX_VLAN_FILTERS)
+	struct vfpf_q_mac_vlan_filter filters[PFVF_MAX_FILTERS];
+
+#define PFVF_MAX_MULTICAST_PER_VF		32
+	u8  multicast[PFVF_MAX_MULTICAST_PER_VF][6];
+
+	u32 rx_mask;	/* see mask constants at the top of the file */
+};
+
 /* release the VF's acquired resources */
 struct vfpf_release_tlv {
 	struct vfpf_first_tlv   first_tlv;
@@ -141,6 +258,9 @@ struct tlv_buffer_size {
 union vfpf_tlvs {
 	struct vfpf_first_tlv		first_tlv;
 	struct vfpf_acquire_tlv		acquire;
+	struct vfpf_init_tlv		init;
+	struct vfpf_setup_q_tlv		setup_q;
+	struct vfpf_set_q_filters_tlv	set_q_filters;
 	struct vfpf_release_tlv		release;
 	struct channel_list_end_tlv     list_end;
 	struct tlv_buffer_size		tlv_buf_size;
@@ -155,6 +275,9 @@ union pfvf_tlvs {
 enum channel_tlvs {
 	   CHANNEL_TLV_NONE, /* ends tlv sequence */
 	   CHANNEL_TLV_ACQUIRE,
+	   CHANNEL_TLV_INIT,
+	   CHANNEL_TLV_SETUP_Q,
+	   CHANNEL_TLV_SET_Q_FILTERS,
 	   CHANNEL_TLV_RELEASE,
 	   CHANNEL_TLV_LIST_END,
 	   CHANNEL_TLV_MAX
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 10/22] bnx2x: Prepare device and initialize VF database
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

At nic load of the PF, if VFs may be present, prepare the device
for the VFs. Initialize the VF database in preparation of VF arrival.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |   10 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |    3 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   57 +---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |   18 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  399 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |   56 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   37 ++
 8 files changed, 534 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index f38ff4e..d0d1467 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1634,6 +1634,10 @@ struct bnx2x {
 	int					dcb_version;
 
 	/* CAM credit pools */
+
+	/* used only in sriov */
+	struct bnx2x_credit_pool_obj		vlans_pool;
+
 	struct bnx2x_credit_pool_obj		macs_pool;
 
 	/* RX_MODE object */
@@ -1848,12 +1852,14 @@ int bnx2x_del_all_macs(struct bnx2x *bp,
 
 /* Init Function API  */
 void bnx2x_func_init(struct bnx2x *bp, struct bnx2x_func_init_params *p);
+u32 bnx2x_get_pretend_reg(struct bnx2x *bp);
 int bnx2x_get_gpio(struct bnx2x *bp, int gpio_num, u8 port);
 int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode, u8 port);
 int bnx2x_set_mult_gpio(struct bnx2x *bp, u8 pins, u32 mode);
 int bnx2x_set_gpio_int(struct bnx2x *bp, int gpio_num, u32 mode, u8 port);
 void bnx2x_read_mf_cfg(struct bnx2x *bp);
 
+int bnx2x_pretend_func(struct bnx2x *bp, u16 pretend_func_val);
 
 /* dmae */
 void bnx2x_read_dmae(struct bnx2x *bp, u32 src_addr, u32 len32);
@@ -1865,6 +1871,7 @@ u32 bnx2x_dmae_opcode_clr_src_reset(u32 opcode);
 u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
 		      bool with_comp, u8 comp_type);
 
+u8 bnx2x_is_pcie_pending(struct pci_dev *dev);
 
 void bnx2x_calc_fc_adv(struct bnx2x *bp);
 int bnx2x_sp_post(struct bnx2x *bp, int command, int cid,
@@ -1889,6 +1896,9 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 	return val;
 }
 
+void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
+			    bool is_pf);
+
 #define BNX2X_ILT_ZALLOC(x, y, size) \
 	do { \
 		x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index ba51259..859389f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -29,6 +29,7 @@
 #include "bnx2x_sp.h"
 
 
+#include "bnx2x_sriov.h"
 
 /**
  * bnx2x_move_fp - move content of the fastpath structure.
@@ -2520,6 +2521,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	if (IS_PF(bp)) {
 		bnx2x_init_bp_objs(bp);
 
+		bnx2x_iov_nic_init(bp);
 
 		/* Set AFEX default VLAN tag to an invalid value */
 		bp->afex_def_vlan_tag = -1;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 948949d..f6e92ac 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1106,6 +1106,9 @@ static inline void bnx2x_init_bp_objs(struct bnx2x *bp)
 	bnx2x_init_mac_credit_pool(bp, &bp->macs_pool, BP_FUNC(bp),
 				   bnx2x_get_path_func_num(bp));
 
+	bnx2x_init_vlan_credit_pool(bp, &bp->vlans_pool, BP_ABS_FUNC(bp)>>1,
+				    bnx2x_get_path_func_num(bp));
+
 	/* RSS configuration object */
 	bnx2x_init_rss_config_obj(bp, &bp->rss_conf_obj, bp->fp->cl_id,
 				  bp->fp->cid, BP_FUNC(bp), BP_FUNC(bp),
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7472e37..a955b1d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1169,7 +1169,7 @@ static int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func,
 	return ret;
 }
 
-static u8 bnx2x_is_pcie_pending(struct pci_dev *dev)
+u8 bnx2x_is_pcie_pending(struct pci_dev *dev)
 {
 	u16 status;
 
@@ -6254,49 +6254,6 @@ static void bnx2x_setup_fan_failure_detection(struct bnx2x *bp)
 	REG_WR(bp, MISC_REG_SPIO_EVENT_EN, val);
 }
 
-static void bnx2x_pretend_func(struct bnx2x *bp, u8 pretend_func_num)
-{
-	u32 offset = 0;
-
-	if (CHIP_IS_E1(bp))
-		return;
-	if (CHIP_IS_E1H(bp) && (pretend_func_num >= E1H_FUNC_MAX))
-		return;
-
-	switch (BP_ABS_FUNC(bp)) {
-	case 0:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F0;
-		break;
-	case 1:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F1;
-		break;
-	case 2:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F2;
-		break;
-	case 3:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F3;
-		break;
-	case 4:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F4;
-		break;
-	case 5:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F5;
-		break;
-	case 6:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F6;
-		break;
-	case 7:
-		offset = PXP2_REG_PGL_PRETEND_FUNC_F7;
-		break;
-	default:
-		return;
-	}
-
-	REG_WR(bp, offset, pretend_func_num);
-	REG_RD(bp, offset);
-	DP(NETIF_MSG_HW, "Pretending to func %d\n", pretend_func_num);
-}
-
 void bnx2x_pf_disable(struct bnx2x *bp)
 {
 	u32 val = REG_RD(bp, IGU_REG_PF_CONFIGURATION);
@@ -6549,6 +6506,8 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
 
 	bnx2x_init_block(bp, BLOCK_DMAE, PHASE_COMMON);
 
+	bnx2x_iov_init_dmae(bp);
+
 	/* clean the DMAE memory */
 	bp->dmae_ready = 1;
 	bnx2x_init_fill(bp, TSEM_REG_PRAM, 0, 8, 1);
@@ -7034,15 +6993,14 @@ static void bnx2x_ilt_wr(struct bnx2x *bp, u32 index, dma_addr_t addr)
 	REG_WR_DMAE(bp, reg, wb_write, 2);
 }
 
-static void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func,
-				   u8 idu_sb_id, bool is_Pf)
+void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf)
 {
 	u32 data, ctl, cnt = 100;
 	u32 igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA;
 	u32 igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL;
 	u32 igu_addr_ack = IGU_REG_CSTORM_TYPE_0_SB_CLEANUP + (idu_sb_id/32)*4;
 	u32 sb_bit =  1 << (idu_sb_id%32);
-	u32 func_encode = func | (is_Pf ? 1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT;
+	u32 func_encode = func | (is_pf ? 1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT;
 	u32 addr_encode = IGU_CMD_E2_PROD_UPD_BASE + idu_sb_id;
 
 	/* Not supported in BC mode */
@@ -7338,6 +7296,9 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 
 	bnx2x_init_block(bp, BLOCK_TM, init_phase);
 	bnx2x_init_block(bp, BLOCK_DORQ, init_phase);
+
+	bnx2x_iov_init_dq(bp);
+
 	bnx2x_init_block(bp, BLOCK_BRB1, init_phase);
 	bnx2x_init_block(bp, BLOCK_PRS, init_phase);
 	bnx2x_init_block(bp, BLOCK_TSDM, init_phase);
@@ -9433,7 +9394,7 @@ period_task_exit:
  * Init service functions
  */
 
-static u32 bnx2x_get_pretend_reg(struct bnx2x *bp)
+u32 bnx2x_get_pretend_reg(struct bnx2x *bp)
 {
 	u32 base = PXP2_REG_PGL_PRETEND_FUNC_F0;
 	u32 stride = PXP2_REG_PGL_PRETEND_FUNC_F1 - base;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 236079a..50d825e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -825,6 +825,7 @@
 /* [RW 28] The value sent to CM header in the case of CFC load error. */
 #define DORQ_REG_ERR_CMHEAD					 0x170058
 #define DORQ_REG_IF_EN						 0x170004
+#define DORQ_REG_MAX_RVFID_SIZE				 0x1701ec
 #define DORQ_REG_MODE_ACT					 0x170008
 /* [RW 5] The normal mode CID extraction offset. */
 #define DORQ_REG_NORM_CID_OFST					 0x17002c
@@ -847,6 +848,22 @@
    writes the same initial credit to the rspa_crd_cnt and rspb_crd_cnt. The
    read reads this written value. */
 #define DORQ_REG_RSP_INIT_CRD					 0x170048
+#define DORQ_REG_RSPB_CRD_CNT					 0x1700b0
+#define DORQ_REG_VF_NORM_CID_BASE				 0x1701a0
+#define DORQ_REG_VF_NORM_CID_OFST				 0x1701f4
+#define DORQ_REG_VF_NORM_CID_WND_SIZE				 0x1701a4
+#define DORQ_REG_VF_NORM_MAX_CID_COUNT				 0x1701e4
+#define DORQ_REG_VF_NORM_VF_BASE				 0x1701a8
+/* [RW 10] VF type validation mask value */
+#define DORQ_REG_VF_TYPE_MASK_0					 0x170218
+/* [RW 17] VF type validation Min MCID value */
+#define DORQ_REG_VF_TYPE_MAX_MCID_0				 0x1702d8
+/* [RW 17] VF type validation Max MCID value */
+#define DORQ_REG_VF_TYPE_MIN_MCID_0				 0x170298
+/* [RW 10] VF type validation comp value */
+#define DORQ_REG_VF_TYPE_VALUE_0				 0x170258
+#define DORQ_REG_VF_USAGE_CT_LIMIT				 0x170340
+
 /* [RW 4] Initial activity counter value on the load request; when the
    shortcut is done. */
 #define DORQ_REG_SHRT_ACT_CNT					 0x170070
@@ -2571,6 +2588,7 @@
    current task in process). */
 #define PBF_REG_DISABLE_NEW_TASK_PROC_P4			 0x14006c
 #define PBF_REG_DISABLE_PF					 0x1402e8
+#define PBF_REG_DISABLE_VF					 0x1402ec
 /* [RW 18] For port 0: For each client that is subject to WFQ (the
  * corresponding bit is 1); indicates to which of the credit registers this
  * client is mapped. For clients which are not credit blocked; their mapping
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index cd08ac2..a5675b8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -18,7 +18,35 @@
  */
 #include "bnx2x.h"
 #include "bnx2x_init.h"
+#include "bnx2x_cmn.h"
 #include "bnx2x_sriov.h"
+
+/* General service functions */
+static void storm_memset_vf_to_pf(struct bnx2x *bp, u16 abs_fid,
+					 u16 pf_id)
+{
+	REG_WR8(bp, BAR_XSTRORM_INTMEM + XSTORM_VF_TO_PF_OFFSET(abs_fid),
+		pf_id);
+	REG_WR8(bp, BAR_CSTRORM_INTMEM + CSTORM_VF_TO_PF_OFFSET(abs_fid),
+		pf_id);
+	REG_WR8(bp, BAR_TSTRORM_INTMEM + TSTORM_VF_TO_PF_OFFSET(abs_fid),
+		pf_id);
+	REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_VF_TO_PF_OFFSET(abs_fid),
+		pf_id);
+}
+
+static void storm_memset_func_en(struct bnx2x *bp, u16 abs_fid,
+					u8 enable)
+{
+	REG_WR8(bp, BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(abs_fid),
+		enable);
+	REG_WR8(bp, BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(abs_fid),
+		enable);
+	REG_WR8(bp, BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(abs_fid),
+		enable);
+	REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(abs_fid),
+		enable);
+}
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid)
 {
 	int idx;
@@ -263,7 +291,378 @@ failed:
 	__bnx2x_iov_free_vfdb(bp);
 	return err;
 }
+/* VF enable primitives
+ *
+ * when pretend is required the caller is responsible
+ * for calling pretend prior to calling these routines
+ */
+
+/* called only on E1H or E2.
+ * When pretending to be PF, the pretend value is the function number 0...7
+ * When pretending to be VF, the pretend val is the PF-num:VF-valid:ABS-VFID
+ * combination
+ */
+int bnx2x_pretend_func(struct bnx2x *bp, u16 pretend_func_val)
+{
+	u32 pretend_reg;
+
+	if (CHIP_IS_E1H(bp) && pretend_func_val > E1H_FUNC_MAX)
+		return -1;
+
+	/* get my own pretend register */
+	pretend_reg = bnx2x_get_pretend_reg(bp);
+	REG_WR(bp, pretend_reg, pretend_func_val);
+	REG_RD(bp, pretend_reg);
+	return 0;
+}
+
+/* internal vf enable - until vf is enabled internally all transactions
+ * are blocked. this routine should always be called last with pretend.
+ */
+static void bnx2x_vf_enable_internal(struct bnx2x *bp, u8 enable)
+{
+	REG_WR(bp, PGLUE_B_REG_INTERNAL_VFID_ENABLE, enable ? 1 : 0);
+}
+
+/* clears vf error in all semi blocks */
+static void bnx2x_vf_semi_clear_err(struct bnx2x *bp, u8 abs_vfid)
+{
+	REG_WR(bp, TSEM_REG_VFPF_ERR_NUM, abs_vfid);
+	REG_WR(bp, USEM_REG_VFPF_ERR_NUM, abs_vfid);
+	REG_WR(bp, CSEM_REG_VFPF_ERR_NUM, abs_vfid);
+	REG_WR(bp, XSEM_REG_VFPF_ERR_NUM, abs_vfid);
+}
+
+static void bnx2x_vf_pglue_clear_err(struct bnx2x *bp, u8 abs_vfid)
+{
+	u32 was_err_group = (2 * BP_PATH(bp) + abs_vfid) >> 5;
+	u32 was_err_reg = 0;
+
+	switch (was_err_group) {
+	case 0:
+	    was_err_reg = PGLUE_B_REG_WAS_ERROR_VF_31_0_CLR;
+	    break;
+	case 1:
+	    was_err_reg = PGLUE_B_REG_WAS_ERROR_VF_63_32_CLR;
+	    break;
+	case 2:
+	    was_err_reg = PGLUE_B_REG_WAS_ERROR_VF_95_64_CLR;
+	    break;
+	case 3:
+	    was_err_reg = PGLUE_B_REG_WAS_ERROR_VF_127_96_CLR;
+	    break;
+	}
+	REG_WR(bp, was_err_reg, 1 << (abs_vfid & 0x1f));
+}
+
+void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
+{
+	/* set the VF-PF association in the FW */
+	storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
+	storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
+
+	/* clear vf errors*/
+	bnx2x_vf_semi_clear_err(bp, abs_vfid);
+	bnx2x_vf_pglue_clear_err(bp, abs_vfid);
+
+	/* internal vf-enable - pretend */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, abs_vfid));
+	DP(BNX2X_MSG_IOV, "enabling internal access for vf %x\n", abs_vfid);
+	bnx2x_vf_enable_internal(bp, true);
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+}
+
+static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid)
+{
+	struct pci_dev *dev;
+	struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid);
+	if (!vf)
+		goto unknown_dev;
+
+	dev = pci_get_bus_and_slot(vf->bus, vf->devfn);
+	if (dev)
+		return bnx2x_is_pcie_pending(dev);
+
+unknown_dev:
+	BNX2X_ERR("Unknown device\n");
+	return false;
+}
+
+int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid)
+{
+	/* Wait 100ms */
+	msleep(100);
+
+	/* Verify no pending pci transactions */
+	if (bnx2x_vf_is_pcie_pending(bp, abs_vfid))
+		BNX2X_ERR("PCIE Transactions still pending\n");
+
+	return 0;
+}
+
+/* must be called after the number of PF queues and the number of VFs are
+ * both known
+ */
+static void
+bnx2x_iov_static_resc(struct bnx2x *bp, struct vf_pf_resc_request *resc)
+{
+	u16 vlan_count = 0;
+
+	/* will be set only during VF-ACQUIRE */
+	resc->num_rxqs = 0;
+	resc->num_txqs = 0;
+
+	/* no credit calculcis for macs (just yet) */
+	resc->num_mac_filters = 1;
+
+	/* divvy up vlan rules */
+	vlan_count = bp->vlans_pool.check(&bp->vlans_pool);
+	vlan_count = 1 << ilog2(vlan_count);
+	resc->num_vlan_filters = vlan_count / BNX2X_NR_VIRTFN(bp);
+
+	/* no real limitation */
+	resc->num_mc_filters = 0;
+
+	/* num_sbs already set */
+}
+
 /* called by bnx2x_init_hw_func, returns the next ilt line */
+
+/* IOV global initialization routines  */
+void bnx2x_iov_init_dq(struct bnx2x *bp)
+{
+	if (!IS_SRIOV(bp))
+		return;
+
+	/* Set the DQ such that the CID reflect the abs_vfid */
+	REG_WR(bp, DORQ_REG_VF_NORM_VF_BASE, 0);
+	REG_WR(bp, DORQ_REG_MAX_RVFID_SIZE, ilog2(BNX2X_MAX_NUM_OF_VFS));
+
+	/* Set VFs starting CID. If its > 0 the preceding CIDs are belong to
+	 * the PF L2 queues
+	 */
+	REG_WR(bp, DORQ_REG_VF_NORM_CID_BASE, BNX2X_FIRST_VF_CID);
+
+	/* The VF window size is the log2 of the max number of CIDs per VF */
+	REG_WR(bp, DORQ_REG_VF_NORM_CID_WND_SIZE, BNX2X_VF_CID_WND);
+
+	/* The VF doorbell size  0 - *B, 4 - 128B. We set it here to match
+	 * the Pf doorbell size although the 2 are independent.
+	 */
+	REG_WR(bp, DORQ_REG_VF_NORM_CID_OFST,
+	       BNX2X_DB_SHIFT - BNX2X_DB_MIN_SHIFT);
+
+	/* No security checks for now -
+	 * configure single rule (out of 16) mask = 0x1, value = 0x0,
+	 * CID range 0 - 0x1ffff
+	 */
+	REG_WR(bp, DORQ_REG_VF_TYPE_MASK_0, 1);
+	REG_WR(bp, DORQ_REG_VF_TYPE_VALUE_0, 0);
+	REG_WR(bp, DORQ_REG_VF_TYPE_MIN_MCID_0, 0);
+	REG_WR(bp, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff);
+
+	/* set the number of VF alllowed doorbells to the full DQ range */
+	REG_WR(bp, DORQ_REG_VF_NORM_MAX_CID_COUNT, 0x20000);
+
+	/* set the VF doorbell threshold */
+	REG_WR(bp, DORQ_REG_VF_USAGE_CT_LIMIT, 4);
+}
+
+void bnx2x_iov_init_dmae(struct bnx2x *bp)
+{
+	DP(BNX2X_MSG_IOV, "SRIOV is %s\n", IS_SRIOV(bp) ? "ON" : "OFF");
+	if (!IS_SRIOV(bp))
+		return;
+
+	REG_WR(bp, DMAE_REG_BACKWARD_COMP_EN, 0);
+}
+
+static int bnx2x_vf_bus(struct bnx2x *bp, int vfid)
+{
+	struct pci_dev *dev = bp->pdev;
+	struct bnx2x_sriov *iov = &bp->vfdb->sriov;
+
+	return dev->bus->number + ((dev->devfn + iov->offset +
+				    iov->stride * vfid) >> 8);
+}
+
+static int bnx2x_vf_devfn(struct bnx2x *bp, int vfid)
+{
+	struct pci_dev *dev = bp->pdev;
+	struct bnx2x_sriov *iov = &bp->vfdb->sriov;
+
+	return (dev->devfn + iov->offset + iov->stride * vfid) & 0xff;
+}
+
+static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	int i, n;
+	struct pci_dev *dev = bp->pdev;
+	struct bnx2x_sriov *iov = &bp->vfdb->sriov;
+
+	for (i = 0, n = 0; i < PCI_SRIOV_NUM_BARS; i += 2, n++) {
+		u64 start = pci_resource_start(dev, PCI_IOV_RESOURCES + i);
+		u32 size = pci_resource_len(dev, PCI_IOV_RESOURCES + i);
+		do_div(size, iov->total);
+		vf->bars[n].bar = start + size * vf->abs_vfid;
+		vf->bars[n].size = size;
+	}
+}
+
+void bnx2x_iov_free_mem(struct bnx2x *bp)
+{
+	int i;
+
+	if (!IS_SRIOV(bp))
+		return;
+
+	/* free vfs hw contexts */
+	for (i = 0; i < BNX2X_VF_CIDS/ILT_PAGE_CIDS; i++) {
+		struct hw_dma *cxt = &bp->vfdb->context[i];
+		BNX2X_PCI_FREE(cxt->addr, cxt->mapping, cxt->size);
+	}
+
+	BNX2X_PCI_FREE(BP_VFDB(bp)->sp_dma.addr,
+		       BP_VFDB(bp)->sp_dma.mapping,
+		       BP_VFDB(bp)->sp_dma.size);
+
+	BNX2X_PCI_FREE(BP_VF_MBX_DMA(bp)->addr,
+		       BP_VF_MBX_DMA(bp)->mapping,
+		       BP_VF_MBX_DMA(bp)->size);
+}
+
+
+int bnx2x_iov_alloc_mem(struct bnx2x *bp)
+{
+	size_t tot_size;
+	int i, rc = 0;
+
+	if (!IS_SRIOV(bp))
+		return rc;
+
+	/* allocate vfs hw contexts */
+	tot_size = (BP_VFDB(bp)->sriov.first_vf_in_pf + BNX2X_NR_VIRTFN(bp)) *
+		BNX2X_CIDS_PER_VF * sizeof(union cdu_context);
+
+	for (i = 0; i < BNX2X_VF_CIDS/ILT_PAGE_CIDS; i++) {
+		struct hw_dma *cxt = BP_VF_CXT_PAGE(bp, i);
+		cxt->size = min_t(size_t, tot_size, CDU_ILT_PAGE_SZ);
+
+		if (cxt->size) {
+			BNX2X_PCI_ALLOC(cxt->addr, &cxt->mapping, cxt->size);
+		} else {
+			cxt->addr = NULL;
+			cxt->mapping = 0;
+		}
+		tot_size -= cxt->size;
+	}
+
+	/* allocate vfs ramrods dma memory - client_init and set_mac */
+	tot_size = BNX2X_NR_VIRTFN(bp) * sizeof(struct bnx2x_vf_sp);
+	BNX2X_PCI_ALLOC(BP_VFDB(bp)->sp_dma.addr, &BP_VFDB(bp)->sp_dma.mapping,
+			tot_size);
+	BP_VFDB(bp)->sp_dma.size = tot_size;
+
+	/* allocate mailboxes */
+	tot_size = BNX2X_NR_VIRTFN(bp) * MBX_MSG_ALIGNED_SIZE;
+	BNX2X_PCI_ALLOC(BP_VF_MBX_DMA(bp)->addr, &BP_VF_MBX_DMA(bp)->mapping,
+			tot_size);
+	BP_VF_MBX_DMA(bp)->size = tot_size;
+
+	return 0;
+
+alloc_mem_err:
+	return -ENOMEM;
+}
+/* called by bnx2x_nic_load */
+int bnx2x_iov_nic_init(struct bnx2x *bp)
+{
+	int vfid, qcount, i;
+
+	if (!IS_SRIOV(bp)) {
+		DP(BNX2X_MSG_IOV, "vfdb was not allocated\n");
+		return 0;
+	}
+
+	DP(BNX2X_MSG_IOV, "num of vfs: %d\n", (bp)->vfdb->sriov.nr_virtfn);
+
+	/* initialize vf database */
+	for_each_vf(bp, vfid) {
+		struct bnx2x_virtf *vf = BP_VF(bp, vfid);
+
+		int base_vf_cid = (BP_VFDB(bp)->sriov.first_vf_in_pf + vfid) *
+			BNX2X_CIDS_PER_VF;
+
+		union cdu_context *base_cxt = (union cdu_context *)
+			BP_VF_CXT_PAGE(bp, base_vf_cid/ILT_PAGE_CIDS)->addr +
+			(base_vf_cid & (ILT_PAGE_CIDS-1));
+
+		DP(BNX2X_MSG_IOV,
+		   "VF[%d] Max IGU SBs: %d, base vf cid 0x%x, base cid 0x%x, base cxt %p\n",
+		   vf->abs_vfid, vf_sb_count(vf), base_vf_cid,
+		   BNX2X_FIRST_VF_CID + base_vf_cid, base_cxt);
+
+		/* init statically provisioned resources */
+		bnx2x_iov_static_resc(bp, &vf->alloc_resc);
+
+		/* queues are initialized during VF-ACQUIRE */
+
+		/* reserve the vf vlan credit */
+		bp->vlans_pool.get(&bp->vlans_pool, vf_vlan_rules_cnt(vf));
+
+		vf->filter_state = 0;
+		vf->sp_cl_id = bnx2x_fp(bp, 0, cl_id);
+
+		/*  init mcast object - This object will be re-initialized
+		 *  during VF-ACQUIRE with the proper cl_id and cid.
+		 *  It needs to be initialized here so that it can be safely
+		 *  handled by a subsequent FLR flow.
+		 */
+		bnx2x_init_mcast_obj(bp, &vf->mcast_obj, 0xFF,
+				     0xFF, 0xFF, 0xFF,
+				     bnx2x_vf_sp(bp, vf, mcast_rdata),
+				     bnx2x_vf_sp_map(bp, vf, mcast_rdata),
+				     BNX2X_FILTER_MCAST_PENDING,
+				     &vf->filter_state,
+				     BNX2X_OBJ_TYPE_RX_TX);
+
+		/* set the mailbox message addresses */
+		BP_VF_MBX(bp, vfid)->msg = (struct bnx2x_vf_mbx_msg *)
+			(((u8 *)BP_VF_MBX_DMA(bp)->addr) + vfid *
+			MBX_MSG_ALIGNED_SIZE);
+
+		BP_VF_MBX(bp, vfid)->msg_mapping = BP_VF_MBX_DMA(bp)->mapping +
+			vfid * MBX_MSG_ALIGNED_SIZE;
+
+		/* Enable vf mailbox */
+		bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
+	}
+
+	/* Final VF init */
+	qcount = 0;
+	for_each_vf(bp, i) {
+		struct bnx2x_virtf *vf = BP_VF(bp, i);
+
+		/* fill in the BDF and bars */
+		vf->bus = bnx2x_vf_bus(bp, i);
+		vf->devfn = bnx2x_vf_devfn(bp, i);
+		bnx2x_vf_set_bars(bp, vf);
+
+		DP(BNX2X_MSG_IOV,
+		   "VF info[%d]: bus 0x%x, devfn 0x%x, bar0 [0x%08x, %d], bar1 [0x%08x, %d], bar2 [0x%08x, %d]\n",
+		   vf->abs_vfid, vf->bus, vf->devfn,
+		   (unsigned)vf->bars[0].bar, vf->bars[0].size,
+		   (unsigned)vf->bars[1].bar, vf->bars[1].size,
+		   (unsigned)vf->bars[2].bar, vf->bars[2].size);
+
+		/* set local queue arrays */
+		vf->vfqs = &bp->vfdb->vfqs[qcount];
+		qcount += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+	}
+
+	return 0;
+}
+
 int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line)
 {
 	int i;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 04925f0..d5e5103 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -181,6 +181,25 @@ struct bnx2x_virtf {
 #define for_each_vf(bp, var) \
 		for ((var) = 0; (var) < BNX2X_NR_VIRTFN(bp); (var)++)
 
+#define HW_VF_HANDLE(bp, abs_vfid) \
+	(u16)(BP_ABS_FUNC((bp)) | (1<<3) |  ((u16)(abs_vfid) << 4))
+
+#define FW_PF_MAX_HANDLE	8
+
+#define FW_VF_HANDLE(abs_vfid)	\
+	(abs_vfid + FW_PF_MAX_HANDLE)
+
+/* VF mail box (aka vf-pf channel) */
+
+/* a container for the bi-directional vf<-->pf messages.
+ *  The actual response will be placed according to the offset parameter
+ *  provided in the request
+ */
+
+#define MBX_MSG_ALIGN	8
+#define MBX_MSG_ALIGNED_SIZE	(roundup(sizeof(struct bnx2x_vf_mbx_msg), \
+				MBX_MSG_ALIGN))
+
 struct bnx2x_vf_mbx_msg {
 	union vfpf_tlvs req;
 	union pfvf_tlvs resp;
@@ -202,6 +221,29 @@ struct bnx2x_vf_mbx {
 					 */
 };
 
+struct bnx2x_vf_sp {
+	union {
+		struct eth_classify_rules_ramrod_data	e2;
+	} mac_rdata;
+
+	union {
+		struct eth_classify_rules_ramrod_data	e2;
+	} vlan_rdata;
+
+	union {
+		struct eth_filter_rules_ramrod_data	e2;
+	} rx_mode_rdata;
+
+	union {
+		struct eth_multicast_rules_ramrod_data  e2;
+	} mcast_rdata;
+
+	union {
+		struct client_init_ramrod_data  init_data;
+		struct client_update_ramrod_data update_data;
+	} q_data;
+};
+
 struct hw_dma {
 	void *addr;
 	dma_addr_t mapping;
@@ -242,11 +284,25 @@ struct bnx2x_vfdb {
 	u32 flrd_vfs[FLRD_VFS_DWORDS];
 };
 
+static inline u8 vf_igu_sb(struct bnx2x_virtf *vf, u16 sb_idx)
+{
+	return vf->igu_base_id + sb_idx;
+}
+
 /* global iov routines */
 int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line);
 int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, int num_vfs_param);
 void __devexit bnx2x_iov_remove_one(struct bnx2x *bp);
+void bnx2x_iov_free_mem(struct bnx2x *bp);
+int bnx2x_iov_alloc_mem(struct bnx2x *bp);
+int bnx2x_iov_nic_init(struct bnx2x *bp);
+void bnx2x_iov_init_dq(struct bnx2x *bp);
+void bnx2x_iov_init_dmae(struct bnx2x *bp);
+void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
+/* VF FLR helpers */
+int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid);
+void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid);
 void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 		   u16 length);
 void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 5bc9016..7259ea5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -79,3 +79,40 @@ void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list)
 	DP(BNX2X_MSG_IOV, "TLV number %d: type %d, length %d\n", i,
 	   tlv->type, tlv->length);
 }
+
+/* General service functions */
+static void storm_memset_vf_mbx_ack(struct bnx2x *bp, u16 abs_fid)
+{
+	u32 addr = BAR_CSTRORM_INTMEM +
+			CSTORM_VF_PF_CHANNEL_STATE_OFFSET(abs_fid);
+
+	REG_WR8(bp, addr, VF_PF_CHANNEL_STATE_READY);
+}
+
+static void storm_memset_vf_mbx_valid(struct bnx2x *bp, u16 abs_fid)
+{
+	u32 addr = BAR_CSTRORM_INTMEM +
+			CSTORM_VF_PF_CHANNEL_VALID_OFFSET(abs_fid);
+
+	REG_WR8(bp, addr, 1);
+}
+static inline void bnx2x_set_vf_mbxs_valid(struct bnx2x *bp)
+{
+	int i;
+	for_each_vf(bp, i)
+		storm_memset_vf_mbx_valid(bp, bnx2x_vf(bp, i, abs_vfid));
+
+}
+
+/* enable vf_pf mailbox (aka vf-pf-chanell) */
+void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid)
+{
+	bnx2x_vf_flr_clnup_epilog(bp, abs_vfid);
+
+	/* enable the mailbox in the FW */
+	storm_memset_vf_mbx_ack(bp, abs_vfid);
+	storm_memset_vf_mbx_valid(bp, abs_vfid);
+
+	/* enable the VF access to the mailbox */
+	bnx2x_vf_enable_access(bp, abs_vfid);
+}
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 12/22] bnx2x: Support of PF driver of a VF acquire request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

When a VF is probed by the VF driver, the VF driver sends an
'acquire' request over the VF <-> PF channel for the resources
it needs to operate (interrupts, queues, etc).
The PF driver either ratifies the request and allocates the resources,
responds with the maximum values it will allow the VF to acquire,
or fails the request entirely if there is a problem.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |   28 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |   10 +
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    |   13 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c  |  202 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h  |   42 ++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c   |  204 ++++++++++++++++++++
 6 files changed, 488 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 859389f..27a0b28 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -87,6 +87,34 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
 	to_fp->txdata_ptr[0] = &bp->bnx2x_txq[new_txdata_index];
 }
 
+/**
+ * bnx2x_fill_fw_str - move content of the fastpath structure.
+ *
+ * @bp:        driver handle
+ * @buf:       character buffer to fill with the fw name
+ * @buf_len:   length of the above buffer
+ *
+ */
+void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len)
+{
+	if (IS_PF(bp)) {
+		u8 phy_fw_ver[PHY_FW_VER_LEN];
+
+		phy_fw_ver[0] = '\0';
+		bnx2x_get_ext_phy_fw_version(&bp->link_params,
+					     phy_fw_ver, PHY_FW_VER_LEN);
+		strlcpy(buf, bp->fw_ver, buf_len);
+		snprintf(buf + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver),
+			 "bc %d.%d.%d%s%s",
+			 (bp->common.bc_ver & 0xff0000) >> 16,
+			 (bp->common.bc_ver & 0xff00) >> 8,
+			 (bp->common.bc_ver & 0xff),
+			 ((phy_fw_ver[0] != '\0') ? " phy " : ""), phy_fw_ver);
+	} else {
+		strlcpy(buf, bp->acquire_resp.pfdev_info.fw_ver, buf_len);
+	}
+}
+
 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
 
 /* free skb in the packet ring at pos idx
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index f6e92ac..2dfde45 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1401,4 +1401,14 @@ static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr)
 	return false;
 }
 
+
+/**
+ * bnx2x_fill_fw_str - Fill buffer with FW version string
+ *
+ * @bp:         driver handle
+ * @buf:	buffer to fill with FW string
+ * @buf_len:	length of buffer
+ *
+ */
+void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len);
 #endif /* BNX2X_CMN_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index ce6fb27..e5c2c3a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -817,21 +817,12 @@ static void bnx2x_get_drvinfo(struct net_device *dev,
 			      struct ethtool_drvinfo *info)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	u8 phy_fw_ver[PHY_FW_VER_LEN];
 
 	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
 
-	phy_fw_ver[0] = '\0';
-	bnx2x_get_ext_phy_fw_version(&bp->link_params,
-				     phy_fw_ver, PHY_FW_VER_LEN);
-	strlcpy(info->fw_version, bp->fw_ver, sizeof(info->fw_version));
-	snprintf(info->fw_version + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver),
-		 "bc %d.%d.%d%s%s",
-		 (bp->common.bc_ver & 0xff0000) >> 16,
-		 (bp->common.bc_ver & 0xff00) >> 8,
-		 (bp->common.bc_ver & 0xff),
-		 ((phy_fw_ver[0] != '\0') ? " phy " : ""), phy_fw_ver);
+	bnx2x_fill_fw_str(bp, info->fw_version, sizeof(info->fw_version));
+
 	strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
 	info->n_stats = BNX2X_NUM_STATS;
 	info->testinfo_len = BNX2X_NUM_TESTS(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 93497e5..b91cb00 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -584,6 +584,66 @@ int bnx2x_iov_alloc_mem(struct bnx2x *bp)
 alloc_mem_err:
 	return -ENOMEM;
 }
+
+static void bnx2x_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			   struct bnx2x_vf_queue *q)
+{
+	u8 cl_id = vfq_cl_id(vf, q);
+	u8 func_id = FW_VF_HANDLE(vf->abs_vfid);
+
+	unsigned long q_type = 0;
+	set_bit(BNX2X_Q_TYPE_HAS_TX, &q_type);
+	set_bit(BNX2X_Q_TYPE_HAS_RX, &q_type);
+
+	/* Queue State object */
+	bnx2x_init_queue_obj(bp, &q->sp_obj,
+			     cl_id, &q->cid, 1, func_id,
+			     bnx2x_vf_sp(bp, vf, q_data),
+			     bnx2x_vf_sp_map(bp, vf, q_data),
+			     q_type);
+
+	DP(BNX2X_MSG_IOV,
+	   "initialized vf %d's queue object. func id set to %d\n",
+	   vf->abs_vfid, q->sp_obj.func_id);
+
+
+	/* mac/vlan objects are per queue, but only those
+	 * that belong to the leading queue are initialized
+	 */
+	if (vfq_is_leading(q)) {
+
+		/* mac*/
+		bnx2x_init_mac_obj(bp, &q->mac_obj,
+				   cl_id, q->cid, func_id,
+				   bnx2x_vf_sp(bp, vf, mac_rdata),
+				   bnx2x_vf_sp_map(bp, vf, mac_rdata),
+				   BNX2X_FILTER_MAC_PENDING,
+				   &vf->filter_state,
+				   BNX2X_OBJ_TYPE_RX_TX,
+				   &bp->macs_pool);
+		/* vlan */
+		bnx2x_init_vlan_obj(bp, &q->vlan_obj,
+				    cl_id, q->cid, func_id,
+				    bnx2x_vf_sp(bp, vf, vlan_rdata),
+				    bnx2x_vf_sp_map(bp, vf, vlan_rdata),
+				    BNX2X_FILTER_VLAN_PENDING,
+				    &vf->filter_state,
+				    BNX2X_OBJ_TYPE_RX_TX,
+				    &bp->vlans_pool);
+
+		/* mcast */
+		bnx2x_init_mcast_obj(bp, &vf->mcast_obj, cl_id,
+				     q->cid, func_id, func_id,
+				     bnx2x_vf_sp(bp, vf, mcast_rdata),
+				     bnx2x_vf_sp_map(bp, vf, mcast_rdata),
+				     BNX2X_FILTER_MCAST_PENDING,
+				     &vf->filter_state,
+				     BNX2X_OBJ_TYPE_RX_TX);
+
+		vf->leading_rss = cl_id;
+	}
+}
+
 /* called by bnx2x_nic_load */
 int bnx2x_iov_nic_init(struct bnx2x *bp)
 {
@@ -928,3 +988,145 @@ void bnx2x_iov_sp_task(struct bnx2x *bp)
 		}
 	}
 }
+
+u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	return min_t(u8, min_t(u8, vf_sb_count(vf), BNX2X_CIDS_PER_VF),
+		     BNX2X_VF_MAX_QUEUES);
+}
+
+static
+int bnx2x_vf_chk_avail_resc(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			    struct vf_pf_resc_request *req_resc)
+{
+	u8 rxq_cnt = vf_rxq_count(vf) ? : bnx2x_vf_max_queue_cnt(bp, vf);
+	u8 txq_cnt = vf_txq_count(vf) ? : bnx2x_vf_max_queue_cnt(bp, vf);
+
+	return ((req_resc->num_rxqs <= rxq_cnt) &&
+		(req_resc->num_txqs <= txq_cnt) &&
+		(req_resc->num_sbs <= vf_sb_count(vf))   &&
+		(req_resc->num_mac_filters <= vf_mac_rules_cnt(vf)) &&
+		(req_resc->num_vlan_filters <= vf_vlan_rules_cnt(vf)));
+}
+
+/* CORE VF API */
+int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
+		     struct vf_pf_resc_request *resc)
+{
+	int base_vf_cid = (BP_VFDB(bp)->sriov.first_vf_in_pf + vf->index) *
+		BNX2X_CIDS_PER_VF;
+
+	union cdu_context *base_cxt = (union cdu_context *)
+		BP_VF_CXT_PAGE(bp, base_vf_cid/ILT_PAGE_CIDS)->addr +
+		(base_vf_cid & (ILT_PAGE_CIDS-1));
+	int i;
+
+	/* if state is 'acquired' the VF was not released or FLR'd, in
+	 * this case the returned resources match the acquired already
+	 * acquired resources. Verify that the requested numbers do
+	 * not exceed the already acquired numbers.
+	 */
+	if (vf->state == VF_ACQUIRED) {
+		DP(BNX2X_MSG_IOV, "VF[%d] Trying to re-acquire resources (VF was not released or FLR'd)\n",
+		   vf->abs_vfid);
+
+		if (!bnx2x_vf_chk_avail_resc(bp, vf, resc)) {
+			BNX2X_ERR("VF[%d] When re-acquiring resources, requested numbers must be <= then previously acquired numbers\n",
+				  vf->abs_vfid);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	/* Otherwise vf state must be 'free' or 'reset' */
+	if (vf->state != VF_FREE && vf->state != VF_RESET) {
+		BNX2X_ERR("VF[%d] Can not acquire a VF with state %d\n",
+			  vf->abs_vfid, vf->state);
+		return -EINVAL;
+	}
+
+	/* static allocation:
+	 * the global maximum number are fixed per VF. fail the request if
+	 * requested number exceed these globals
+	 */
+	if (!bnx2x_vf_chk_avail_resc(bp, vf, resc)) {
+		DP(BNX2X_MSG_IOV,
+		   "cannot fullfill vf resource request. Placing maximal available values in response");
+		/* set the max resource in the vf */
+		return -ENOMEM;
+	}
+
+	/* Set resources counters - 0 request means max available */
+	vf_sb_count(vf) = resc->num_sbs;
+	vf_rxq_count(vf) = resc->num_rxqs ? : bnx2x_vf_max_queue_cnt(bp, vf);
+	vf_txq_count(vf) = resc->num_txqs ? : bnx2x_vf_max_queue_cnt(bp, vf);
+	if (resc->num_mac_filters)
+		vf_mac_rules_cnt(vf) = resc->num_mac_filters;
+	if (resc->num_vlan_filters)
+		vf_vlan_rules_cnt(vf) = resc->num_vlan_filters;
+
+	DP(BNX2X_MSG_IOV,
+	   "Fullfilling vf request: sbount %d, tx_count %d, rx_count %d, mac_rules_count %d, vlan_rules_count %d\n",
+	   vf_sb_count(vf), vf_rxq_count(vf),
+	   vf_txq_count(vf), vf_mac_rules_cnt(vf),
+	   vf_vlan_rules_cnt(vf));
+
+	/* Initialize the queues */
+	if (!vf->vfqs) {
+		DP(BNX2X_MSG_IOV, "vf->vfqs was not allocated");
+		return -EINVAL;
+	}
+
+	for_each_vfq(vf, i) {
+		struct bnx2x_vf_queue *q = vfq_get(vf, i);
+
+		if (!q) {
+			DP(BNX2X_MSG_IOV, "q numer %d was not allocated", i);
+			return -EINVAL;
+		}
+
+		q->index = i;
+		q->cxt = &((base_cxt + i)->eth);
+		q->cid = BNX2X_FIRST_VF_CID + base_vf_cid + i;
+
+		DP(BNX2X_MSG_IOV, "VFQ[%d:%d]: index %d, cid 0x%x, cxt %p\n",
+		   vf->abs_vfid, i, q->index, q->cid, q->cxt);
+
+		/* init SP objects */
+		bnx2x_vfq_init(bp, vf, q);
+	}
+	vf->state = VF_ACQUIRED;
+	return 0;
+}
+
+void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			      enum channel_tlvs tlv)
+{
+	/* lock the channel */
+	mutex_lock(&vf->op_mutex);
+
+	/* record the locking op */
+	vf->op_current = tlv;
+
+	/* log the lock */
+	DP(BNX2X_MSG_IOV, "VF[%d]: vf pf channel locked by %d\n",
+	   vf->abs_vfid, tlv);
+}
+
+void bnx2x_unlock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				enum channel_tlvs expected_tlv)
+{
+	WARN(expected_tlv != vf->op_current,
+	     "lock mismatch: expected %d found %d", expected_tlv,
+	     vf->op_current);
+
+	/* lock the channel */
+	mutex_unlock(&vf->op_mutex);
+
+	/* log the unlock */
+	DP(BNX2X_MSG_IOV, "VF[%d]: vf pf channel unlocked by %d\n",
+	   vf->abs_vfid, vf->op_current);
+
+	/* record the locking op */
+	vf->op_current = CHANNEL_TLV_NONE;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 249fa58..a6ae7f0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -19,9 +19,13 @@
 #ifndef BNX2X_SRIOV_H
 #define BNX2X_SRIOV_H
 
+#include "bnx2x_vfpf.h"
+#include "bnx2x_cmn.h"
+
 /* The bnx2x device structure holds vfdb structure described below.
  * The VF array is indexed by the relative vfid.
  */
+#define BNX2X_VF_MAX_QUEUES		16
 struct bnx2x_sriov {
 	u32 first_vf_in_pf;
 
@@ -259,6 +263,12 @@ struct bnx2x_virtf {
 #define for_each_vf(bp, var) \
 		for ((var) = 0; (var) < BNX2X_NR_VIRTFN(bp); (var)++)
 
+#define for_each_vfq(vf, var) \
+		for ((var) = 0; (var) < vf_rxq_count(vf); (var)++)
+
+#define for_each_vf_sb(vf, var) \
+		for ((var) = 0; (var) < vf_sb_count(vf); (var)++)
+
 #define HW_VF_HANDLE(bp, abs_vfid) \
 	(u16)(BP_ABS_FUNC((bp)) | (1<<3) |  ((u16)(abs_vfid) << 4))
 
@@ -267,6 +277,13 @@ struct bnx2x_virtf {
 #define FW_VF_HANDLE(abs_vfid)	\
 	(abs_vfid + FW_PF_MAX_HANDLE)
 
+/* locking and unlocking the channel mutex */
+void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			      enum channel_tlvs tlv);
+
+void bnx2x_unlock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				enum channel_tlvs expected_tlv);
+
 /* VF mail box (aka vf-pf channel) */
 
 /* a container for the bi-directional vf<-->pf messages.
@@ -368,11 +385,31 @@ static inline struct bnx2x_vf_queue *vfq_get(struct bnx2x_virtf *vf, u8 index)
 	return &(vf->vfqs[index]);
 }
 
+static inline bool vfq_is_leading(struct bnx2x_vf_queue *vfq)
+{
+	return (vfq->index == 0);
+}
+
+/* FW ids */
 static inline u8 vf_igu_sb(struct bnx2x_virtf *vf, u16 sb_idx)
 {
 	return vf->igu_base_id + sb_idx;
 }
 
+static inline u8 vf_hc_qzone(struct bnx2x_virtf *vf, u16 sb_idx)
+{
+	return vf_igu_sb(vf, sb_idx);
+}
+
+static u8 vfq_cl_id(struct bnx2x_virtf *vf, struct bnx2x_vf_queue *q)
+{
+	return vf->igu_base_id + q->index;
+}
+static inline u8 vfq_qzone_id(struct bnx2x_virtf *vf, struct bnx2x_vf_queue *q)
+{
+	return vfq_cl_id(vf, q);
+}
+
 /* global iov routines */
 int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line);
 int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, int num_vfs_param);
@@ -390,6 +427,10 @@ void bnx2x_iov_sp_task(struct bnx2x *bp);
 /* global vf mailbox routines */
 void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event);
 void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
+/* acquire */
+int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			  struct vf_pf_resc_request *resc);
+
 static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 						struct bnx2x_virtf *vf)
 {
@@ -399,6 +440,7 @@ static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 }
 
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
+u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
 /* VF FLR helpers */
 int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid);
 void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 4d9f629..01ee50a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -185,6 +185,181 @@ static int bnx2x_copy32_vf_dmae(struct bnx2x *bp, u8 from_vf,
 	/* issue the command and wait for completion */
 	return bnx2x_issue_dmae_with_comp(bp, &dmae);
 }
+
+static void bnx2x_vf_mbx_resp(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vf_mbx *mbx = BP_VF_MBX(bp, vf->index);
+	u64 vf_addr;
+	dma_addr_t pf_addr;
+	u16 length, type;
+	int rc;
+
+	struct pfvf_general_resp_tlv *resp = &mbx->msg->resp.general_resp;
+
+	/* prepare response */
+	type = mbx->first_tlv.tl.type;
+	length = type == CHANNEL_TLV_ACQUIRE ?
+		sizeof(struct pfvf_acquire_resp_tlv) :
+		sizeof(struct pfvf_general_resp_tlv);
+	bnx2x_add_tlv(bp, resp, 0, type, length);
+	resp->hdr.status = bnx2x_pfvf_status_codes(vf->op_rc);
+	bnx2x_add_tlv(bp, resp, length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+	bnx2x_dp_tlv_list(bp, resp);
+
+	DP(BNX2X_MSG_IOV, "mailbox vf address hi 0x%x, lo 0x%x, offset 0x%x",
+	   mbx->vf_addr_hi, mbx->vf_addr_lo, mbx->first_tlv.resp_msg_offset);
+
+	/* send response */
+	vf_addr = HILO_U64(mbx->vf_addr_hi, mbx->vf_addr_lo) +
+		  mbx->first_tlv.resp_msg_offset;
+
+	pf_addr = mbx->msg_mapping +
+		offsetof(struct bnx2x_vf_mbx_msg, resp);
+
+	/* copy the response body, if there is one, before the header, as the vf
+	 * is sensitive to the header being written
+	 */
+	if (resp->hdr.tl.length > sizeof(u64)) {
+		length = resp->hdr.tl.length - sizeof(u64);
+		vf_addr += sizeof(u64);
+		pf_addr += sizeof(u64);
+		rc = bnx2x_copy32_vf_dmae(bp, false, pf_addr, vf->abs_vfid,
+					  U64_HI(vf_addr),
+					  U64_LO(vf_addr),
+					  length/4);
+		if (rc) {
+			BNX2X_ERR("Failed to copy response body to VF %d\n",
+				  vf->abs_vfid);
+			return;
+		}
+		vf_addr -= sizeof(u64);
+		pf_addr -= sizeof(u64);
+	}
+
+	/* ack the FW */
+	storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
+	mmiowb();
+
+	/* initiate dmae to send the response */
+	mbx->flags &= ~VF_MSG_INPROCESS;
+
+	/* copy the response header including status-done field,
+	 * must be last dmae, must be after FW is acked
+	 */
+	rc = bnx2x_copy32_vf_dmae(bp, false, pf_addr, vf->abs_vfid,
+				  U64_HI(vf_addr),
+				  U64_LO(vf_addr),
+				  sizeof(u64)/4);
+
+	/* unlock channel mutex */
+	bnx2x_unlock_vf_pf_channel(bp, vf, mbx->first_tlv.tl.type);
+
+	if (rc) {
+		BNX2X_ERR("Failed to copy response status to VF %d\n",
+			  vf->abs_vfid);
+	}
+	return;
+}
+
+static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				      struct bnx2x_vf_mbx *mbx, int vfop_status)
+{
+	int i;
+	struct pfvf_acquire_resp_tlv *resp = &mbx->msg->resp.acquire_resp;
+	struct pf_vf_resc *resc = &resp->resc;
+	u8 status = bnx2x_pfvf_status_codes(vfop_status);
+
+	memset(resp, 0, sizeof(*resp));
+
+	/* fill in pfdev info */
+	resp->pfdev_info.chip_num = bp->common.chip_id;
+	resp->pfdev_info.db_size = (1 << BNX2X_DB_SHIFT);
+	resp->pfdev_info.indices_per_sb = HC_SB_MAX_INDICES_E2;
+	resp->pfdev_info.pf_cap = (PFVF_CAP_RSS |
+				   /* PFVF_CAP_DHC |*/ PFVF_CAP_TPA);
+	bnx2x_fill_fw_str(bp, resp->pfdev_info.fw_ver,
+			  sizeof(resp->pfdev_info.fw_ver));
+
+	if (status == PFVF_STATUS_NO_RESOURCE ||
+	    status == PFVF_STATUS_SUCCESS) {
+
+		/* set resources numbers, if status equals NO_RESOURCE these
+		 * are max possible numbers
+		 */
+		resc->num_rxqs = vf_rxq_count(vf) ? :
+			bnx2x_vf_max_queue_cnt(bp, vf);
+		resc->num_txqs = vf_txq_count(vf) ? :
+			bnx2x_vf_max_queue_cnt(bp, vf);
+		resc->num_sbs = vf_sb_count(vf);
+		resc->num_mac_filters = vf_mac_rules_cnt(vf);
+		resc->num_vlan_filters = vf_vlan_rules_cnt(vf);
+		resc->num_mc_filters = 0;
+
+		if (status == PFVF_STATUS_SUCCESS) {
+			for_each_vfq(vf, i)
+				resc->hw_qid[i] =
+					vfq_qzone_id(vf, vfq_get(vf, i));
+
+			for_each_vf_sb(vf, i) {
+				resc->hw_sbs[i].hw_sb_id = vf_igu_sb(vf, i);
+				resc->hw_sbs[i].sb_qid = vf_hc_qzone(vf, i);
+			}
+		}
+	}
+
+	DP(BNX2X_MSG_IOV, "VF[%d] ACQUIRE_RESPONSE: pfdev_info- chip_num=0x%x, db_size=%d, idx_per_sb=%d, pf_cap=0x%x\n"
+	   "resources- n_rxq-%d, n_txq-%d, n_sbs-%d, n_macs-%d, n_vlans-%d, n_mcs-%d, fw_ver: '%s'\n",
+	   vf->abs_vfid,
+	   resp->pfdev_info.chip_num,
+	   resp->pfdev_info.db_size,
+	   resp->pfdev_info.indices_per_sb,
+	   resp->pfdev_info.pf_cap,
+	   resc->num_rxqs,
+	   resc->num_txqs,
+	   resc->num_sbs,
+	   resc->num_mac_filters,
+	   resc->num_vlan_filters,
+	   resc->num_mc_filters,
+	   resp->pfdev_info.fw_ver);
+
+	DP_CONT(BNX2X_MSG_IOV, "hw_qids- [ ");
+	for (i = 0; i < vf_rxq_count(vf); i++)
+		DP_CONT(BNX2X_MSG_IOV, "%d ", resc->hw_qid[i]);
+	DP_CONT(BNX2X_MSG_IOV, "], sb_info- [ ");
+	for (i = 0; i < vf_sb_count(vf); i++)
+		DP_CONT(BNX2X_MSG_IOV, "%d:%d ",
+			resc->hw_sbs[i].hw_sb_id,
+			resc->hw_sbs[i].sb_qid);
+	DP_CONT(BNX2X_MSG_IOV, "]\n");
+
+	/* send the response */
+	vf->op_rc = vfop_status;
+	bnx2x_vf_mbx_resp(bp, vf);
+}
+
+static void bnx2x_vf_mbx_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				 struct bnx2x_vf_mbx *mbx)
+{
+	int rc;
+	struct vfpf_acquire_tlv *acquire = &mbx->msg->req.acquire;
+
+	/* log vfdef info */
+	DP(BNX2X_MSG_IOV,
+	   "VF[%d] ACQUIRE: vfdev_info- vf_id %d, vf_os %d resources- n_rxq-%d, n_txq-%d, n_sbs-%d, n_macs-%d, n_vlans-%d, n_mcs-%d\n",
+	   vf->abs_vfid, acquire->vfdev_info.vf_id, acquire->vfdev_info.vf_os,
+	   acquire->resc_request.num_rxqs, acquire->resc_request.num_txqs,
+	   acquire->resc_request.num_sbs, acquire->resc_request.num_mac_filters,
+	   acquire->resc_request.num_vlan_filters,
+	   acquire->resc_request.num_mc_filters);
+
+	/* acquire the resources */
+	rc = bnx2x_vf_acquire(bp, vf, &acquire->resc_request);
+
+	/* response */
+	bnx2x_vf_mbx_acquire_resp(bp, vf, mbx, rc);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -194,8 +369,16 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	/* check if tlv type is known */
 	if (bnx2x_tlv_supported(mbx->first_tlv.tl.type)) {
 
+		/* Lock the per vf op mutex and note the locker's identity.
+		 * The unlock will take place in mbx response.
+		 */
+		bnx2x_lock_vf_pf_channel(bp, vf, mbx->first_tlv.tl.type);
+
 		/* switch on the opcode */
 		switch (mbx->first_tlv.tl.type) {
+		case CHANNEL_TLV_ACQUIRE:
+			bnx2x_vf_mbx_acquire(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
@@ -210,6 +393,27 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		for (i = 0; i < 20; i++)
 			DP_CONT(BNX2X_MSG_IOV, "%x ",
 				mbx->msg->req.tlv_buf_size.tlv_buffer[i]);
+
+		/* test whether we can respond to the VF (do we have an address
+		 * for it?)
+		 */
+		if (vf->state == VF_ACQUIRED) {
+
+			/* mbx_resp uses the op_rc of the VF */
+			vf->op_rc = PFVF_STATUS_NOT_SUPPORTED;
+
+			/* notify the VF that we do not support this request */
+			bnx2x_vf_mbx_resp(bp, vf);
+		} else {
+
+			/* can't send a response since this VF is unknown to us
+			 * just unlock the channel and be done with.
+			 */
+			bnx2x_unlock_vf_pf_channel(bp, vf,
+						   mbx->first_tlv.tl.type);
+		}
+
+
 	}
 }
 
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 06/22] bnx2x: Add teardown_q and close to VF <-> PF channel
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

When a VF is being closed its queues are released via
the 'teardown_q' and the VF itself is closed with
'close'. These are essentially the unload counterparts of
'init' and 'setup_q' from the load flow.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |   12 +++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   86 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h |   18 +++++
 4 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index eedbf4c..cd8c645 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2226,7 +2226,9 @@ int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping);
 int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
 int __devexit bnx2x_vfpf_release(struct bnx2x *bp);
 int bnx2x_vfpf_init(struct bnx2x *bp);
+void bnx2x_vfpf_close_vf(struct bnx2x *bp);
 int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx);
+int bnx2x_vfpf_teardown_queue(struct bnx2x *bp, int qidx);
 int bnx2x_vfpf_set_mac(struct bnx2x *bp);
 int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code);
 /* Congestion management fairness mode */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 201e3bd..2519d5a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2782,9 +2782,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 	/* wait till consumers catch up with producers in all queues */
 	bnx2x_drain_tx_queues(bp);
 
-	/* Cleanup the chip if needed */
-	if (unload_mode != UNLOAD_RECOVERY)
+	/* if VF indicate to PF this function is going down (PF will delete sp
+	 * elsements and clear initializations
+	 */
+	if (IS_VF(bp))
+		bnx2x_vfpf_close_vf(bp);
+
+	/* if this is a normal/close unload need to clean up chip*/
+	else if (unload_mode != UNLOAD_RECOVERY)
 		bnx2x_chip_cleanup(bp, unload_mode, keep_link);
+
+	/* recovery */
 	else {
 		/* Send the UNLOAD_REQUEST to the MCP */
 		bnx2x_send_unload_req(bp, unload_mode);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 278146e..66ff9bc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13399,6 +13399,56 @@ int bnx2x_vfpf_init(struct bnx2x *bp)
 
 	return 0;
 }
+
+/* CLOSE VF - opposite to INIT_VF */
+void bnx2x_vfpf_close_vf(struct bnx2x *bp)
+{
+	struct vfpf_close_tlv *req = &bp->vf2pf_mbox->req.close;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int i, rc;
+	u32 vf_id;
+
+	/* If we haven't got a valid VF id, there is no sense to
+	 * continue with sending messages
+	 */
+	if (bnx2x_get_vf_id(bp, &vf_id))
+		goto free_irq;
+
+	/* Close the queues */
+	for_each_queue(bp, i)
+		bnx2x_vfpf_teardown_queue(bp, i);
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_CLOSE, sizeof(*req));
+
+	req->vf_id = vf_id;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+
+	if (rc)
+		BNX2X_ERR("Sending CLOSE failed. rc was: %d\n", rc);
+
+	else if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+		BNX2X_ERR("Sending CLOSE failed: pf response was %d\n",
+			  resp->hdr.status);
+
+free_irq:
+	/* Disable HW interrupts, NAPI */
+	bnx2x_netif_stop(bp, 0);
+	/* Delete all NAPI objects */
+	bnx2x_del_all_napi(bp);
+
+	/* Release IRQs */
+	bnx2x_free_irq(bp);
+}
+
 /* ask the pf to open a queue for the vf */
 int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx)
 {
@@ -13482,6 +13532,42 @@ int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx)
 	return rc;
 }
 
+int bnx2x_vfpf_teardown_queue(struct bnx2x *bp, int qidx)
+{
+	struct vfpf_q_op_tlv *req = &bp->vf2pf_mbox->req.q_op;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int rc;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_TEARDOWN_Q,
+			sizeof(*req));
+
+	req->vf_qid = qidx;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+
+	if (rc) {
+		BNX2X_ERR("Sending TEARDOWN for queue %d failed: %d\n", qidx,
+			  rc);
+		return rc;
+	}
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("TEARDOWN for queue %d failed: %d\n", qidx,
+			  resp->hdr.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* request pf to add a mac for the vf */
 int bnx2x_vfpf_set_mac(struct bnx2x *bp)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index a4c1ad9..80fdfad 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -109,6 +109,13 @@ struct vfpf_acquire_tlv {
 	aligned_u64 bulletin_addr;
 };
 
+/* simple operation request on queue */
+struct vfpf_q_op_tlv {
+	struct vfpf_first_tlv	first_tlv;
+	u8 vf_qid;
+	u8 padding[3];
+};
+
 /* acquire response tlv - carries the allocated resources */
 struct pfvf_acquire_resp_tlv {
 	struct pfvf_tlv hdr;
@@ -245,6 +252,13 @@ struct vfpf_set_q_filters_tlv {
 	u32 rx_mask;	/* see mask constants at the top of the file */
 };
 
+/* close VF (disable VF) */
+struct vfpf_close_tlv {
+	struct vfpf_first_tlv	first_tlv;
+	u16			vf_id;  /* for debug */
+	u8 padding[2];
+};
+
 /* release the VF's acquired resources */
 struct vfpf_release_tlv {
 	struct vfpf_first_tlv   first_tlv;
@@ -259,6 +273,8 @@ union vfpf_tlvs {
 	struct vfpf_first_tlv		first_tlv;
 	struct vfpf_acquire_tlv		acquire;
 	struct vfpf_init_tlv		init;
+	struct vfpf_close_tlv		close;
+	struct vfpf_q_op_tlv		q_op;
 	struct vfpf_setup_q_tlv		setup_q;
 	struct vfpf_set_q_filters_tlv	set_q_filters;
 	struct vfpf_release_tlv		release;
@@ -278,6 +294,8 @@ enum channel_tlvs {
 	   CHANNEL_TLV_INIT,
 	   CHANNEL_TLV_SETUP_Q,
 	   CHANNEL_TLV_SET_Q_FILTERS,
+	   CHANNEL_TLV_TEARDOWN_Q,
+	   CHANNEL_TLV_CLOSE,
 	   CHANNEL_TLV_RELEASE,
 	   CHANNEL_TLV_LIST_END,
 	   CHANNEL_TLV_MAX
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 03/22] bnx2x: Add to VF <-> PF channel the release request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

VF driver uses this request when removed. The PF driver
reclaims all resources allocated for that VF at this
time.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   45 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h |   14 +++++++
 3 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 3de66ef..d6f4e0e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2221,6 +2221,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id);
 int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping);
 int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
+int __devexit bnx2x_vfpf_release(struct bnx2x *bp);
 /* Congestion management fairness mode */
 #define CMNG_FNS_NONE		0
 #define CMNG_FNS_MINMAX		1
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 89092b0..0ee0572 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12280,6 +12280,9 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 
 	/* Make sure RESET task is not scheduled before continuing */
 	cancel_delayed_work_sync(&bp->sp_rtnl_task);
+	/* send message via vfpf channel to release the resources of this vf */
+	if (IS_VF(bp))
+		bnx2x_vfpf_release(bp);
 
 	if (bp->regview)
 		iounmap(bp->regview);
@@ -13266,3 +13269,45 @@ int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count,
 
 	return 0;
 }
+
+int __devexit bnx2x_vfpf_release(struct bnx2x *bp)
+{
+	struct vfpf_release_tlv *req = &bp->vf2pf_mbox->req.release;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	u32 rc = 0, vf_id;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_RELEASE, sizeof(*req));
+
+	if (bnx2x_get_vf_id(bp, &vf_id))
+		return -EAGAIN;
+
+	req->vf_id = vf_id;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	/* send release request */
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+
+	/* PF timeout */
+	if (rc)
+		return rc;
+
+	/* PF released us */
+	if (resp->hdr.status == PFVF_STATUS_SUCCESS) {
+		DP(BNX2X_MSG_SP, "vf released\n");
+
+	/* PF reports error */
+	} else {
+		BNX2X_ERR("PF failed our release request - are we out of sync? response status: %d\n",
+			  resp->hdr.status);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 8f124fe..79eedd3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -67,6 +67,10 @@ struct pfvf_tlv {
 	u8 padding[3];
 };
 
+/* response tlv used for most tlvs */
+struct pfvf_general_resp_tlv {
+	struct pfvf_tlv hdr;
+};
 /* used to terminate and pad a tlv list */
 struct channel_list_end_tlv {
 	struct channel_tlv tl;
@@ -124,17 +128,26 @@ struct pfvf_acquire_resp_tlv {
 	} resc;
 };
 
+/* release the VF's acquired resources */
+struct vfpf_release_tlv {
+	struct vfpf_first_tlv   first_tlv;
+	u16			vf_id;  /* for debug */
+	u8 padding[2];
+};
+
 struct tlv_buffer_size {
 	u8 tlv_buffer[TLV_BUFFER_SIZE];
 };
 union vfpf_tlvs {
 	struct vfpf_first_tlv		first_tlv;
 	struct vfpf_acquire_tlv		acquire;
+	struct vfpf_release_tlv		release;
 	struct channel_list_end_tlv     list_end;
 	struct tlv_buffer_size		tlv_buf_size;
 };
 
 union pfvf_tlvs {
+	struct pfvf_general_resp_tlv	general_resp;
 	struct pfvf_acquire_resp_tlv	acquire_resp;
 	struct channel_list_end_tlv	list_end;
 	struct tlv_buffer_size		tlv_buf_size;
@@ -142,6 +155,7 @@ union pfvf_tlvs {
 enum channel_tlvs {
 	   CHANNEL_TLV_NONE, /* ends tlv sequence */
 	   CHANNEL_TLV_ACQUIRE,
+	   CHANNEL_TLV_RELEASE,
 	   CHANNEL_TLV_LIST_END,
 	   CHANNEL_TLV_MAX
 };
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 00/22] bnx2x: support SR-IOV
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior

Hi Dave,

Changes for v2:
-Remove redundant empty lines
-Remove redundant 'inline'

This patch series adds support for SR-IOV in the bnx2x driver.
In bnx2x SR-IOV scheme the same bnx2x driver drives both VFs and PFs (single
binary).
The bulk of the communication between the VF drivers and the PF driver is done
via the VF <-> PF channel, a hardware-based communications channel with TLV
messages. The TLVs are designed to support different versions of VF drivers
(from multiple VMs) communicating with the same PF driver.

Patches:
01-03 - Probing and removing a VF driver
Includes sending the 'acquire' and 'release' messages on the VF PF channel.

04-07 - Loading/Unloading a VF driver
Includes refactoring of the driver load code and differentiating the VF flow
from the PF flow. This also includes requests from the VF for the PF to open
a queue in the HW on its behalf, configure the device with macs/vlans/rxmode
data, etc. Likewise the unload flow has been modified for the PF to undo these
configurations when the VF indicates it is going down.

08    - Modify fastpath flows for VFs
VFs have almost identical behavior in fastpath to PFs. In this patch the
VFs prepare transmit transaction for tx-switching, and the code for acking a
fastpath interrupt has been reorganized to allow a VF or PF to preconfigure the
offset of the interrupt's location in the BAR (as they have BARs with different
mapping) so as not to do so in fastpath.

09-10 - Probe and Load a PF driver with SR-IOV
The PF driver allocates and initializes the VF database to manage and keep track
of its VFs, their resources, queues, etc.

11-19 - The PF side of the VF <-> PF channel requests
Here is the implementation on the PF's side of the requests submitted in patches
01 through 07 by the VF. Patch 14 adds support for statistics collection by the 
PF for all of its VFs (stats are DMAEed directly to VM GPA memory).

20    - Support for VF function level reset
When FLR indication is received for VFs, the PF reclaims all the resources
allocated for these VFs (interrupts, queues) and releases allocation it
performed for the FLRed VFs (it does so by consulting the VF database).

21    - Bulletin Board interface
This patch adds the PF <-> VF Bulletin Board interface. This interface is a
simple interface where the PF can be the initiator, and indicate to a VF that
it has a new MAC for it to use. In this interface each post "covers" any
previous posts (hence the name).

22    - Add the VF device ids and enable feature
In this patch we add the VF device ids of the various devices driven by bnx2x.
Here we also add the calls to "pci_enable_sriov" and "pci_disable_sriov".

Important: Currently work is being done in the pci tree for the dynamic control
of the enablement of SR-IOV and the number of VFs of a physical device. In this
patch series we have laid the ground work to interface with this infrastructure
once it is accepted and reaches your tree.

Meanwhile, in patch 9 this code is added to bnx2x_init_one():
--snip snip--
rc = bnx2x_iov_init_one(bp, int_mode, 0/*num vfs*/);
--snip snip--
recompiling the bnx2x with a number of vfs which is not 0 will enable SR-IOV
with that number of VFs.

Please consider applying these patches.

^ permalink raw reply

* Re: Benchmark results: "Enhanced NUMA scheduling with adaptive affinity"
From: Linus Torvalds @ 2012-11-15 16:29 UTC (permalink / raw)
  To: Mel Gorman, David Miller, Eric Dumazet; +Cc: Network Development
In-Reply-To: <20121115100805.GS8218@suse.de>

Davem, Eric -

 this oops may be related to the numa patches, but quite frankly, I
don't see why/how it should be. And there's been some GRO work since
3.6, and we had an earlier oops case, so I thought I'd forward this.

The code decodes to

  14: 39 d0                 cmp    %edx,%eax
  16: 89 53 68             mov    %edx,0x68(%rbx)
  19: 0f 87 c7 04 00 00     ja     0x4e6
  1f: 4c 01 ab e0 00 00 00 add    %r13,0xe0(%rbx)
  26: 49 8b 44 24 08       mov    0x8(%r12),%rax
  2b:* 48 89 18             mov    %rbx,(%rax)     <-- trapping instruction
  2e: 49 89 5c 24 08       mov    %rbx,0x8(%r12)
  33: 0f b6 43 7c           movzbl 0x7c(%rbx),%eax
  37: a8 10                 test   $0x10,%al

and if I read the disassembly right (which is not guaranteed), it's the line

        p->prev->next = skb;

in the "merge:" case in skb_gro_receive() (just after the __skb_pull()
- the "ja" and "add" above the trapping instruction is the BUG_ON()
plus the "skb->data += len" part of the inlined __skb_pull()).

             Linus

On Thu, Nov 15, 2012 at 2:08 AM, Mel Gorman <mgorman@suse.de> wrote:
>
> The machine was meant to test all this overnight but unfortunately when
> running a kernel build benchmark on the schednuma patches the machine
> hung while downloading the tarball with this
>
> [   73.863226] BUG: unable to handle kernel NULL pointer dereference at           (null)
> [   73.871062] IP: [<ffffffff8146feaa>] skb_gro_receive+0xaa/0x590
> [   73.876983] PGD 0
> [   73.878998] Oops: 0002 [#1] PREEMPT SMP
> [   73.882938] Modules linked in: af_packet mperf kvm_intel coretemp kvm crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd sr_mod lrw cdrom aes_x86_64 ses pcspkr xts i7core_edac ata_piix enclosure lpc_ich dcdbas sg gf128mul mfd_core bnx2 edac_core wmi acpi_power_meter button serio_raw joydev microcode autofs4 processor thermal_sys scsi_dh_rdac scsi_dh_hp_sw scsi_dh_alua scsi_dh_emc scsi_dh ata_generic megaraid_sas pata_atiixp [last unloaded: oprofile]
> [   73.924659] CPU 0
> [   73.926493] Pid: 0, comm: swapper/0 Not tainted 3.7.0-rc4-schednuma-v2r3 #1 Dell Inc. PowerEdge R810/0TT6JF
> [   73.936380] RIP: 0010:[<ffffffff8146feaa>]  [<ffffffff8146feaa>] skb_gro_receive+0xaa/0x590
> [   73.944714] RSP: 0018:ffff88047f803b50  EFLAGS: 00010282
> [   73.950004] RAX: 0000000000000000 RBX: ffff88046c2bdbc0 RCX: 0000000000000900
> [   73.957113] RDX: 00000000000005a8 RSI: ffff88046c2bdbc0 RDI: ffff88046eadb800
> [   73.964221] RBP: ffff88047f803bb0 R08: 00000000000005dc R09: ffff88046ddeccc0
> [   73.971328] R10: ffff88086d795d78 R11: 0000000000000001 R12: ffff880462b282c0
> [   73.978436] R13: 0000000000000034 R14: 00000000000005a8 R15: ffff88046eadbec0
> [   73.985543] FS:  0000000000000000(0000) GS:ffff88047f800000(0000) knlGS:0000000000000000
> [   73.993602] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [   73.999326] CR2: 0000000000000000 CR3: 0000000001a0c000 CR4: 00000000000007f0
> [   74.006435] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   74.013543] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [   74.020651] Process swapper/0 (pid: 0, threadinfo ffffffff81a00000, task ffffffff81a14420)
> [   74.028883] Stack:
> [   74.030885]  0000000000000060 ffff880462b282c0 ffff88086d795d78 ffffffff000005dc
> [   74.038300]  ffff88046e5f46c0 000000606a275ec0 0000000000000000 ffff88046c2bdbc0
> [   74.045715]  00000000000005a8 ffff88086d795d78 00000000000005a8 000000006c001080
> [   74.053131] Call Trace:
> [   74.055567]  <IRQ>
> [   74.057486]  [<ffffffff814b9573>] tcp_gro_receive+0x213/0x2b0
> [   74.063419]  [<ffffffff814cff49>] tcp4_gro_receive+0x99/0x110
> [   74.069150]  [<ffffffff814e096d>] inet_gro_receive+0x1cd/0x200
> [   74.074965]  [<ffffffff8147b30a>] dev_gro_receive+0x1ba/0x2b0
> [   74.080691]  [<ffffffff8147b6e3>] napi_gro_receive+0xe3/0x130
> [   74.086426]  [<ffffffffa009fda8>] bnx2_rx_int+0x3e8/0xf10 [bnx2]
> [   74.092416]  [<ffffffffa00a0cbd>] bnx2_poll_work+0x3ed/0x450 [bnx2]
> [   74.098666]  [<ffffffffa00a0d5e>] bnx2_poll_msix+0x3e/0xc0 [bnx2]
> [   74.104739]  [<ffffffff8147b969>] net_rx_action+0x159/0x290
> [   74.110298]  [<ffffffff8104d148>] __do_softirq+0xc8/0x250
> [   74.115682]  [<ffffffff8107bf9e>] ? sched_clock_idle_wakeup_event+0x1e/0x20
> [   74.122625]  [<ffffffff81577c9c>] call_softirq+0x1c/0x30
> [   74.127922]  [<ffffffff8100470d>] do_softirq+0x6d/0xa0
> [   74.133041]  [<ffffffff8104d44d>] irq_exit+0xad/0xc0
> [   74.137996]  [<ffffffff8107779d>] scheduler_ipi+0x5d/0x110
> [   74.143469]  [<ffffffff8102b7a4>] ? native_apic_msr_eoi_write+0x14/0x20
> [   74.150060]  [<ffffffff810257d5>] smp_reschedule_interrupt+0x25/0x30
> [   74.156394]  [<ffffffff8157785d>] reschedule_interrupt+0x6d/0x80
> [   74.162376]  <EOI>
> [   74.164295]  [<ffffffff81316798>] ? intel_idle+0xe8/0x150
> [   74.169875]  [<ffffffff81316779>] ? intel_idle+0xc9/0x150
> [   74.175259]  [<ffffffff8143de99>] cpuidle_enter+0x19/0x20
> [   74.180642]  [<ffffffff8143e522>] cpuidle_idle_call+0xa2/0x340
> [   74.186458]  [<ffffffff8100baca>] cpu_idle+0x7a/0xf0
> [   74.191410]  [<ffffffff8154b44b>] rest_init+0x7b/0x80
> [   74.196447]  [<ffffffff81ac3be2>] start_kernel+0x38f/0x39c
> [   74.201913]  [<ffffffff81ac3652>] ? repair_env_string+0x5e/0x5e
> [   74.207815]  [<ffffffff81ac3335>] x86_64_start_reservations+0x131/0x135
> [   74.214407]  [<ffffffff81ac3439>] x86_64_start_kernel+0x100/0x10f
> [   74.220475] Code: 8b e8 00 00 00 0f 87 86 00 00 00 8b 53 68 8b 43 6c 44 29 ea 39 d0 89 53 68 0f 87 c7 04 00 00 4c 01 ab e0 00 00 00 49 8b 44 24 08 <48> 89 18 49 89 5c 24 08 0f b6 43 7c a8 10 0f 85 ac 04 00 00 83
> [   74.240051] RIP  [<ffffffff8146feaa>] skb_gro_receive+0xaa/0x590
> [   74.246046]  RSP <ffff88047f803b50>
> [   74.249518] CR2: 0000000000000000
> [   74.252821] ---[ end trace 97cb529523f52c9b ]---
> [   74.258895] Kernel panic - not syncing: Fatal exception in interrupt
> -- 0:console -- time-stamp -- Nov/15/12  3:09:06 --
>
> I've no idea if it is directly related to your patches and I didn't try
> to reproduce it yet.

^ permalink raw reply

* Network soft and hard irqs statistics
From: Javier Domingo @ 2012-11-15 16:01 UTC (permalink / raw)
  To: netdev

Hello all,

I am migrating some statistics we use in our research group to v3.6.
This I don't think it will be usefull for anyone, as they measure
softirqs, hardirqs, times on them, etc.

We modified net_device structure to contain a structure that has
several field of statistics.

Patched the e1000 and tg3 drivers to measure hardirq times, and
polling times. We also patched net_rx_action (the softirq) to check if
we get out per budget, per jiffies and netif_receive_skb to measure
times and how many packets are captured.

At the moment, we have been working with a external module that
accessed this vars, creating proc entries, and allowing us to reset
those measures.

Now, I am trying to make it the most standard way, with the intention
that when I talk to my boss, he will allow me to release the code.

The main aim of this is to get some feedback about the interest this
can have and to ask a few questions:

-> Where may I create the proc entry? we currently use
/proc/net/stats/<netdev>. I have also thought introducing that entry
in fs/proc/proc_net.c, but I am not too sure which conventions there
are...

-> When migrating the net_rx_action, I found that we used this line:
if(cpus_equal(mask,irq_desc[timedev->irq].affinity))
before counting if we get out by budget or by jiffies to (I suppose)
check that the softirq was the one assigned to this processor. Is that
needed? I mean the softirq is run in just one of them... I don't
really understand why it is important, so if anyone can explain me, I
would be glad.

-> We have patched the hardirqs in the driver, and the polling times
too. I know the hardirqs are the only place in which we can measure
them, but would it be posible to, instead of measuring the polls in
e1000_clean (for example) measuring in dev.c net_rx_action, measure
them around n->poll() call?
   Have been doing like this because they told me that the context
change was important... But I am not too sure on how important it is,
if someone could give me any tip on this.

-> In tg3.c I have seen that there are several hardirq function,
though we usually only patched tg3_interrupt_tagged, I have patched
all of them (for what they might be). Why are so many of them? Is that
due to preparation for multiqueue cards?

I hope someone can attend my doubts, and that I dont have asked too
many newbie questions.

Best regards,

Javier Domingo

^ permalink raw reply

* Re: tc adding two filters with different protocols
From: Nieścierowicz Adam @ 2012-11-15 15:04 UTC (permalink / raw)
  To: Netdev

Hi,

anyone can help, or explain what i do wrong?

W dniu 12.11.2012 20:45, Nieścierowicz Adam napisał(a):

> Hello,
>
> writes the qos where i want classify traffic for two protocol: ip and
> pppoe. Example below
>
> ---
> tc qdisc del root dev eth2
> tc qdisc add dev eth2 root handle 1: htb
> tc class add dev eth2 parent 1:0 classid 1:1 htb rate 500Mbit ceil
> 500Mbit
>
> tc filter add dev eth2 parent 1:0 prio 5 protocol ip u32
> tc filter add dev eth2 parent 1:0 prio 5 handle 9: protocol ip u32
> divisor 256
> tc filter add dev eth2 protocol ip parent 1:0 prio 5 u32 ht 800:: 
> match
> ip dst 172.16.0.0/22 hashkey mask 0x000000ff at 16 link 9:
>
> tc filter add dev eth2 parent 1:0 prio 6 protocol ppp_ses u32
> tc filter add dev eth2 parent 1:0 prio 6 handle 4a: protocol ppp_ses
> u32 divisor 256
> tc filter add dev eth2 protocol ppp_ses parent 1:0 prio 6 u32 ht 
> 800::
> match ip dst 31.41.209.0/24 at 24 hashkey mask 0x000000ff at 24 link 
> 4a:
> ---
>
> Unfortunately, when you add lines "tc filter add dev eth2 protocol
> ppp_ses parent 1:0 prio 6 u32 ht 800:: match ip dst 31.41.209.0/24 at 
> 24
> hashkey mask 0x000000ff at 24 link 4a:" protocol change to ip
>
> command "tc -s -d filter show dev eth2" shows
> ---
> filter parent 1: protocol ip pref 5 u32
> filter parent 1: protocol ip pref 5 u32 fh 9: ht divisor 256
> filter parent 1: protocol ip pref 5 u32 fh 800: ht divisor 1
> filter parent 1: protocol ip pref 5 u32 fh 800::800 order 2048 key ht
> 800 bkt 0 link 9: (rule hit 557082 success 0)
> match ac100000/fffffc00 at 16 (success 267341 )
> hash mask 000000ff at 16
> filter parent 1: protocol ip pref 5 u32 fh 800::801 order 2049 key ht
> 800 bkt 0 link 4a: (rule hit 557039 success 0)
> match 1f29d100/ffffff00 at 24 (success 0 )
> hash mask 000000ff at 24
> filter parent 1: protocol ppp_ses pref 6 u32
> filter parent 1: protocol ppp_ses pref 6 u32 fh 4a: ht divisor 256
> filter parent 1: protocol ppp_ses pref 6 u32 fh 801: ht divisor 1
> ---
> if i change order and first protocol is ppp_ses the hashkey for ip
> protocol is changed to ppp_ses.
>
> Is this correct behavior and I do something wrong?
>
> Thanks
> Adam N.
>

^ permalink raw reply

* Re: [PATCH] tcp: fix retransmission in repair mode
From: Eric Dumazet @ 2012-11-15 15:03 UTC (permalink / raw)
  To: Andrey Vagin
  Cc: netdev, criu, linux-kernel, Pavel Emelyanov, David S. Miller,
	Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy
In-Reply-To: <1352988197-14414-1-git-send-email-avagin@openvz.org>

On Thu, 2012-11-15 at 18:03 +0400, Andrey Vagin wrote:
> From: Andrew Vagin <avagin@openvz.org>
> 
> Currently if a socket was repaired with a few packet in a write queue,
> a kernel bug may be triggered:
> 
> kernel BUG at net/ipv4/tcp_output.c:2330!
> RIP: 0010:[<ffffffff8155784f>] tcp_retransmit_skb+0x5ff/0x610
> 
> According to the initial realization v3.4-rc2-963-gc0e88ff,
> all skb-s should look like already posted. This patch fixes code
> according with this sentence.
> 
> Here are three points, which were not done in the initial patch:
> 1. A tcp send head should not be changed
> 2. Initialize TSO state of a skb
> 3. Reset the retransmission time
> 
> This patch moves logic from tcp_sendmsg to tcp_write_xmit. A packet
> passes the ussual way, but isn't sent to network. This patch solves
> all described problems and handles tcp_sendpages.
> 
> Cc: Pavel Emelyanov <xemul@parallels.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
> Cc: James Morris <jmorris@namei.org>
> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> Cc: Patrick McHardy <kaber@trash.net>
> Signed-off-by: Andrey Vagin <avagin@openvz.org>

Any chance these tcp repair hacks could be done outside of tcp fast
path ?

^ permalink raw reply

* [PATCH v2 net-next] sctp: Add support to per-association statistics via a new SCTP_GET_ASSOC_STATS call
From: Michele Baldessari @ 2012-11-15 15:01 UTC (permalink / raw)
  To: linux-sctp
  Cc: michele, Neil Horman, Thomas Graf, Vlad Yasevich, netdev,
	David S. Miller

The current SCTP stack is lacking a mechanism to have per association
statistics. This is an implementation modeled after OpenSolaris'
SCTP_GET_ASSOC_STATS.

Userspace part will follow on lksctp if/when there is a general ACK on
this.

V2)
  - Implement partial retrieval of stat struct to cope for future expansion
  - Kill the rtxpackets counter as it cannot be precise anyway
  - Rename outseqtsns to outofseqtsns to make it clearer that these are out
    of sequence unexpected TSNs
  - Move asoc->ipackets++ under a lock to avoid potential miscounts
  - Fold asoc->opackets++ into the already existing asoc check
  - Kill unneeded (q->asoc) test when increasing rtxchunks
  - Do not count octrlchunks if sending failed (SCTP_XMIT_OK != 0)
  - Don't count SHUTDOWNs as SACKs
  - Move SCTP_GET_ASSOC_STATS to the private space API
  - Adjust the len check in sctp_getsockopt_assoc_stats() to allow for
    future struct growth
  - Move association statistics in their own struct
  - Update idupchunks when we send a SACK with dup TSNs
  - return min_rto in max_rto when RTO has not changed. Also return the
    transport when max_rto last changed.

Signed-off: Michele Baldessari <michele@acksyn.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
---
 include/net/sctp/sctp.h    | 12 ++++++++
 include/net/sctp/structs.h | 36 +++++++++++++++++++++++
 include/net/sctp/user.h    | 27 +++++++++++++++++
 net/sctp/associola.c       |  9 +++++-
 net/sctp/endpointola.c     |  5 +++-
 net/sctp/input.c           |  2 ++
 net/sctp/output.c          | 14 +++++----
 net/sctp/outqueue.c        | 12 ++++++--
 net/sctp/sm_make_chunk.c   |  5 ++--
 net/sctp/sm_sideeffect.c   |  1 +
 net/sctp/sm_statefuns.c    | 10 +++++--
 net/sctp/socket.c          | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 net/sctp/transport.c       |  2 ++
 13 files changed, 194 insertions(+), 13 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 9c6414f..7fdf298 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -272,6 +272,18 @@ struct sctp_mib {
         unsigned long   mibs[SCTP_MIB_MAX];
 };
 
+/* helper function to track stats about max rto and related transport */
+static inline void sctp_max_rto(struct sctp_association *asoc,
+				struct sctp_transport *trans)
+{
+	if (asoc->stats.max_obs_rto < (__u64)trans->rto) {
+		asoc->stats.max_obs_rto = trans->rto;
+		memset(&asoc->stats.obs_rto_ipaddr, 0,
+			sizeof(struct sockaddr_storage));
+		memcpy(&asoc->stats.obs_rto_ipaddr, &trans->ipaddr,
+			trans->af_specific->sockaddr_len);
+	}
+}
 
 /* Print debugging messages.  */
 #if SCTP_DEBUG
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2b2f61d..bf94ddf 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1312,6 +1312,40 @@ struct sctp_inithdr_host {
 	__u32 initial_tsn;
 };
 
+/* SCTP_GET_ASSOC_STATS counters */
+struct sctp_priv_assoc_stats {
+	/* Maximum observed rto in the association. Value is set to
+	 * 0 when read and the max rto did not change. The transport where
+	 * the max_rto was observed is returned in obs_rto_ipaddr
+	 */
+	struct sockaddr_storage obs_rto_ipaddr;
+	__u64 max_obs_rto;
+	__u64 max_prev_obs_rto;
+	/* Total In and Out SACKs received and sent */
+	__u64 isacks;
+	__u64 osacks;
+	/* Total In and Out packets received and sent */
+	__u64 opackets;
+	__u64 ipackets;
+	/* Total retransmitted chunks */
+	__u64 rtxchunks;
+	/* TSN received > next expected */
+	__u64 outofseqtsns;
+	/* Duplicate Chunks received */
+	__u64 idupchunks;
+	/* Gap Ack Blocks received */
+	__u64 gapcnt;
+	/* Unordered data chunks sent and received */
+	__u64 ouodchunks;
+	__u64 iuodchunks;
+	/* Ordered data chunks sent and received */
+	__u64 oodchunks;
+	__u64 iodchunks;
+	/* Control chunks sent and received */
+	__u64 octrlchunks;
+	__u64 ictrlchunks;
+};
+
 /* RFC2960
  *
  * 12. Recommended Transmission Control Block (TCB) Parameters
@@ -1830,6 +1864,8 @@ struct sctp_association {
 
 	__u8 need_ecne:1,	/* Need to send an ECNE Chunk? */
 	     temp:1;		/* Is it a temporary association? */
+
+	struct sctp_priv_assoc_stats stats;
 };
 
 
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 1b02d7a..c851e72 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -107,6 +107,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_GET_LOCAL_ADDRS	109		/* Get all local address. */
 #define SCTP_SOCKOPT_CONNECTX	110		/* CONNECTX requests. */
 #define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
+#define SCTP_GET_ASSOC_STATS	112	/* Read only */
 
 /*
  * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
@@ -719,6 +720,32 @@ struct sctp_getaddrs {
 	__u8			addrs[0]; /*output, variable size*/
 };
 
+/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves
+ * association stats. All stats are counts except sas_maxrto and
+ * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since
+ * the last call. Will return 0 when did not change since last call
+ */
+struct sctp_assoc_stats {
+	sctp_assoc_t	sas_assoc_id;    /* Input */
+					 /* Transport of observed max RTO */
+	struct sockaddr_storage sas_obs_rto_ipaddr;
+	__u64		sas_maxrto;      /* Maximum Observed RTO for period */
+	__u64		sas_isacks;	 /* SACKs received */
+	__u64		sas_osacks;	 /* SACKs sent */
+	__u64		sas_opackets;	 /* Packets sent */
+	__u64		sas_ipackets;	 /* Packets received */
+	__u64		sas_rtxchunks;   /* Retransmitted Chunks */
+	__u64		sas_outofseqtsns;/* TSN received > next expected */
+	__u64		sas_idupchunks;  /* Dups received (ordered+unordered) */
+	__u64		sas_gapcnt;      /* Gap Acknowledgements Received */
+	__u64		sas_ouodchunks;  /* Unordered data chunks sent */
+	__u64		sas_iuodchunks;  /* Unordered data chunks received */
+	__u64		sas_oodchunks;	 /* Ordered data chunks sent */
+	__u64		sas_iodchunks;	 /* Ordered data chunks received */
+	__u64		sas_octrlchunks; /* Control chunks sent */
+	__u64		sas_ictrlchunks; /* Control chunks received */
+};
+
 /* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
 /* On user space Linux, these live in <bits/socket.h> as an enum.  */
 enum sctp_msg_flags {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b1ef3bc..afb7522 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -321,6 +321,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	asoc->default_timetolive = sp->default_timetolive;
 	asoc->default_rcv_context = sp->default_rcv_context;
 
+	/* SCTP_GET_ASSOC_STATS COUNTERS */
+	memset(&asoc->stats, 0, sizeof(struct sctp_priv_assoc_stats));
+
 	/* AUTH related initializations */
 	INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
 	err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp);
@@ -760,6 +763,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 
 	/* Set the transport's RTO.initial value */
 	peer->rto = asoc->rto_initial;
+	sctp_max_rto(asoc, peer);
 
 	/* Set the peer's active state. */
 	peer->state = peer_state;
@@ -1152,8 +1156,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		 */
 		if (sctp_chunk_is_data(chunk))
 			asoc->peer.last_data_from = chunk->transport;
-		else
+		else {
 			SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS);
+			if (chunk->chunk_hdr->type == SCTP_CID_SACK)
+				asoc->stats.isacks++;
+		}
 
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1859e2b..32ab55b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -480,8 +480,11 @@ normal:
 		 */
 		if (asoc && sctp_chunk_is_data(chunk))
 			asoc->peer.last_data_from = chunk->transport;
-		else
+		else {
 			SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS);
+			if (asoc)
+				asoc->stats.ictrlchunks++;
+		}
 
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 8bd3c27..54c449b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -281,6 +281,8 @@ int sctp_rcv(struct sk_buff *skb)
 		SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
 		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
 	}
+	if (asoc)
+		asoc->stats.ipackets++;
 
 	sctp_bh_unlock_sock(sk);
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4e90188bf..f5200a2 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -311,6 +311,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
 
 	    case SCTP_CID_SACK:
 		packet->has_sack = 1;
+		if (chunk->asoc)
+			chunk->asoc->stats.osacks++;
 		break;
 
 	    case SCTP_CID_AUTH:
@@ -584,11 +586,13 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 */
 
 	/* Dump that on IP!  */
-	if (asoc && asoc->peer.last_sent_to != tp) {
-		/* Considering the multiple CPU scenario, this is a
-		 * "correcter" place for last_sent_to.  --xguo
-		 */
-		asoc->peer.last_sent_to = tp;
+	if (asoc) {
+		asoc->stats.opackets++;
+		if (asoc->peer.last_sent_to != tp)
+			/* Considering the multiple CPU scenario, this is a
+			 * "correcter" place for last_sent_to.  --xguo
+			 */
+			asoc->peer.last_sent_to = tp;
 	}
 
 	if (has_data) {
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1b4a7f8..379c81d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -667,6 +667,7 @@ redo:
 				chunk->fast_retransmit = SCTP_DONT_FRTX;
 
 			q->empty = 0;
+			q->asoc->stats.rtxchunks++;
 			break;
 		}
 
@@ -876,12 +877,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			if (status  != SCTP_XMIT_OK) {
 				/* put the chunk back */
 				list_add(&chunk->list, &q->control_chunk_list);
-			} else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+			} else {
+				asoc->stats.octrlchunks++;
 				/* PR-SCTP C5) If a FORWARD TSN is sent, the
 				 * sender MUST assure that at least one T3-rtx
 				 * timer is running.
 				 */
-				sctp_transport_reset_timers(transport);
+				if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN)
+					sctp_transport_reset_timers(transport);
 			}
 			break;
 
@@ -1055,6 +1058,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 				 */
 				if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING)
 					chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM;
+				if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+					asoc->stats.ouodchunks++;
+				else
+					asoc->stats.oodchunks++;
 
 				break;
 
@@ -1162,6 +1169,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
 
 	sack_ctsn = ntohl(sack->cum_tsn_ack);
 	gap_ack_blocks = ntohs(sack->num_gap_ack_blocks);
+	asoc->stats.gapcnt += gap_ack_blocks;
 	/*
 	 * SFR-CACC algorithm:
 	 * On receipt of a SACK the sender SHOULD execute the
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fbe1636..eb7633f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -804,10 +804,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 				 gabs);
 
 	/* Add the duplicate TSN information.  */
-	if (num_dup_tsns)
+	if (num_dup_tsns) {
+		aptr->stats.idupchunks += num_dup_tsns;
 		sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
 				 sctp_tsnmap_get_dups(map));
-
+	}
 	/* Once we have a sack generated, check to see what our sack
 	 * generation is, if its 0, reset the transports to 0, and reset
 	 * the association generation to 1
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 6eecf7e..363727e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -542,6 +542,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
 	 */
 	if (!is_hb || transport->hb_sent) {
 		transport->rto = min((transport->rto * 2), transport->asoc->rto_max);
+		sctp_max_rto(asoc, transport);
 	}
 }
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b6adef8..ecf7a17 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6127,6 +6127,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		/* The TSN is too high--silently discard the chunk and
 		 * count on it getting retransmitted later.
 		 */
+		if (chunk->asoc)
+			chunk->asoc->stats.outofseqtsns++;
 		return SCTP_IERROR_HIGH_TSN;
 	} else if (tmp > 0) {
 		/* This is a duplicate.  Record it.  */
@@ -6226,10 +6228,14 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	/* Note: Some chunks may get overcounted (if we drop) or overcounted
 	 * if we renege and the chunk arrives again.
 	 */
-	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
 		SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS);
-	else {
+		if (chunk->asoc)
+			chunk->asoc->stats.iuodchunks++;
+	} else {
 		SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
+		if (chunk->asoc)
+			chunk->asoc->stats.iodchunks++;
 		ordered = 1;
 	}
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 15379ac..8113249 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -609,6 +609,7 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 				    2*asoc->pathmtu, 4380));
 				trans->ssthresh = asoc->peer.i.a_rwnd;
 				trans->rto = asoc->rto_initial;
+				sctp_max_rto(asoc, trans);
 				trans->rtt = trans->srtt = trans->rttvar = 0;
 				sctp_transport_route(trans, NULL,
 				    sctp_sk(asoc->base.sk));
@@ -5633,6 +5634,74 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
 	return 0;
 }
 
+/*
+ * SCTP_GET_ASSOC_STATS
+ *
+ * This option retrieves local per endpoint statistics. It is modeled
+ * after OpenSolaris' implementation
+ */
+static int sctp_getsockopt_assoc_stats(struct sock *sk, int len,
+				       char __user *optval,
+				       int __user *optlen)
+{
+	struct sctp_assoc_stats sas;
+	struct sctp_association *asoc = NULL;
+
+	/* User must provide at least the assoc id */
+	if (len < sizeof(sctp_assoc_t))
+		return -EINVAL;
+
+	if (copy_from_user(&sas, optval, len))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, sas.sas_assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	sas.sas_rtxchunks = asoc->stats.rtxchunks;
+	sas.sas_gapcnt = asoc->stats.gapcnt;
+	sas.sas_outofseqtsns = asoc->stats.outofseqtsns;
+	sas.sas_osacks = asoc->stats.osacks;
+	sas.sas_isacks = asoc->stats.isacks;
+	sas.sas_octrlchunks = asoc->stats.octrlchunks;
+	sas.sas_ictrlchunks = asoc->stats.ictrlchunks;
+	sas.sas_oodchunks = asoc->stats.oodchunks;
+	sas.sas_iodchunks = asoc->stats.iodchunks;
+	sas.sas_ouodchunks = asoc->stats.ouodchunks;
+	sas.sas_iuodchunks = asoc->stats.iuodchunks;
+	sas.sas_idupchunks = asoc->stats.idupchunks;
+	sas.sas_opackets = asoc->stats.opackets;
+	sas.sas_ipackets = asoc->stats.ipackets;
+
+	memcpy(&sas.sas_obs_rto_ipaddr, &asoc->stats.obs_rto_ipaddr,
+		sizeof(struct sockaddr_storage));
+	/* New high max rto observed */
+	if (asoc->stats.max_obs_rto > asoc->stats.max_prev_obs_rto)
+		sas.sas_maxrto = asoc->stats.max_obs_rto;
+	else /* return min_rto since max rto has not changed */
+		sas.sas_maxrto = asoc->rto_min;
+
+	/* Record the value sent to the user this period */
+	asoc->stats.max_prev_obs_rto = sas.sas_maxrto;
+
+	/* Mark beginning of a new observation period */
+	asoc->stats.max_obs_rto = 0;
+
+	/* Allow the struct to grow and fill in as much as possible */
+	len = min_t(size_t, len, sizeof(sas));
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	SCTP_DEBUG_PRINTK("sctp_getsockopt_assoc_stat(%d): %d\n",
+			  len, sas.sas_assoc_id);
+
+	if (copy_to_user(optval, &sas, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -5774,6 +5843,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_PEER_ADDR_THLDS:
 		retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen);
 		break;
+	case SCTP_GET_ASSOC_STATS:
+		retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 953c21e..8c6920d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -350,6 +350,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 
 	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
 	tp->rto = tp->srtt + (tp->rttvar << 2);
+	sctp_max_rto(tp->asoc, tp);
 
 	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
 	 * seconds then it is rounded up to RTO.Min seconds.
@@ -620,6 +621,7 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->burst_limited = 0;
 	t->ssthresh = asoc->peer.i.a_rwnd;
 	t->rto = asoc->rto_initial;
+	sctp_max_rto(asoc, t);
 	t->rtt = 0;
 	t->srtt = 0;
 	t->rttvar = 0;
-- 
1.8.0

^ permalink raw reply related

* Re: [PATCH 0/9 v4] use efficient this_cpu_* helper
From: Tejun Heo @ 2012-11-15 14:53 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: David Miller, NetDev, Shan Wei, Kernel-Maillist
In-Reply-To: <0000013b047079ab-01971b64-07e2-4f2e-a204-6baff2180fc3-000000@email.amazonses.com>

On Thu, Nov 15, 2012 at 02:19:38PM +0000, Christoph Lameter wrote:
> Tejon: Could you pick up this patchset?

Sure, but, Shan, when posting patchset, please make the patches
replies to the head message; otherwise, it's pretty difficult to track
what's going on with the patchset as a whole.  I see that some patches
are being picked up by respective subsystems.  If you have patches
left, please let me know.

Thanks.

-- 
tejun

^ permalink raw reply

* [net-next 9/9] ixgbevf: Add checksum statistics counters to rings
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Add hardware checksum statistic counters to the ring structures and
then during packet processing update those counters instead of the
global counters in the adapter structure.  Only update the adapter
structure counters when all other statistics are gathered in the
ixgbevf_update_stats() function.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      |  2 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 23 ++++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index dbdf39b..fc0af9a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -74,6 +74,8 @@ struct ixgbevf_ring {
 	u64			total_bytes;
 	u64			total_packets;
 	struct u64_stats_sync	syncp;
+	u64 hw_csum_rx_error;
+	u64 hw_csum_rx_good;
 
 	u16 head;
 	u16 tail;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a52b14e..f267c00 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -295,12 +295,11 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector,
 
 /**
  * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum
- * @adapter: address of board private structure
+ * @ring: pointer to Rx descriptor ring structure
  * @status_err: hardware indication of status of receive
  * @skb: skb currently being received and modified
  **/
-static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter,
-				       struct ixgbevf_ring *ring,
+static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
 				       u32 status_err, struct sk_buff *skb)
 {
 	skb_checksum_none_assert(skb);
@@ -312,7 +311,7 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter,
 	/* if IP and error */
 	if ((status_err & IXGBE_RXD_STAT_IPCS) &&
 	    (status_err & IXGBE_RXDADV_ERR_IPE)) {
-		adapter->hw_csum_rx_error++;
+		ring->hw_csum_rx_error++;
 		return;
 	}
 
@@ -320,13 +319,13 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter,
 		return;
 
 	if (status_err & IXGBE_RXDADV_ERR_TCPE) {
-		adapter->hw_csum_rx_error++;
+		ring->hw_csum_rx_error++;
 		return;
 	}
 
 	/* It must be a TCP or UDP packet with a valid checksum */
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	adapter->hw_csum_rx_good++;
+	ring->hw_csum_rx_good++;
 }
 
 /**
@@ -462,7 +461,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 			goto next_desc;
 		}
 
-		ixgbevf_rx_checksum(adapter, rx_ring, staterr, skb);
+		ixgbevf_rx_checksum(rx_ring, staterr, skb);
 
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
@@ -2094,6 +2093,7 @@ out:
 void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
 
 	UPDATE_VF_COUNTER_32bit(IXGBE_VFGPRC, adapter->stats.last_vfgprc,
 				adapter->stats.vfgprc);
@@ -2107,6 +2107,15 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
 				adapter->stats.vfgotc);
 	UPDATE_VF_COUNTER_32bit(IXGBE_VFMPRC, adapter->stats.last_vfmprc,
 				adapter->stats.vfmprc);
+
+	for (i = 0;  i  < adapter->num_rx_queues;  i++) {
+		adapter->hw_csum_rx_error +=
+			adapter->rx_ring[i].hw_csum_rx_error;
+		adapter->hw_csum_rx_good +=
+			adapter->rx_ring[i].hw_csum_rx_good;
+		adapter->rx_ring[i].hw_csum_rx_error = 0;
+		adapter->rx_ring[i].hw_csum_rx_good = 0;
+	}
 }
 
 /**
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 8/9] ixgbevf: Remove unneeded and obsolete comment
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 57ae5cd..a52b14e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1681,13 +1681,6 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter)
 	while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state))
 		msleep(1);
 
-	/*
-	 * Check if PF is up before re-init.  If not then skip until
-	 * later when the PF is up and ready to service requests from
-	 * the VF via mailbox.  If the VF is up and running then the
-	 * watchdog task will continue to schedule reset tasks until
-	 * the PF is up and running.
-	 */
 	ixgbevf_down(adapter);
 	ixgbevf_up(adapter);
 
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 6/9] ixgbevf: Remove mailbox spinlock from the reset function
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

The spinlocks are not required during reset.  There won't be any
contention for the mailbox resource.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 8b8a685..592fe99 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1702,15 +1702,11 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct net_device *netdev = adapter->netdev;
 
-	spin_lock_bh(&adapter->mbx_lock);
-
 	if (hw->mac.ops.reset_hw(hw))
 		hw_dbg(hw, "PF still resetting\n");
 	else
 		hw->mac.ops.init_hw(hw);
 
-	spin_unlock_bh(&adapter->mbx_lock);
-
 	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
 		memcpy(netdev->dev_addr, adapter->hw.mac.addr,
 		       netdev->addr_len);
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 7/9] ixgbevf: White space and comments clean up
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 592fe99..57ae5cd 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -121,7 +121,6 @@ static inline void ixgbevf_release_rx_desc(struct ixgbe_hw *hw,
  * @direction: 0 for Rx, 1 for Tx, -1 for other causes
  * @queue: queue to map the corresponding interrupt to
  * @msix_vector: the vector to map to the corresponding queue
- *
  */
 static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction,
 			     u8 queue, u8 msix_vector)
@@ -380,7 +379,6 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
 no_buffers:
 	if (rx_ring->next_to_use != i) {
 		rx_ring->next_to_use = i;
-
 		ixgbevf_release_rx_desc(&adapter->hw, rx_ring, i);
 	}
 }
@@ -765,7 +763,6 @@ static irqreturn_t ixgbevf_msix_other(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-
 /**
  * ixgbevf_msix_clean_rings - single unshared vector rx clean (all queues)
  * @irq: unused
@@ -1224,12 +1221,13 @@ static int ixgbevf_write_uc_addr_list(struct net_device *netdev)
 }
 
 /**
- * ixgbevf_set_rx_mode - Multicast set
+ * ixgbevf_set_rx_mode - Multicast and unicast set
  * @netdev: network interface device structure
  *
  * The set_rx_method entry point is called whenever the multicast address
- * list or the network interface flags are updated.  This routine is
- * responsible for configuring the hardware for proper multicast mode.
+ * list, unicast address list or the network interface flags are updated.
+ * This routine is responsible for configuring the hardware for proper
+ * multicast mode and configuring requested unicast filters.
  **/
 static void ixgbevf_set_rx_mode(struct net_device *netdev)
 {
@@ -1588,7 +1586,6 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_adapter *adapter,
 		return;
 
 	/* Free all the Tx ring sk_buffs */
-
 	for (i = 0; i < tx_ring->count; i++) {
 		tx_buffer_info = &tx_ring->tx_buffer_info[i];
 		ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
@@ -1757,6 +1754,7 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter,
 		 */
 		adapter->num_msix_vectors = vectors;
 	}
+
 	return err;
 }
 
@@ -2053,7 +2051,7 @@ static int __devinit ixgbevf_sw_init(struct ixgbevf_adapter *adapter)
 			goto out;
 		}
 		memcpy(adapter->netdev->dev_addr, adapter->hw.mac.addr,
-			adapter->netdev->addr_len);
+		       adapter->netdev->addr_len);
 	}
 
 	/* lock to protect mailbox accesses */
@@ -2198,7 +2196,6 @@ static void ixgbevf_watchdog_task(struct work_struct *work)
 	 * Always check the link on the watchdog because we have
 	 * no LSC interrupt
 	 */
-
 	spin_lock_bh(&adapter->mbx_lock);
 
 	need_reset = hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
@@ -2704,9 +2701,6 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 static bool ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 			    struct sk_buff *skb, u32 tx_flags)
 {
-
-
-
 	u32 vlan_macip_lens = 0;
 	u32 mss_l4len_idx = 0;
 	u32 type_tucmd = 0;
@@ -2896,7 +2890,6 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
 		olinfo_status |= (1 << IXGBE_ADVTXD_IDX_SHIFT);
 		if (tx_flags & IXGBE_TX_FLAGS_IPV4)
 			olinfo_status |= IXGBE_ADVTXD_POPTS_IXSM;
-
 	}
 
 	/*
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 4/9] ixgbevf: Remove the ring adapter pointer value
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

It is unused - remove it.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 1211fa0..dbdf39b 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -58,7 +58,6 @@ struct ixgbevf_ring {
 	struct ixgbevf_ring *next;
 	struct net_device *netdev;
 	struct device *dev;
-	struct ixgbevf_adapter *adapter;  /* backlink */
 	void *desc;			/* descriptor ring memory */
 	dma_addr_t dma;			/* phys. address of descriptor ring */
 	unsigned int size;		/* length in bytes */
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 5/9] ixgbevf: Remove checking for mac.ops function pointers
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

The function pointers will always be set - there is no good reason to
check them.  Also just remove get_bus_info() call as the VF has no bus
info to report.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 56 ++++++++---------------
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a001684..8b8a685 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1150,9 +1150,6 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	struct ixgbe_hw *hw = &adapter->hw;
 	int err;
 
-	if (!hw->mac.ops.set_vfta)
-		return -EOPNOTSUPP;
-
 	spin_lock_bh(&adapter->mbx_lock);
 
 	/* add VID to filter table */
@@ -1181,8 +1178,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	spin_lock_bh(&adapter->mbx_lock);
 
 	/* remove VID from filter table */
-	if (hw->mac.ops.set_vfta)
-		err = hw->mac.ops.set_vfta(hw, vid, 0, false);
+	err = hw->mac.ops.set_vfta(hw, vid, 0, false);
 
 	spin_unlock_bh(&adapter->mbx_lock);
 
@@ -1243,8 +1239,7 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev)
 	spin_lock_bh(&adapter->mbx_lock);
 
 	/* reprogram multicast list */
-	if (hw->mac.ops.update_mc_addr_list)
-		hw->mac.ops.update_mc_addr_list(hw, netdev);
+	hw->mac.ops.update_mc_addr_list(hw, netdev);
 
 	ixgbevf_write_uc_addr_list(netdev);
 
@@ -1414,12 +1409,10 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 
 	spin_lock_bh(&adapter->mbx_lock);
 
-	if (hw->mac.ops.set_rar) {
-		if (is_valid_ether_addr(hw->mac.addr))
-			hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
-		else
-			hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0);
-	}
+	if (is_valid_ether_addr(hw->mac.addr))
+		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
+	else
+		hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0);
 
 	spin_unlock_bh(&adapter->mbx_lock);
 
@@ -2201,6 +2194,7 @@ static void ixgbevf_watchdog_task(struct work_struct *work)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 link_speed = adapter->link_speed;
 	bool link_up = adapter->link_up;
+	s32 need_reset;
 
 	adapter->flags |= IXGBE_FLAG_IN_WATCHDOG_TASK;
 
@@ -2208,29 +2202,20 @@ static void ixgbevf_watchdog_task(struct work_struct *work)
 	 * Always check the link on the watchdog because we have
 	 * no LSC interrupt
 	 */
-	if (hw->mac.ops.check_link) {
-		s32 need_reset;
 
-		spin_lock_bh(&adapter->mbx_lock);
+	spin_lock_bh(&adapter->mbx_lock);
 
-		need_reset = hw->mac.ops.check_link(hw, &link_speed,
-						    &link_up, false);
+	need_reset = hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
 
-		spin_unlock_bh(&adapter->mbx_lock);
+	spin_unlock_bh(&adapter->mbx_lock);
 
-		if (need_reset) {
-			adapter->link_up = link_up;
-			adapter->link_speed = link_speed;
-			netif_carrier_off(netdev);
-			netif_tx_stop_all_queues(netdev);
-			schedule_work(&adapter->reset_task);
-			goto pf_has_reset;
-		}
-	} else {
-		/* always assume link is up, if no check link
-		 * function */
-		link_speed = IXGBE_LINK_SPEED_10GB_FULL;
-		link_up = true;
+	if (need_reset) {
+		adapter->link_up = link_up;
+		adapter->link_speed = link_speed;
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+		schedule_work(&adapter->reset_task);
+		goto pf_has_reset;
 	}
 	adapter->link_up = link_up;
 	adapter->link_speed = link_speed;
@@ -3070,8 +3055,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 
 	spin_lock_bh(&adapter->mbx_lock);
 
-	if (hw->mac.ops.set_rar)
-		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
+	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
 
 	spin_unlock_bh(&adapter->mbx_lock);
 
@@ -3396,10 +3380,6 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_sw_init;
 
-	/* pick up the PCI bus settings for reporting later */
-	if (hw->mac.ops.get_bus_info)
-		hw->mac.ops.get_bus_info(hw);
-
 	strcpy(netdev->name, "eth%d");
 
 	err = register_netdev(netdev);
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 2/9] ixgbevf: Streamline the rx buffer allocation
From: Jeff Kirsher @ 2012-11-15 14:39 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1352990387-3872-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

Moves allocation of local variable to section where it is needed and
removes unnecessary if statement.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9d88153..b46dff8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -341,15 +341,16 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
 	struct pci_dev *pdev = adapter->pdev;
 	union ixgbe_adv_rx_desc *rx_desc;
 	struct ixgbevf_rx_buffer *bi;
-	struct sk_buff *skb;
 	unsigned int i = rx_ring->next_to_use;
 
 	bi = &rx_ring->rx_buffer_info[i];
 
 	while (cleaned_count--) {
 		rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
-		skb = bi->skb;
-		if (!skb) {
+
+		if (!bi->skb) {
+			struct sk_buff *skb;
+
 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
 							rx_ring->rx_buf_len);
 			if (!skb) {
@@ -357,8 +358,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
 				goto no_buffers;
 			}
 			bi->skb = skb;
-		}
-		if (!bi->dma) {
+
 			bi->dma = dma_map_single(&pdev->dev, skb->data,
 						 rx_ring->rx_buf_len,
 						 DMA_FROM_DEVICE);
-- 
1.7.11.7

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox