Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [Intel-wired-lan] [PATCH 06/15] ice: Initialize PF and setup miscellaneous interrupt
From: Shannon Nelson @ 2018-03-13  2:05 UTC (permalink / raw)
  To: Anirudh Venkataramanan, intel-wired-lan; +Cc: netdev
In-Reply-To: <20180309172136.9073-7-anirudh.venkataramanan@intel.com>

On 3/9/2018 9:21 AM, Anirudh Venkataramanan wrote:
> This patch continues the initialization flow as follows:
> 
> 1) Allocate and initialize necessary fields (like vsi, num_alloc_vsi,
>     irq_tracker, etc) in the ice_pf instance.
> 
> 2) Set up the miscellaneous interrupt handler. This is also known as the
>     "other interrupt causes" (OIC) handler and is used to handle non
>     hotpath interrupts (like control queue events, link events,
>     exceptions, etc.).
> 
> 3) Implement a background task to process admin queue receive (ARQ)
>     events received by the driver.
> 
> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice.h            |  84 +++
>   drivers/net/ethernet/intel/ice/ice_adminq_cmd.h |   2 +
>   drivers/net/ethernet/intel/ice/ice_common.c     |   6 +
>   drivers/net/ethernet/intel/ice/ice_common.h     |   3 +
>   drivers/net/ethernet/intel/ice/ice_controlq.c   | 101 ++++
>   drivers/net/ethernet/intel/ice/ice_controlq.h   |   8 +
>   drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  63 +++
>   drivers/net/ethernet/intel/ice/ice_main.c       | 719 +++++++++++++++++++++++-
>   drivers/net/ethernet/intel/ice/ice_txrx.h       |  43 ++
>   drivers/net/ethernet/intel/ice/ice_type.h       |  11 +
>   10 files changed, 1039 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx.h
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index 9681e971bcab..c8079c852a48 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -26,29 +26,113 @@
>   #include <linux/compiler.h>
>   #include <linux/etherdevice.h>
>   #include <linux/pci.h>
> +#include <linux/workqueue.h>
>   #include <linux/aer.h>
> +#include <linux/interrupt.h>
> +#include <linux/timer.h>
>   #include <linux/delay.h>
>   #include <linux/bitmap.h>
> +#include <linux/if_bridge.h>
>   #include "ice_devids.h"
>   #include "ice_type.h"
> +#include "ice_txrx.h"
>   #include "ice_switch.h"
>   #include "ice_common.h"
>   #include "ice_sched.h"
>   
>   #define ICE_BAR0		0
> +#define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
>   #define ICE_AQ_LEN		64
> +#define ICE_MIN_MSIX		2
> +#define ICE_MAX_VSI_ALLOC	130
> +#define ICE_MAX_TXQS		2048
> +#define ICE_MAX_RXQS		2048
> +#define ICE_RES_VALID_BIT	0x8000
> +#define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
>   
>   #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
>   
> +struct ice_res_tracker {
> +	u16 num_entries;
> +	u16 search_hint;
> +	u16 list[1];
> +};
> +
> +struct ice_sw {
> +	struct ice_pf *pf;
> +	u16 sw_id;		/* switch ID for this switch */
> +	u16 bridge_mode;	/* VEB/VEPA/Port Virtualizer */
> +};
> +
>   enum ice_state {
>   	__ICE_DOWN,
> +	__ICE_PFR_REQ,			/* set by driver and peers */
> +	__ICE_ADMINQ_EVENT_PENDING,
> +	__ICE_SERVICE_SCHED,
>   	__ICE_STATE_NBITS		/* must be last */
>   };
>   
> +/* struct that defines a VSI, associated with a dev */
> +struct ice_vsi {
> +	struct net_device *netdev;
> +	struct ice_port_info *port_info; /* back pointer to port_info */
> +	u16 vsi_num;			 /* HW (absolute) index of this VSI */
> +} ____cacheline_internodealigned_in_smp;
> +
> +enum ice_pf_flags {
> +	ICE_FLAG_MSIX_ENA,
> +	ICE_FLAG_FLTR_SYNC,
> +	ICE_FLAG_RSS_ENA,
> +	ICE_PF_FLAGS_NBITS		/* must be last */
> +};
> +
>   struct ice_pf {
>   	struct pci_dev *pdev;
> +	struct msix_entry *msix_entries;
> +	struct ice_res_tracker *irq_tracker;
> +	struct ice_vsi **vsi;		/* VSIs created by the driver */
> +	struct ice_sw *first_sw;	/* first switch created by firmware */
>   	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
> +	DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS);
> +	DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS);
> +	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
> +	unsigned long serv_tmr_period;
> +	unsigned long serv_tmr_prev;
> +	struct timer_list serv_tmr;
> +	struct work_struct serv_task;
> +	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
> +	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
>   	u32 msg_enable;
> +	u32 oicr_idx;		/* Other interrupt cause vector index */
> +	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
> +	u32 num_avail_msix;	/* remaining MSIX vectors left unclaimed */
> +	u16 num_lan_tx;		/* num lan tx queues setup */
> +	u16 num_lan_rx;		/* num lan rx queues setup */
> +	u16 q_left_tx;		/* remaining num tx queues left unclaimed */
> +	u16 q_left_rx;		/* remaining num rx queues left unclaimed */
> +	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
> +	u16 num_alloc_vsi;
> +
>   	struct ice_hw hw;
> +	char int_name[ICE_INT_NAME_STR_LEN];
>   };
> +
> +/**
> + * ice_irq_dynamic_ena - Enable default interrupt generation settings
> + * @hw: pointer to hw struct
> + */
> +static inline void ice_irq_dynamic_ena(struct ice_hw *hw)
> +{
> +	u32 vector = ((struct ice_pf *)hw->back)->oicr_idx;
> +	int itr = ICE_ITR_NONE;
> +	u32 val;
> +
> +	/* clear the PBA here, as this function is meant to clean out all
> +	 * previous interrupts and enable the interrupt
> +	 */
> +	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
> +	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
> +
> +	wr32(hw, GLINT_DYN_CTL(vector), val);
> +}
>   #endif /* _ICE_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index 13e3b7f3e24d..1acd936eec49 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -597,11 +597,13 @@ struct ice_aq_desc {
>   /* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
>   #define ICE_AQ_LG_BUF	512
>   
> +#define ICE_AQ_FLAG_ERR_S	2
>   #define ICE_AQ_FLAG_LB_S	9
>   #define ICE_AQ_FLAG_RD_S	10
>   #define ICE_AQ_FLAG_BUF_S	12
>   #define ICE_AQ_FLAG_SI_S	13
>   
> +#define ICE_AQ_FLAG_ERR		BIT(ICE_AQ_FLAG_ERR_S) /* 0x4    */
>   #define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
>   #define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
>   #define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index 78677a3fe448..4b94f737d7f3 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -298,6 +298,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
>   	if (status)
>   		return status;
>   
> +	/* set these values to minimum allowed */
> +	hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200;
> +	hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100;
> +	hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50;
> +	hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25;
> +
>   	status = ice_init_all_ctrlq(hw);
>   	if (status)
>   		goto err_unroll_cqinit;
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
> index 3e3b18fc421d..ab47204dfc5a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.h
> +++ b/drivers/net/ethernet/intel/ice/ice_common.h
> @@ -31,6 +31,9 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
>   enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
>   void ice_shutdown_all_ctrlq(struct ice_hw *hw);
>   enum ice_status
> +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
> +		  struct ice_rq_event_info *e, u16 *pending);
> +enum ice_status
>   ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
>   		enum ice_aq_res_access_type access);
>   void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
> diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
> index b1143d66d4bd..3f63a20b45c0 100644
> --- a/drivers/net/ethernet/intel/ice/ice_controlq.c
> +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
> @@ -977,3 +977,104 @@ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
>   	desc->opcode = cpu_to_le16(opcode);
>   	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
>   }
> +
> +/**
> + * ice_clean_rq_elem
> + * @hw: pointer to the hw struct
> + * @cq: pointer to the specific Control queue
> + * @e: event info from the receive descriptor, includes any buffers
> + * @pending: number of events that could be left to process
> + *
> + * This function cleans one Admin Receive Queue element and returns
> + * the contents through e.  It can also return how many events are
> + * left to process through 'pending'.
> + */
> +enum ice_status
> +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
> +		  struct ice_rq_event_info *e, u16 *pending)
> +{
> +	u16 ntc = cq->rq.next_to_clean;
> +	enum ice_status ret_code = 0;
> +	struct ice_aq_desc *desc;
> +	struct ice_dma_mem *bi;
> +	u16 desc_idx;
> +	u16 datalen;
> +	u16 flags;
> +	u16 ntu;
> +
> +	/* pre-clean the event info */
> +	memset(&e->desc, 0, sizeof(e->desc));
> +
> +	/* take the lock before we start messing with the ring */
> +	mutex_lock(&cq->rq_lock);
> +
> +	if (!cq->rq.count) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "Control Receive queue not initialized.\n");
> +		ret_code = ICE_ERR_AQ_EMPTY;
> +		goto clean_rq_elem_err;
> +	}
> +
> +	/* set next_to_use to head */
> +	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
> +
> +	if (ntu == ntc) {
> +		/* nothing to do - shouldn't need to update ring's values */
> +		ret_code = ICE_ERR_AQ_NO_WORK;
> +		goto clean_rq_elem_out;
> +	}
> +
> +	/* now clean the next descriptor */
> +	desc = ICE_CTL_Q_DESC(cq->rq, ntc);
> +	desc_idx = ntc;
> +
> +	flags = le16_to_cpu(desc->flags);
> +	if (flags & ICE_AQ_FLAG_ERR) {
> +		ret_code = ICE_ERR_AQ_ERROR;
> +		cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "Control Receive Queue Event received with error 0x%x\n",
> +			  cq->rq_last_status);
> +	}
> +	memcpy(&e->desc, desc, sizeof(e->desc));
> +	datalen = le16_to_cpu(desc->datalen);
> +	e->msg_len = min(datalen, e->buf_len);
> +	if (e->msg_buf && e->msg_len)
> +		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
> +
> +	ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
> +
> +	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
> +		     cq->rq_buf_size);
> +
> +	/* Restore the original datalen and buffer address in the desc,
> +	 * FW updates datalen to indicate the event message size
> +	 */
> +	bi = &cq->rq.r.rq_bi[ntc];
> +	memset(desc, 0, sizeof(*desc));
> +
> +	desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
> +	if (cq->rq_buf_size > ICE_AQ_LG_BUF)
> +		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
> +	desc->datalen = cpu_to_le16(bi->size);
> +	desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
> +	desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
> +
> +	/* set tail = the last cleaned desc index. */
> +	wr32(hw, cq->rq.tail, ntc);
> +	/* ntc is updated to tail + 1 */
> +	ntc++;
> +	if (ntc == cq->num_rq_entries)
> +		ntc = 0;
> +	cq->rq.next_to_clean = ntc;
> +	cq->rq.next_to_use = ntu;
> +
> +clean_rq_elem_out:
> +	/* Set pending if needed, unlock and return */
> +	if (pending)
> +		*pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
> +clean_rq_elem_err:
> +	mutex_unlock(&cq->rq_lock);
> +
> +	return ret_code;
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
> index 835c035419a3..403613606652 100644
> --- a/drivers/net/ethernet/intel/ice/ice_controlq.h
> +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
> @@ -81,6 +81,14 @@ struct ice_sq_cd {
>   
>   #define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
>   
> +/* rq event information */
> +struct ice_rq_event_info {
> +	struct ice_aq_desc desc;
> +	u16 msg_len;
> +	u16 buf_len;
> +	u8 *msg_buf;
> +};
> +
>   /* Control Queue information */
>   struct ice_ctl_q_info {
>   	enum ice_ctl_q qtype;
> diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> index e258a12099b8..700edc7e7280 100644
> --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> @@ -28,6 +28,12 @@
>   #define PF_FW_ARQLEN			0x00080280
>   #define PF_FW_ARQLEN_ARQLEN_S		0
>   #define PF_FW_ARQLEN_ARQLEN_M		ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
> +#define PF_FW_ARQLEN_ARQVFE_S		28
> +#define PF_FW_ARQLEN_ARQVFE_M		BIT(PF_FW_ARQLEN_ARQVFE_S)
> +#define PF_FW_ARQLEN_ARQOVFL_S		29
> +#define PF_FW_ARQLEN_ARQOVFL_M		BIT(PF_FW_ARQLEN_ARQOVFL_S)
> +#define PF_FW_ARQLEN_ARQCRIT_S		30
> +#define PF_FW_ARQLEN_ARQCRIT_M		BIT(PF_FW_ARQLEN_ARQCRIT_S)
>   #define PF_FW_ARQLEN_ARQENABLE_S	31
>   #define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
>   #define PF_FW_ARQT			0x00080480
> @@ -39,6 +45,12 @@
>   #define PF_FW_ATQLEN			0x00080200
>   #define PF_FW_ATQLEN_ATQLEN_S		0
>   #define PF_FW_ATQLEN_ATQLEN_M		ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
> +#define PF_FW_ATQLEN_ATQVFE_S		28
> +#define PF_FW_ATQLEN_ATQVFE_M		BIT(PF_FW_ATQLEN_ATQVFE_S)
> +#define PF_FW_ATQLEN_ATQOVFL_S		29
> +#define PF_FW_ATQLEN_ATQOVFL_M		BIT(PF_FW_ATQLEN_ATQOVFL_S)
> +#define PF_FW_ATQLEN_ATQCRIT_S		30
> +#define PF_FW_ATQLEN_ATQCRIT_M		BIT(PF_FW_ATQLEN_ATQCRIT_S)
>   #define PF_FW_ATQLEN_ATQENABLE_S	31
>   #define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
>   #define PF_FW_ATQT			0x00080400
> @@ -57,6 +69,57 @@
>   #define PFGEN_CTRL			0x00091000
>   #define PFGEN_CTRL_PFSWR_S		0
>   #define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
> +#define PFHMC_ERRORDATA			0x00520500
> +#define PFHMC_ERRORINFO			0x00520400
> +#define GLINT_DYN_CTL(_INT)		(0x00160000 + ((_INT) * 4))
> +#define GLINT_DYN_CTL_INTENA_S		0
> +#define GLINT_DYN_CTL_INTENA_M		BIT(GLINT_DYN_CTL_INTENA_S)
> +#define GLINT_DYN_CTL_CLEARPBA_S	1
> +#define GLINT_DYN_CTL_CLEARPBA_M	BIT(GLINT_DYN_CTL_CLEARPBA_S)
> +#define GLINT_DYN_CTL_ITR_INDX_S	3
> +#define GLINT_DYN_CTL_SW_ITR_INDX_S	25
> +#define GLINT_DYN_CTL_SW_ITR_INDX_M	ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S)
> +#define GLINT_DYN_CTL_INTENA_MSK_S	31
> +#define GLINT_DYN_CTL_INTENA_MSK_M	BIT(GLINT_DYN_CTL_INTENA_MSK_S)
> +#define GLINT_ITR(_i, _INT)		(0x00154000 + ((_i) * 8192 + (_INT) * 4))
> +#define PFINT_FW_CTL			0x0016C800
> +#define PFINT_FW_CTL_MSIX_INDX_S	0
> +#define PFINT_FW_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S)
> +#define PFINT_FW_CTL_ITR_INDX_S		11
> +#define PFINT_FW_CTL_ITR_INDX_M		ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S)
> +#define PFINT_FW_CTL_CAUSE_ENA_S	30
> +#define PFINT_FW_CTL_CAUSE_ENA_M	BIT(PFINT_FW_CTL_CAUSE_ENA_S)
> +#define PFINT_OICR			0x0016CA00
> +#define PFINT_OICR_INTEVENT_S		0
> +#define PFINT_OICR_INTEVENT_M		BIT(PFINT_OICR_INTEVENT_S)
> +#define PFINT_OICR_HLP_RDY_S		14
> +#define PFINT_OICR_HLP_RDY_M		BIT(PFINT_OICR_HLP_RDY_S)
> +#define PFINT_OICR_CPM_RDY_S		15
> +#define PFINT_OICR_CPM_RDY_M		BIT(PFINT_OICR_CPM_RDY_S)
> +#define PFINT_OICR_ECC_ERR_S		16
> +#define PFINT_OICR_ECC_ERR_M		BIT(PFINT_OICR_ECC_ERR_S)
> +#define PFINT_OICR_MAL_DETECT_S		19
> +#define PFINT_OICR_MAL_DETECT_M		BIT(PFINT_OICR_MAL_DETECT_S)
> +#define PFINT_OICR_GRST_S		20
> +#define PFINT_OICR_GRST_M		BIT(PFINT_OICR_GRST_S)
> +#define PFINT_OICR_PCI_EXCEPTION_S	21
> +#define PFINT_OICR_PCI_EXCEPTION_M	BIT(PFINT_OICR_PCI_EXCEPTION_S)
> +#define PFINT_OICR_GPIO_S		22
> +#define PFINT_OICR_GPIO_M		BIT(PFINT_OICR_GPIO_S)
> +#define PFINT_OICR_STORM_DETECT_S	24
> +#define PFINT_OICR_STORM_DETECT_M	BIT(PFINT_OICR_STORM_DETECT_S)
> +#define PFINT_OICR_HMC_ERR_S		26
> +#define PFINT_OICR_HMC_ERR_M		BIT(PFINT_OICR_HMC_ERR_S)
> +#define PFINT_OICR_PE_CRITERR_S		28
> +#define PFINT_OICR_PE_CRITERR_M		BIT(PFINT_OICR_PE_CRITERR_S)
> +#define PFINT_OICR_CTL			0x0016CA80
> +#define PFINT_OICR_CTL_MSIX_INDX_S	0
> +#define PFINT_OICR_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S)
> +#define PFINT_OICR_CTL_ITR_INDX_S	11
> +#define PFINT_OICR_CTL_ITR_INDX_M	ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S)
> +#define PFINT_OICR_CTL_CAUSE_ENA_S	30
> +#define PFINT_OICR_CTL_CAUSE_ENA_M	BIT(PFINT_OICR_CTL_CAUSE_ENA_S)
> +#define PFINT_OICR_ENA			0x0016C900
>   #define GLLAN_RCTL_0			0x002941F8
>   #define GLNVM_FLA			0x000B6108
>   #define GLNVM_FLA_LOCKED_S		6
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 2ee4a0547ba3..b07ce86381bb 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -40,6 +40,294 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
>   MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
>   #endif /* !CONFIG_DYNAMIC_DEBUG */
>   
> +static struct workqueue_struct *ice_wq;
> +
> +/**
> + * ice_search_res - Search the tracker for a block of resources
> + * @res: pointer to the resource
> + * @needed: size of the block needed
> + * @id: identifier to track owner
> + * Returns the base item index of the block, or -ENOMEM for error
> + */
> +static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
> +{
> +	int start = res->search_hint;
> +	int end = start;
> +
> +	id |= ICE_RES_VALID_BIT;
> +
> +	do {
> +		/* skip already allocated entries */
> +		if (res->list[end++] & ICE_RES_VALID_BIT) {
> +			start = end;
> +			if ((start + needed) > res->num_entries)
> +				break;
> +		}
> +
> +		if (end == (start + needed)) {
> +			int i = start;
> +
> +			/* there was enough, so assign it to the requestor */
> +			while (i != end)
> +				res->list[i++] = id;
> +
> +			if (end == res->num_entries)
> +				end = 0;
> +
> +			res->search_hint = end;
> +			return start;
> +		}
> +	} while (1);
> +
> +	return -ENOMEM;
> +}
> +
> +/**
> + * ice_get_res - get a block of resources
> + * @pf: board private structure
> + * @res: pointer to the resource
> + * @needed: size of the block needed
> + * @id: identifier to track owner
> + *
> + * Returns the base item index of the block, or -ENOMEM for error
> + * The search_hint trick and lack of advanced fit-finding only works
> + * because we're highly likely to have all the same size lump requests.

The new naming for this resource tracking is much better than what 
someone used in i40e, but you can probably replace the "lump" reference 
here as well.

Now that there is a 2nd driver using essentially the same code, should 
there be some effort to make it generic and only have the code once in 
the kernel, if it isn't already available?

sln

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH 04/15] ice: Get switch config, scheduler config and device capabilities
From: Shannon Nelson @ 2018-03-13  2:05 UTC (permalink / raw)
  To: Anirudh Venkataramanan, intel-wired-lan; +Cc: netdev
In-Reply-To: <20180309172136.9073-5-anirudh.venkataramanan@intel.com>

On 3/9/2018 9:21 AM, Anirudh Venkataramanan wrote:
> This patch adds to the initialization flow by getting switch
> configuration, scheduler configuration and device capabilities.
> 
> Switch configuration:
> On boot, an L2 switch element is created in the firmware per physical
> function. Each physical function is also mapped to a port, to which its
> switch element is connected. In other words, this switch can be visualized
> as an embedded vSwitch that can connect a physical function's virtual
> station interfaces (VSIs) to the egress/ingress port. Egress/ingress
> filters will be eventually created and applied on this switch element.
> As part of the initialization flow, the driver gets configuration data
> from this switch element and stores it.
> 
> Scheduler configuration:
> The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
> As part of the initialization flow, the driver queries and stores the
> default scheduler configuration for the given physical function.
> 
> Device capabilities:
> As part of initialization, the driver has to determine what the device is
> capable of (ex. max queues, VSIs, etc). This information is obtained from
> the firmware and stored by the driver.
> 
> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/Makefile         |   4 +-
>   drivers/net/ethernet/intel/ice/ice.h            |   2 +
>   drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 209 ++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_common.c     | 223 +++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_common.h     |   2 +
>   drivers/net/ethernet/intel/ice/ice_sched.c      | 354 ++++++++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_sched.h      |  42 +++
>   drivers/net/ethernet/intel/ice/ice_switch.c     | 158 +++++++++++
>   drivers/net/ethernet/intel/ice/ice_switch.h     |  28 ++
>   drivers/net/ethernet/intel/ice/ice_type.h       | 109 ++++++++
>   10 files changed, 1130 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.c
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.c
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.h
> 
> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> index 373d481dbb25..809d85c04398 100644
> --- a/drivers/net/ethernet/intel/ice/Makefile
> +++ b/drivers/net/ethernet/intel/ice/Makefile
> @@ -27,4 +27,6 @@ obj-$(CONFIG_ICE) += ice.o
>   ice-y := ice_main.o	\
>   	 ice_controlq.o	\
>   	 ice_common.o	\
> -	 ice_nvm.o
> +	 ice_nvm.o	\
> +	 ice_switch.o	\
> +	 ice_sched.o
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index ab2800c31906..f6e3339591bb 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -30,7 +30,9 @@
>   #include <linux/bitmap.h>
>   #include "ice_devids.h"
>   #include "ice_type.h"
> +#include "ice_switch.h"
>   #include "ice_common.h"
> +#include "ice_sched.h"
>   
>   #define ICE_BAR0		0
>   #define ICE_AQ_LEN		64
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index 05b22a1ffd70..66a3f41df673 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -22,6 +22,8 @@
>    * descriptor format.  It is shared between Firmware and Software.
>    */
>   
> +#define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
> +
>   struct ice_aqc_generic {
>   	__le32 param0;
>   	__le32 param1;
> @@ -82,6 +84,40 @@ struct ice_aqc_req_res {
>   	u8 reserved[2];
>   };
>   
> +/* Get function capabilities (indirect 0x000A)
> + * Get device capabilities (indirect 0x000B)
> + */
> +struct ice_aqc_list_caps {
> +	u8 cmd_flags;
> +	u8 pf_index;
> +	u8 reserved[2];
> +	__le32 count;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
> +/* Device/Function buffer entry, repeated per reported capability */
> +struct ice_aqc_list_caps_elem {
> +	__le16 cap;
> +#define ICE_AQC_CAPS_VSI				0x0017
> +#define ICE_AQC_CAPS_RSS				0x0040
> +#define ICE_AQC_CAPS_RXQS				0x0041
> +#define ICE_AQC_CAPS_TXQS				0x0042
> +#define ICE_AQC_CAPS_MSIX				0x0043
> +#define ICE_AQC_CAPS_MAX_MTU				0x0047
> +
> +	u8 major_ver;
> +	u8 minor_ver;
> +	/* Number of resources described by this capability */
> +	__le32 number;
> +	/* Only meaningful for some types of resources */
> +	__le32 logical_id;
> +	/* Only meaningful for some types of resources */
> +	__le32 phys_id;
> +	__le64 rsvd1;
> +	__le64 rsvd2;
> +};
> +
>   /* Clear PXE Command and response (direct 0x0110) */
>   struct ice_aqc_clear_pxe {
>   	u8 rx_cnt;
> @@ -89,6 +125,161 @@ struct ice_aqc_clear_pxe {
>   	u8 reserved[15];
>   };
>   
> +/* Get switch configuration (0x0200) */
> +struct ice_aqc_get_sw_cfg {
> +	/* Reserved for command and copy of request flags for response */
> +	__le16 flags;
> +	/* First desc in case of command and next_elem in case of response
> +	 * In case of response, if it is not zero, means all the configuration
> +	 * was not returned and new command shall be sent with this value in
> +	 * the 'first desc' field
> +	 */
> +	__le16 element;
> +	/* Reserved for command, only used for response */
> +	__le16 num_elems;
> +	__le16 rsvd;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
> +/* Each entry in the response buffer is of the following type: */
> +struct ice_aqc_get_sw_cfg_resp_elem {
> +	/* VSI/Port Number */
> +	__le16 vsi_port_num;
> +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S	0
> +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M	\
> +			(0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
> +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S	14
> +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M	(0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
> +#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT	0
> +#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT	1
> +#define ICE_AQC_GET_SW_CONF_RESP_VSI		2
> +
> +	/* SWID VSI/Port belongs to */
> +	__le16 swid;
> +
> +	/* Bit 14..0 : PF/VF number VSI belongs to
> +	 * Bit 15 : VF indication bit
> +	 */
> +	__le16 pf_vf_num;
> +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S	0
> +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M	\
> +				(0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
> +#define ICE_AQC_GET_SW_CONF_RESP_IS_VF		BIT(15)
> +};
> +
> +/* The response buffer is as follows. Note that the length of the
> + * elements array varies with the length of the command response.
> + */
> +struct ice_aqc_get_sw_cfg_resp {
> +	struct ice_aqc_get_sw_cfg_resp_elem elements[1];
> +};
> +
> +/* Add TSE (indirect 0x0401)
> + * Delete TSE (indirect 0x040F)
> + * Move TSE (indirect 0x0408)
> + */
> +struct ice_aqc_add_move_delete_elem {
> +	__le16 num_grps_req;
> +	__le16 num_grps_updated;
> +	__le32 reserved;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
> +struct ice_aqc_elem_info_bw {
> +	__le16 bw_profile_idx;
> +	__le16 bw_alloc;
> +};
> +
> +struct ice_aqc_txsched_elem {
> +	u8 elem_type; /* Special field, reserved for some aq calls */
> +#define ICE_AQC_ELEM_TYPE_UNDEFINED		0x0
> +#define ICE_AQC_ELEM_TYPE_ROOT_PORT		0x1
> +#define ICE_AQC_ELEM_TYPE_TC			0x2
> +#define ICE_AQC_ELEM_TYPE_SE_GENERIC		0x3
> +#define ICE_AQC_ELEM_TYPE_ENTRY_POINT		0x4
> +#define ICE_AQC_ELEM_TYPE_LEAF			0x5
> +#define ICE_AQC_ELEM_TYPE_SE_PADDED		0x6
> +	u8 valid_sections;
> +#define ICE_AQC_ELEM_VALID_GENERIC		BIT(0)
> +#define ICE_AQC_ELEM_VALID_CIR			BIT(1)
> +#define ICE_AQC_ELEM_VALID_EIR			BIT(2)
> +#define ICE_AQC_ELEM_VALID_SHARED		BIT(3)
> +	u8 generic;
> +#define ICE_AQC_ELEM_GENERIC_MODE_M		0x1
> +#define ICE_AQC_ELEM_GENERIC_PRIO_S		0x1
> +#define ICE_AQC_ELEM_GENERIC_PRIO_M	(0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S)
> +#define ICE_AQC_ELEM_GENERIC_SP_S		0x4
> +#define ICE_AQC_ELEM_GENERIC_SP_M	(0x1 << ICE_AQC_ELEM_GENERIC_SP_S)
> +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S	0x5
> +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M	\
> +	(0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
> +	u8 flags; /* Special field, reserved for some aq calls */
> +#define ICE_AQC_ELEM_FLAG_SUSPEND_M		0x1
> +	struct ice_aqc_elem_info_bw cir_bw;
> +	struct ice_aqc_elem_info_bw eir_bw;
> +	__le16 srl_id;
> +	__le16 reserved2;
> +};
> +
> +struct ice_aqc_txsched_elem_data {
> +	__le32 parent_teid;
> +	__le32 node_teid;
> +	struct ice_aqc_txsched_elem data;
> +};
> +
> +struct ice_aqc_txsched_topo_grp_info_hdr {
> +	__le32 parent_teid;
> +	__le16 num_elems;
> +	__le16 reserved2;
> +};
> +
> +struct ice_aqc_delete_elem {
> +	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
> +	__le32 teid[1];
> +};
> +
> +/* Query Scheduler Resource Allocation (indirect 0x0412)
> + * This indirect command retrieves the scheduler resources allocated by
> + * EMP Firmware to the given PF.
> + */
> +struct ice_aqc_query_txsched_res {
> +	u8 reserved[8];
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
> +struct ice_aqc_generic_sched_props {
> +	__le16 phys_levels;
> +	__le16 logical_levels;
> +	u8 flattening_bitmap;
> +	u8 max_device_cgds;
> +	u8 max_pf_cgds;
> +	u8 rsvd0;
> +	__le16 rdma_qsets;
> +	u8 rsvd1[22];
> +};
> +
> +struct ice_aqc_layer_props {
> +	u8 logical_layer;
> +	u8 chunk_size;
> +	__le16 max_device_nodes;
> +	__le16 max_pf_nodes;
> +	u8 rsvd0[2];
> +	__le16 max_shared_rate_lmtr;
> +	__le16 max_children;
> +	__le16 max_cir_rl_profiles;
> +	__le16 max_eir_rl_profiles;
> +	__le16 max_srl_profiles;
> +	u8 rsvd1[14];
> +};
> +
> +struct ice_aqc_query_txsched_res_resp {
> +	struct ice_aqc_generic_sched_props sched_props;
> +	struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
> +};
> +
>   /* NVM Read command (indirect 0x0701)
>    * NVM Erase commands (direct 0x0702)
>    * NVM Update commands (indirect 0x0703)
> @@ -142,6 +333,10 @@ struct ice_aq_desc {
>   		struct ice_aqc_q_shutdown q_shutdown;
>   		struct ice_aqc_req_res res_owner;
>   		struct ice_aqc_clear_pxe clear_pxe;
> +		struct ice_aqc_list_caps get_cap;
> +		struct ice_aqc_get_sw_cfg get_sw_conf;
> +		struct ice_aqc_query_txsched_res query_sched_res;
> +		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
>   		struct ice_aqc_nvm nvm;
>   	} params;
>   };
> @@ -150,16 +345,19 @@ struct ice_aq_desc {
>   #define ICE_AQ_LG_BUF	512
>   
>   #define ICE_AQ_FLAG_LB_S	9
> +#define ICE_AQ_FLAG_RD_S	10
>   #define ICE_AQ_FLAG_BUF_S	12
>   #define ICE_AQ_FLAG_SI_S	13
>   
>   #define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
> +#define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
>   #define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
>   #define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
>   
>   /* error codes */
>   enum ice_aq_err {
>   	ICE_AQ_RC_OK		= 0,  /* success */
> +	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
>   	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
>   	ICE_AQ_RC_EEXIST	= 13, /* object already exists */
>   };
> @@ -174,11 +372,22 @@ enum ice_adminq_opc {
>   	ice_aqc_opc_req_res				= 0x0008,
>   	ice_aqc_opc_release_res				= 0x0009,
>   
> +	/* device/function capabilities */
> +	ice_aqc_opc_list_func_caps			= 0x000A,
> +	ice_aqc_opc_list_dev_caps			= 0x000B,
> +
>   	/* PXE */
>   	ice_aqc_opc_clear_pxe_mode			= 0x0110,
>   
> +	/* internal switch commands */
> +	ice_aqc_opc_get_sw_cfg				= 0x0200,
> +
>   	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
>   
> +	/* transmit scheduler commands */
> +	ice_aqc_opc_delete_sched_elems			= 0x040F,
> +	ice_aqc_opc_query_sched_res			= 0x0412,
> +
>   	/* NVM commands */
>   	ice_aqc_opc_nvm_read				= 0x0701,
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index eb3e06488705..a64e406c129a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -16,9 +16,12 @@
>    */
>   
>   #include "ice_common.h"
> +#include "ice_sched.h"
>   #include "ice_adminq_cmd.h"
>   
>   #define ICE_PF_RESET_WAIT_COUNT	200
> +#define ICE_GET_CAP_BUF_COUNT	40
> +#define ICE_GET_CAP_RETRY_COUNT	20
>   
>   /**
>    * ice_set_mac_type - Sets MAC type
> @@ -84,8 +87,37 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
>   	if (status)
>   		goto err_unroll_cqinit;
>   
> +	status = ice_get_caps(hw);
> +	if (status)
> +		goto err_unroll_cqinit;
> +
> +	hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
> +				     sizeof(*hw->port_info), GFP_KERNEL);
> +	if (!hw->port_info) {
> +		status = ICE_ERR_NO_MEMORY;
> +		goto err_unroll_cqinit;
> +	}
> +
> +	/* set the back pointer to hw */
> +	hw->port_info->hw = hw;
> +
> +	/* Initialize port_info struct with switch configuration data */
> +	status = ice_get_initial_sw_cfg(hw);
> +	if (status)
> +		goto err_unroll_alloc;
> +
> +	/* Query the allocated resources for tx scheduler */
> +	status = ice_sched_query_res_alloc(hw);
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_SCHED,
> +			  "Failed to get scheduler allocated resources\n");
> +		goto err_unroll_alloc;
> +	}
> +
>   	return 0;
>   
> +err_unroll_alloc:
> +	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
>   err_unroll_cqinit:
>   	ice_shutdown_all_ctrlq(hw);
>   	return status;
> @@ -97,7 +129,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
>    */
>   void ice_deinit_hw(struct ice_hw *hw)
>   {
> +	ice_sched_cleanup_all(hw);
>   	ice_shutdown_all_ctrlq(hw);
> +	if (hw->port_info) {
> +		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
> +		hw->port_info = NULL;
> +	}
>   }
>   
>   /**
> @@ -519,6 +556,192 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
>   	}
>   }
>   
> +/**
> + * ice_parse_caps - parse function/device capabilities
> + * @hw: pointer to the hw struct
> + * @buf: pointer to a buffer containing function/device capability records
> + * @cap_count: number of capability records in the list
> + * @opc: type of capabilities list to parse
> + *
> + * Helper function to parse function(0x000a)/device(0x000b) capabilities list.
> + */
> +static void
> +ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
> +	       enum ice_adminq_opc opc)
> +{
> +	struct ice_aqc_list_caps_elem *cap_resp;
> +	struct ice_hw_func_caps *func_p = NULL;
> +	struct ice_hw_dev_caps *dev_p = NULL;
> +	struct ice_hw_common_caps *caps;
> +	u32 i;
> +
> +	if (!buf)
> +		return;
> +
> +	cap_resp = (struct ice_aqc_list_caps_elem *)buf;
> +
> +	if (opc == ice_aqc_opc_list_dev_caps) {
> +		dev_p = &hw->dev_caps;
> +		caps = &dev_p->common_cap;
> +	} else if (opc == ice_aqc_opc_list_func_caps) {
> +		func_p = &hw->func_caps;
> +		caps = &func_p->common_cap;
> +	} else {
> +		ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
> +		return;
> +	}
> +
> +	for (i = 0; caps && i < cap_count; i++, cap_resp++) {
> +		u32 logical_id = le32_to_cpu(cap_resp->logical_id);
> +		u32 phys_id = le32_to_cpu(cap_resp->phys_id);
> +		u32 number = le32_to_cpu(cap_resp->number);
> +		u16 cap = le16_to_cpu(cap_resp->cap);
> +
> +		switch (cap) {
> +		case ICE_AQC_CAPS_VSI:
> +			if (dev_p) {
> +				dev_p->num_vsi_allocd_to_host = number;
> +				ice_debug(hw, ICE_DBG_INIT,
> +					  "HW caps: Dev.VSI cnt = %d\n",
> +					  dev_p->num_vsi_allocd_to_host);
> +			} else if (func_p) {
> +				func_p->guaranteed_num_vsi = number;
> +				ice_debug(hw, ICE_DBG_INIT,
> +					  "HW caps: Func.VSI cnt = %d\n",
> +					  func_p->guaranteed_num_vsi);
> +			}
> +			break;
> +		case ICE_AQC_CAPS_RSS:
> +			caps->rss_table_size = number;
> +			caps->rss_table_entry_width = logical_id;
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: RSS table size = %d\n",
> +				  caps->rss_table_size);
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: RSS table width = %d\n",
> +				  caps->rss_table_entry_width);
> +			break;
> +		case ICE_AQC_CAPS_RXQS:
> +			caps->num_rxq = number;
> +			caps->rxq_first_id = phys_id;
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: Rx first queue ID = %d\n",
> +				  caps->rxq_first_id);
> +			break;
> +		case ICE_AQC_CAPS_TXQS:
> +			caps->num_txq = number;
> +			caps->txq_first_id = phys_id;
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: Num Tx Qs = %d\n", caps->num_txq);
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: Tx first queue ID = %d\n",
> +				  caps->txq_first_id);
> +			break;
> +		case ICE_AQC_CAPS_MSIX:
> +			caps->num_msix_vectors = number;
> +			caps->msix_vector_first_id = phys_id;
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: MSIX vector count = %d\n",
> +				  caps->num_msix_vectors);
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: MSIX first vector index = %d\n",
> +				  caps->msix_vector_first_id);
> +			break;
> +		case ICE_AQC_CAPS_MAX_MTU:
> +			caps->max_mtu = number;
> +			if (dev_p)
> +				ice_debug(hw, ICE_DBG_INIT,
> +					  "HW caps: Dev.MaxMTU = %d\n",
> +					  caps->max_mtu);
> +			else if (func_p)
> +				ice_debug(hw, ICE_DBG_INIT,
> +					  "HW caps: func.MaxMTU = %d\n",
> +					  caps->max_mtu);
> +			break;
> +		default:
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "HW caps: Unknown capability[%d]: 0x%x\n", i,
> +				  cap);
> +			break;
> +		}
> +	}
> +}
> +
> +/**
> + * ice_aq_discover_caps - query function/device capabilities
> + * @hw: pointer to the hw struct
> + * @buf: a virtual buffer to hold the capabilities
> + * @buf_size: Size of the virtual buffer
> + * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM
> + * @opc: capabilities type to discover - pass in the command opcode
> + * @cd: pointer to command details structure or NULL
> + *
> + * Get the function(0x000a)/device(0x000b) capabilities description from
> + * the firmware.
> + */
> +static enum ice_status
> +ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size,
> +		     enum ice_adminq_opc opc, struct ice_sq_cd *cd)
> +{
> +	struct ice_aqc_list_caps *cmd;
> +	struct ice_aq_desc desc;
> +	enum ice_status status;
> +
> +	cmd = &desc.params.get_cap;
> +
> +	if (opc != ice_aqc_opc_list_func_caps &&
> +	    opc != ice_aqc_opc_list_dev_caps)
> +		return ICE_ERR_PARAM;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, opc);
> +
> +	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
> +	if (!status)
> +		ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
> +	*data_size = le16_to_cpu(desc.datalen);
> +
> +	return status;
> +}
> +
> +/**
> + * ice_get_caps - get info about the HW
> + * @hw: pointer to the hardware structure
> + */
> +enum ice_status ice_get_caps(struct ice_hw *hw)
> +{
> +	enum ice_status status;
> +	u16 data_size = 0;
> +	u16 cbuf_len;
> +	u8 retries;
> +
> +	cbuf_len = ICE_GET_CAP_BUF_COUNT *
> +		sizeof(struct ice_aqc_list_caps_elem);
> +
> +	retries = ICE_GET_CAP_RETRY_COUNT;
> +
> +	do {
> +		void *cbuf;
> +
> +		cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
> +		if (!cbuf)
> +			return ICE_ERR_NO_MEMORY;
> +
> +		status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size,
> +					      ice_aqc_opc_list_func_caps, NULL);
> +		devm_kfree(ice_hw_to_dev(hw), cbuf);
> +
> +		if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
> +			break;
> +
> +		/* If ENOMEM is returned, try again with bigger buffer */
> +		cbuf_len = data_size;
> +	} while (--retries);

If the buffer size is the only reason for a retry, why bother with a
retry count of 20?  Is there any reason you won't be given the right
data_size hint the first time?

sln

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH 03/15] ice: Start hardware initialization
From: Shannon Nelson @ 2018-03-13  2:05 UTC (permalink / raw)
  To: Anirudh Venkataramanan, intel-wired-lan; +Cc: netdev
In-Reply-To: <20180309172136.9073-4-anirudh.venkataramanan@intel.com>

On 3/9/2018 9:21 AM, Anirudh Venkataramanan wrote:
> This patch implements multiple pieces of the initialization flow
> as follows:
> 
> 1) A reset is issued to ensure a clean device state, followed
>     by initialization of admin queue interface.
> 
> 2) Once the admin queue interface is up, clear the PF config
>     and transition the device to non-PXE mode.
> 
> 3) Get the NVM configuration stored in the device's non-volatile
>     memory (NVM) using ice_init_nvm.
> 
> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/Makefile         |   3 +-
>   drivers/net/ethernet/intel/ice/ice.h            |   2 +
>   drivers/net/ethernet/intel/ice/ice_adminq_cmd.h |  79 +++++
>   drivers/net/ethernet/intel/ice/ice_common.c     | 410 ++++++++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_common.h     |  11 +
>   drivers/net/ethernet/intel/ice/ice_controlq.h   |   3 +
>   drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  30 ++
>   drivers/net/ethernet/intel/ice/ice_main.c       |  31 ++
>   drivers/net/ethernet/intel/ice/ice_nvm.c        | 245 ++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_osdep.h      |   1 +
>   drivers/net/ethernet/intel/ice/ice_status.h     |   5 +
>   drivers/net/ethernet/intel/ice/ice_type.h       |  49 +++
>   12 files changed, 868 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_nvm.c
> 
> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> index eebf619e84a8..373d481dbb25 100644
> --- a/drivers/net/ethernet/intel/ice/Makefile
> +++ b/drivers/net/ethernet/intel/ice/Makefile
> @@ -26,4 +26,5 @@ obj-$(CONFIG_ICE) += ice.o
>   
>   ice-y := ice_main.o	\
>   	 ice_controlq.o	\
> -	 ice_common.o
> +	 ice_common.o	\
> +	 ice_nvm.o
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index ea2fb63bb095..ab2800c31906 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -30,8 +30,10 @@
>   #include <linux/bitmap.h>
>   #include "ice_devids.h"
>   #include "ice_type.h"
> +#include "ice_common.h"
>   
>   #define ICE_BAR0		0
> +#define ICE_AQ_LEN		64
>   
>   #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index 885fa3c6fec4..05b22a1ffd70 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -50,6 +50,67 @@ struct ice_aqc_q_shutdown {
>   	u8 reserved[12];
>   };
>   
> +/* Request resource ownership (direct 0x0008)
> + * Release resource ownership (direct 0x0009)
> + */
> +struct ice_aqc_req_res {
> +	__le16 res_id;
> +#define ICE_AQC_RES_ID_NVM		1
> +#define ICE_AQC_RES_ID_SDP		2
> +#define ICE_AQC_RES_ID_CHNG_LOCK	3
> +#define ICE_AQC_RES_ID_GLBL_LOCK	4
> +	__le16 access_type;
> +#define ICE_AQC_RES_ACCESS_READ		1
> +#define ICE_AQC_RES_ACCESS_WRITE	2
> +
> +	/* Upon successful completion, FW writes this value and driver is
> +	 * expected to release resource before timeout. This value is provided
> +	 * in milliseconds.
> +	 */
> +	__le32 timeout;
> +#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS	3000
> +#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS	180000
> +#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS	1000
> +#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS	3000
> +	/* For SDP: pin id of the SDP */
> +	__le32 res_number;
> +	/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
> +	__le16 status;
> +#define ICE_AQ_RES_GLBL_SUCCESS		0
> +#define ICE_AQ_RES_GLBL_IN_PROG		1
> +#define ICE_AQ_RES_GLBL_DONE		2
> +	u8 reserved[2];

Since these structs all become part of the descriptor's params union,
perhaps adding reserved space to the end is not necessary.

> +};
> +
> +/* Clear PXE Command and response (direct 0x0110) */
> +struct ice_aqc_clear_pxe {
> +	u8 rx_cnt;
> +#define ICE_AQC_CLEAR_PXE_RX_CNT		0x2
> +	u8 reserved[15];
> +};
> +
> +/* NVM Read command (indirect 0x0701)
> + * NVM Erase commands (direct 0x0702)
> + * NVM Update commands (indirect 0x0703)
> + */
> +struct ice_aqc_nvm {
> +	u8	cmd_flags;
> +#define ICE_AQC_NVM_LAST_CMD		BIT(0)
> +#define ICE_AQC_NVM_PCIR_REQ		BIT(0)	/* Used by NVM Update reply */
> +#define ICE_AQC_NVM_PRESERVATION_S	1
> +#define ICE_AQC_NVM_PRESERVATION_M	(3 << CSR_AQ_NVM_PRESERVATION_S)
> +#define ICE_AQC_NVM_NO_PRESERVATION	(0 << CSR_AQ_NVM_PRESERVATION_S)
> +#define ICE_AQC_NVM_PRESERVE_ALL	BIT(1)
> +#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << CSR_AQ_NVM_PRESERVATION_S)
> +#define ICE_AQC_NVM_FLASH_ONLY		BIT(7)
> +	u8	module_typeid;
> +	__le16	length;
> +#define ICE_AQC_NVM_ERASE_LEN	0xFFFF
> +	__le32	offset;
> +	__le32	addr_high;
> +	__le32	addr_low;
> +};
> +
>   /**
>    * struct ice_aq_desc - Admin Queue (AQ) descriptor
>    * @flags: ICE_AQ_FLAG_* flags
> @@ -79,6 +140,9 @@ struct ice_aq_desc {
>   		struct ice_aqc_generic generic;
>   		struct ice_aqc_get_ver get_ver;
>   		struct ice_aqc_q_shutdown q_shutdown;
> +		struct ice_aqc_req_res res_owner;
> +		struct ice_aqc_clear_pxe clear_pxe;
> +		struct ice_aqc_nvm nvm;
>   	} params;
>   };
>   
> @@ -96,6 +160,8 @@ struct ice_aq_desc {
>   /* error codes */
>   enum ice_aq_err {
>   	ICE_AQ_RC_OK		= 0,  /* success */
> +	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
> +	ICE_AQ_RC_EEXIST	= 13, /* object already exists */

Are we eventually going to get an ENOTTY error value?  :-)

>   };
>   
>   /* Admin Queue command opcodes */
> @@ -103,6 +169,19 @@ enum ice_adminq_opc {
>   	/* AQ commands */
>   	ice_aqc_opc_get_ver				= 0x0001,
>   	ice_aqc_opc_q_shutdown				= 0x0003,
> +
> +	/* resource ownership */
> +	ice_aqc_opc_req_res				= 0x0008,
> +	ice_aqc_opc_release_res				= 0x0009,
> +
> +	/* PXE */
> +	ice_aqc_opc_clear_pxe_mode			= 0x0110,
> +
> +	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
> +
> +	/* NVM commands */
> +	ice_aqc_opc_nvm_read				= 0x0701,
> +
>   };
>   
>   #endif /* _ICE_ADMINQ_CMD_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index d980f0518744..eb3e06488705 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -18,6 +18,224 @@
>   #include "ice_common.h"
>   #include "ice_adminq_cmd.h"
>   
> +#define ICE_PF_RESET_WAIT_COUNT	200
> +
> +/**
> + * ice_set_mac_type - Sets MAC type
> + * @hw: pointer to the HW structure
> + *
> + * This function sets the MAC type of the adapter based on the
> + * vendor ID and device ID stored in the hw structure.
> + */
> +static enum ice_status ice_set_mac_type(struct ice_hw *hw)
> +{
> +	if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
> +		return ICE_ERR_DEVICE_NOT_SUPPORTED;
> +
> +	hw->mac_type = ICE_MAC_GENERIC;
> +	return 0;
> +}
> +
> +/**
> + * ice_clear_pf_cfg - Clear PF configuration
> + * @hw: pointer to the hardware structure
> + */
> +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
> +{
> +	struct ice_aq_desc desc;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
> +
> +	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
> +}
> +
> +/**
> + * ice_init_hw - main hardware initialization routine
> + * @hw: pointer to the hardware structure
> + */
> +enum ice_status ice_init_hw(struct ice_hw *hw)
> +{
> +	enum ice_status status;
> +
> +	/* Set MAC type based on DeviceID */
> +	status = ice_set_mac_type(hw);
> +	if (status)
> +		return status;
> +
> +	hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
> +			 PF_FUNC_RID_FUNC_NUM_M) >>
> +		PF_FUNC_RID_FUNC_NUM_S;
> +
> +	status = ice_reset(hw, ICE_RESET_PFR);
> +	if (status)
> +		return status;
> +
> +	status = ice_init_all_ctrlq(hw);
> +	if (status)
> +		goto err_unroll_cqinit;
> +
> +	status = ice_clear_pf_cfg(hw);
> +	if (status)
> +		goto err_unroll_cqinit;
> +
> +	ice_clear_pxe_mode(hw);
> +
> +	status = ice_init_nvm(hw);
> +	if (status)
> +		goto err_unroll_cqinit;
> +
> +	return 0;
> +
> +err_unroll_cqinit:
> +	ice_shutdown_all_ctrlq(hw);
> +	return status;
> +}
> +
> +/**
> + * ice_deinit_hw - unroll initialization operations done by ice_init_hw
> + * @hw: pointer to the hardware structure
> + */
> +void ice_deinit_hw(struct ice_hw *hw)
> +{
> +	ice_shutdown_all_ctrlq(hw);
> +}
> +
> +/**
> + * ice_check_reset - Check to see if a global reset is complete
> + * @hw: pointer to the hardware structure
> + */
> +enum ice_status ice_check_reset(struct ice_hw *hw)
> +{
> +	u32 cnt, reg = 0, grst_delay;
> +
> +	/* Poll for Device Active state in case a recent CORER, GLOBR,
> +	 * or EMPR has occurred. The grst delay value is in 100ms units.
> +	 * Add 1sec for outstanding AQ commands that can take a long time.
> +	 */
> +	grst_delay = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
> +		      GLGEN_RSTCTL_GRSTDEL_S) + 10;

Will this extra second be long enough for any longer-running commands
that complete asynchronously, maybe for NVM?  Or will that matter?

> +
> +	for (cnt = 0; cnt < grst_delay; cnt++) {
> +		mdelay(100);
> +		reg = rd32(hw, GLGEN_RSTAT);
> +		if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
> +			break;
> +	}
> +
> +	if (cnt == grst_delay) {
> +		ice_debug(hw, ICE_DBG_INIT,
> +			  "Global reset polling failed to complete.\n");
> +		return ICE_ERR_RESET_FAILED;
> +	}
> +
> +#define ICE_RESET_DONE_MASK	(GLNVM_ULD_CORER_DONE_M | \
> +				 GLNVM_ULD_GLOBR_DONE_M)
> +
> +	/* Device is Active; check Global Reset processes are done */
> +	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
> +		reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK;
> +		if (reg == ICE_RESET_DONE_MASK) {
> +			ice_debug(hw, ICE_DBG_INIT,
> +				  "Global reset processes done. %d\n", cnt);
> +			break;
> +		}
> +		mdelay(10);
> +	}
> +
> +	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
> +		ice_debug(hw, ICE_DBG_INIT,
> +			  "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
> +			  reg);
> +		return ICE_ERR_RESET_FAILED;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_pf_reset - Reset the PF
> + * @hw: pointer to the hardware structure
> + *
> + * If a global reset has been triggered, this function checks
> + * for its completion and then issues the PF reset
> + */
> +static enum ice_status ice_pf_reset(struct ice_hw *hw)
> +{
> +	u32 cnt, reg;
> +
> +	/* If at function entry a global reset was already in progress, i.e.
> +	 * state is not 'device active' or any of the reset done bits are not
> +	 * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
> +	 * global reset is done.
> +	 */
> +	if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
> +	    (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
> +		/* poll on global reset currently in progress until done */
> +		if (ice_check_reset(hw))
> +			return ICE_ERR_RESET_FAILED;
> +
> +		return 0;
> +	}
> +
> +	/* Reset the PF */
> +	reg = rd32(hw, PFGEN_CTRL);
> +
> +	wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
> +
> +	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
> +		reg = rd32(hw, PFGEN_CTRL);
> +		if (!(reg & PFGEN_CTRL_PFSWR_M))
> +			break;
> +
> +		mdelay(1);
> +	}
> +
> +	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
> +		ice_debug(hw, ICE_DBG_INIT,
> +			  "PF reset polling failed to complete.\n");
> +		return ICE_ERR_RESET_FAILED;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_reset - Perform different types of reset
> + * @hw: pointer to the hardware structure
> + * @req: reset request
> + *
> + * This function triggers a reset as specified by the req parameter.
> + *
> + * Note:
> + * If anything other than a PF reset is triggered, PXE mode is restored.
> + * This has to be cleared using ice_clear_pxe_mode again, once the AQ
> + * interface has been restored in the rebuild flow.
> + */
> +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
> +{
> +	u32 val = 0;
> +
> +	switch (req) {
> +	case ICE_RESET_PFR:
> +		return ice_pf_reset(hw);
> +	case ICE_RESET_CORER:
> +		ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
> +		val = GLGEN_RTRIG_CORER_M;
> +		break;
> +	case ICE_RESET_GLOBR:
> +		ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
> +		val = GLGEN_RTRIG_GLOBR_M;
> +		break;
> +	}
> +
> +	val |= rd32(hw, GLGEN_RTRIG);
> +	wr32(hw, GLGEN_RTRIG, val);
> +	ice_flush(hw);
> +
> +	/* wait for the FW to be ready */
> +	return ice_check_reset(hw);
> +}
> +
>   /**
>    * ice_debug_cq
>    * @hw: pointer to the hardware structure
> @@ -142,3 +360,195 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
>   
>   	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
>   }
> +
> +/**
> + * ice_aq_req_res
> + * @hw: pointer to the hw struct
> + * @res: resource id
> + * @access: access type
> + * @sdp_number: resource number
> + * @timeout: the maximum time in ms that the driver may hold the resource
> + * @cd: pointer to command details structure or NULL
> + *
> + * requests common resource using the admin queue commands (0x0008)
> + */
> +static enum ice_status
> +ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
> +	       enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
> +	       struct ice_sq_cd *cd)
> +{
> +	struct ice_aqc_req_res *cmd_resp;
> +	struct ice_aq_desc desc;
> +	enum ice_status status;
> +
> +	cmd_resp = &desc.params.res_owner;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
> +
> +	cmd_resp->res_id = cpu_to_le16(res);
> +	cmd_resp->access_type = cpu_to_le16(access);
> +	cmd_resp->res_number = cpu_to_le32(sdp_number);
> +
> +	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
> +	/* The completion specifies the maximum time in ms that the driver
> +	 * may hold the resource in the Timeout field.
> +	 * If the resource is held by someone else, the command completes with
> +	 * busy return value and the timeout field indicates the maximum time
> +	 * the current owner of the resource has to free it.
> +	 */
> +	if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
> +		*timeout = le32_to_cpu(cmd_resp->timeout);
> +
> +	return status;
> +}
> +
> +/**
> + * ice_aq_release_res
> + * @hw: pointer to the hw struct
> + * @res: resource id
> + * @sdp_number: resource number
> + * @cd: pointer to command details structure or NULL
> + *
> + * release common resource using the admin queue commands (0x0009)
> + */
> +static enum ice_status
> +ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
> +		   struct ice_sq_cd *cd)
> +{
> +	struct ice_aqc_req_res *cmd;
> +	struct ice_aq_desc desc;
> +
> +	cmd = &desc.params.res_owner;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
> +
> +	cmd->res_id = cpu_to_le16(res);
> +	cmd->res_number = cpu_to_le32(sdp_number);
> +
> +	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
> +}
> +
> +/**
> + * ice_acquire_res
> + * @hw: pointer to the HW structure
> + * @res: resource id
> + * @access: access type (read or write)
> + *
> + * This function will attempt to acquire the ownership of a resource.
> + */
> +enum ice_status
> +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
> +		enum ice_aq_res_access_type access)
> +{
> +#define ICE_RES_POLLING_DELAY_MS	10
> +	u32 delay = ICE_RES_POLLING_DELAY_MS;
> +	enum ice_status status;
> +	u32 time_left = 0;
> +	u32 timeout;
> +
> +	status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
> +
> +	/* An admin queue return code of ICE_AQ_RC_EEXIST means that another
> +	 * driver has previously acquired the resource and performed any
> +	 * necessary updates; in this case the caller does not obtain the
> +	 * resource and has no further work to do.
> +	 */
> +	if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
> +		status = ICE_ERR_AQ_NO_WORK;
> +		goto ice_acquire_res_exit;
> +	}
> +
> +	if (status)
> +		ice_debug(hw, ICE_DBG_RES,
> +			  "resource %d acquire type %d failed.\n", res, access);
> +
> +	/* If necessary, poll until the current lock owner timeouts */
> +	timeout = time_left;
> +	while (status && timeout && time_left) {
> +		mdelay(delay);
> +		timeout = (timeout > delay) ? timeout - delay : 0;
> +		status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
> +
> +		if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
> +			/* lock free, but no work to do */
> +			status = ICE_ERR_AQ_NO_WORK;
> +			break;
> +		}
> +
> +		if (!status)
> +			/* lock acquired */
> +			break;
> +	}
> +	if (status && status != ICE_ERR_AQ_NO_WORK)
> +		ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
> +
> +ice_acquire_res_exit:
> +	if (status == ICE_ERR_AQ_NO_WORK) {
> +		if (access == ICE_RES_WRITE)
> +			ice_debug(hw, ICE_DBG_RES,
> +				  "resource indicates no work to do.\n");
> +		else
> +			ice_debug(hw, ICE_DBG_RES,
> +				  "Warning: ICE_ERR_AQ_NO_WORK not expected\n");
> +	}
> +	return status;
> +}
> +
> +/**
> + * ice_release_res
> + * @hw: pointer to the HW structure
> + * @res: resource id
> + *
> + * This function will release a resource using the proper Admin Command.
> + */
> +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
> +{
> +	enum ice_status status;
> +	u32 total_delay = 0;
> +
> +	status = ice_aq_release_res(hw, res, 0, NULL);
> +
> +	/* there are some rare cases when trying to release the resource
> +	 * results in an admin Q timeout, so handle them correctly
> +	 */
> +	while ((status == ICE_ERR_AQ_TIMEOUT) &&
> +	       (total_delay < hw->adminq.sq_cmd_timeout)) {
> +		mdelay(1);
> +		status = ice_aq_release_res(hw, res, 0, NULL);
> +		total_delay++;
> +	}
> +}
> +
> +/**
> + * ice_aq_clear_pxe_mode
> + * @hw: pointer to the hw struct
> + *
> + * Tell the firmware that the driver is taking over from PXE (0x0110).
> + */
> +static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw)
> +{
> +	struct ice_aq_desc desc;
> +	enum ice_status status;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
> +	desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
> +
> +	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
> +
> +	wr32(hw, GLLAN_RCTL_0, 0x1);

So is it okay to do this write regardless of the ice_aq_send_cmd() status?

> +
> +	return status;
> +}
> +
> +/**
> + * ice_clear_pxe_mode - clear pxe operations mode
> + * @hw: pointer to the hw struct
> + *
> + * Make sure all PXE mode settings are cleared, including things
> + * like descriptor fetch/write-back mode.
> + */
> +void ice_clear_pxe_mode(struct ice_hw *hw)
> +{
> +	if (ice_check_sq_alive(hw, &hw->adminq))
> +		ice_aq_clear_pxe_mode(hw);
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
> index 1e3caecc38c6..0876fd98090a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.h
> +++ b/drivers/net/ethernet/intel/ice/ice_common.h
> @@ -23,12 +23,22 @@
>   
>   void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
>   		  u16 buf_len);
> +enum ice_status ice_init_hw(struct ice_hw *hw);
> +void ice_deinit_hw(struct ice_hw *hw);
> +enum ice_status ice_check_reset(struct ice_hw *hw);
> +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
>   enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
>   void ice_shutdown_all_ctrlq(struct ice_hw *hw);
>   enum ice_status
> +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
> +		enum ice_aq_res_access_type access);
> +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
> +enum ice_status ice_init_nvm(struct ice_hw *hw);
> +enum ice_status
>   ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
>   		struct ice_aq_desc *desc, void *buf, u16 buf_size,
>   		struct ice_sq_cd *cd);
> +void ice_clear_pxe_mode(struct ice_hw *hw);
>   bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
>   enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
>   void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
> @@ -36,4 +46,5 @@ enum ice_status
>   ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
>   		void *buf, u16 buf_size, struct ice_sq_cd *cd);
>   enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
> +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
>   #endif /* _ICE_COMMON_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
> index 143578d02aec..835c035419a3 100644
> --- a/drivers/net/ethernet/intel/ice/ice_controlq.h
> +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
> @@ -20,6 +20,9 @@
>   
>   #include "ice_adminq_cmd.h"
>   
> +/* Maximum buffer lengths for all control queue types */
> +#define ICE_AQ_MAX_BUF_LEN 4096
> +
>   #define ICE_CTL_Q_DESC(R, i) \
>   	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> index 3d6bb273e4c8..e258a12099b8 100644
> --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> @@ -42,5 +42,35 @@
>   #define PF_FW_ATQLEN_ATQENABLE_S	31
>   #define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
>   #define PF_FW_ATQT			0x00080400
> +#define GLGEN_RSTAT			0x000B8188
> +#define GLGEN_RSTAT_DEVSTATE_S		0
> +#define GLGEN_RSTAT_DEVSTATE_M		ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S)
> +#define GLGEN_RSTCTL			0x000B8180
> +#define GLGEN_RSTCTL_GRSTDEL_S		0
> +#define GLGEN_RSTCTL_GRSTDEL_M		ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
> +#define GLGEN_RTRIG			0x000B8190
> +#define GLGEN_RTRIG_CORER_S		0
> +#define GLGEN_RTRIG_CORER_M		BIT(GLGEN_RTRIG_CORER_S)
> +#define GLGEN_RTRIG_GLOBR_S		1
> +#define GLGEN_RTRIG_GLOBR_M		BIT(GLGEN_RTRIG_GLOBR_S)
> +#define GLGEN_STAT			0x000B612C
> +#define PFGEN_CTRL			0x00091000
> +#define PFGEN_CTRL_PFSWR_S		0
> +#define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
> +#define GLLAN_RCTL_0			0x002941F8
> +#define GLNVM_FLA			0x000B6108
> +#define GLNVM_FLA_LOCKED_S		6
> +#define GLNVM_FLA_LOCKED_M		BIT(GLNVM_FLA_LOCKED_S)
> +#define GLNVM_GENS			0x000B6100
> +#define GLNVM_GENS_SR_SIZE_S		5
> +#define GLNVM_GENS_SR_SIZE_M		ICE_M(0x7, GLNVM_GENS_SR_SIZE_S)
> +#define GLNVM_ULD			0x000B6008
> +#define GLNVM_ULD_CORER_DONE_S		3
> +#define GLNVM_ULD_CORER_DONE_M		BIT(GLNVM_ULD_CORER_DONE_S)
> +#define GLNVM_ULD_GLOBR_DONE_S		4
> +#define GLNVM_ULD_GLOBR_DONE_M		BIT(GLNVM_ULD_GLOBR_DONE_S)
> +#define PF_FUNC_RID			0x0009E880
> +#define PF_FUNC_RID_FUNC_NUM_S		0
> +#define PF_FUNC_RID_FUNC_NUM_M		ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S)
>   
>   #endif /* _ICE_HW_AUTOGEN_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 408ae90d6562..2ee4a0547ba3 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -40,6 +40,18 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
>   MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
>   #endif /* !CONFIG_DYNAMIC_DEBUG */
>   
> +/**
> + * ice_set_ctrlq_len - helper function to set controlq length
> + * @hw: pointer to the hw instance
> + */
> +static void ice_set_ctrlq_len(struct ice_hw *hw)
> +{
> +	hw->adminq.num_rq_entries = ICE_AQ_LEN;
> +	hw->adminq.num_sq_entries = ICE_AQ_LEN;
> +	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
> +	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
> +}
> +
>   /**
>    * ice_probe - Device initialization routine
>    * @pdev: PCI device information struct
> @@ -95,6 +107,8 @@ static int ice_probe(struct pci_dev *pdev,
>   	hw->subsystem_device_id = pdev->subsystem_device;
>   	hw->bus.device = PCI_SLOT(pdev->devfn);
>   	hw->bus.func = PCI_FUNC(pdev->devfn);
> +	ice_set_ctrlq_len(hw);
> +
>   	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
>   
>   #ifndef CONFIG_DYNAMIC_DEBUG
> @@ -102,7 +116,22 @@ static int ice_probe(struct pci_dev *pdev,
>   		hw->debug_mask = debug;
>   #endif
>   
> +	err = ice_init_hw(hw);
> +	if (err) {
> +		dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err);
> +		err = -EIO;
> +		goto err_exit_unroll;
> +	}
> +
> +	dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n",
> +		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> +		 hw->api_maj_ver, hw->api_min_ver);
> +
>   	return 0;
> +
> +err_exit_unroll:
> +	pci_disable_pcie_error_reporting(pdev);
> +	return err;
>   }
>   
>   /**
> @@ -117,6 +146,8 @@ static void ice_remove(struct pci_dev *pdev)
>   		return;
>   
>   	set_bit(__ICE_DOWN, pf->state);
> +
> +	ice_deinit_hw(&pf->hw);
>   	pci_disable_pcie_error_reporting(pdev);
>   }
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
> new file mode 100644
> index 000000000000..565910f01290
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
> @@ -0,0 +1,245 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#include "ice_common.h"
> +
> +/**
> + * ice_aq_read_nvm
> + * @hw: pointer to the hw struct
> + * @module_typeid: module pointer location in words from the NVM beginning
> + * @offset: byte offset from the module beginning
> + * @length: length of the section to be read (in bytes from the offset)
> + * @data: command buffer (size [bytes] = length)
> + * @last_command: tells if this is the last command in a series
> + * @cd: pointer to command details structure or NULL
> + *
> + * Read the NVM using the admin queue commands (0x0701)
> + */
> +static enum ice_status
> +ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length,
> +		void *data, bool last_command, struct ice_sq_cd *cd)
> +{
> +	struct ice_aq_desc desc;
> +	struct ice_aqc_nvm *cmd;
> +
> +	cmd = &desc.params.nvm;
> +
> +	/* In offset the highest byte must be zeroed. */
> +	if (offset & 0xFF000000)
> +		return ICE_ERR_PARAM;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
> +
> +	/* If this is the last command in a series, set the proper flag. */
> +	if (last_command)
> +		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
> +	cmd->module_typeid = module_typeid;
> +	cmd->offset = cpu_to_le32(offset);
> +	cmd->length = cpu_to_le16(length);
> +
> +	return ice_aq_send_cmd(hw, &desc, data, length, cd);
> +}
> +
> +/**
> + * ice_check_sr_access_params - verify params for Shadow RAM R/W operations.
> + * @hw: pointer to the HW structure
> + * @offset: offset in words from module start
> + * @words: number of words to access
> + */
> +static enum ice_status
> +ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words)
> +{
> +	if ((offset + words) > hw->nvm.sr_words) {
> +		ice_debug(hw, ICE_DBG_NVM,
> +			  "NVM error: offset beyond SR lmt.\n");
> +		return ICE_ERR_PARAM;
> +	}
> +
> +	if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) {
> +		/* We can access only up to 4KB (one sector), in one AQ write */
> +		ice_debug(hw, ICE_DBG_NVM,
> +			  "NVM error: tried to access %d words, limit is %d.\n",
> +			  words, ICE_SR_SECTOR_SIZE_IN_WORDS);
> +		return ICE_ERR_PARAM;
> +	}
> +
> +	if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) !=
> +	    (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) {
> +		/* A single access cannot spread over two sectors */
> +		ice_debug(hw, ICE_DBG_NVM,
> +			  "NVM error: cannot spread over two sectors.\n");
> +		return ICE_ERR_PARAM;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_read_sr_aq - Read Shadow RAM.
> + * @hw: pointer to the HW structure
> + * @offset: offset in words from module start
> + * @words: number of words to read
> + * @data: buffer for words read from Shadow RAM
> + * @last_command: tells the AdminQ that this is the last command
> + *
> + * Reads 16-bit word buffers from the Shadow RAM using the admin command.
> + */
> +static enum ice_status
> +ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data,
> +	       bool last_command)
> +{
> +	enum ice_status status;
> +
> +	status = ice_check_sr_access_params(hw, offset, words);
> +	if (!status)
> +		status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data,

Why the doubling of offset and words?  If this is some general 
adjustment made for the AQ interface, it should be made in 
ice_aq_read_nvm().  If not, then some explanation is needed here.

sln

^ permalink raw reply

* [PATCH net-next] ibmvnic: Fix recent errata commit
From: Thomas Falcon @ 2018-03-13  2:05 UTC (permalink / raw)
  To: netdev; +Cc: jallen, nfont, Thomas Falcon

Sorry, one of the patches I sent in an earlier series
has some dumb mistakes. One was that I had changed the
parameter for the errata workaround function but forgot
to make that change in the code that called it.

The second mistake was a forgotten return value at the end
of the function in case the workaround was not needed.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e02d3b9..6ff43d7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1351,6 +1351,8 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
 	 */
 	if (skb->len < netdev->min_mtu)
 		return skb_put_padto(skb, netdev->min_mtu);
+
+	return 0;
 }
 
 static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
@@ -1390,7 +1392,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
-	if (ibmvnic_xmit_workarounds(skb, adapter)) {
+	if (ibmvnic_xmit_workarounds(skb, netdev)) {
 		tx_dropped++;
 		tx_send_failed++;
 		ret = NETDEV_TX_OK;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [Intel-wired-lan] [PATCH 02/15] ice: Add support for control queues
From: Shannon Nelson @ 2018-03-13  2:05 UTC (permalink / raw)
  To: Anirudh Venkataramanan, intel-wired-lan; +Cc: netdev
In-Reply-To: <20180309172136.9073-3-anirudh.venkataramanan@intel.com>

On 3/9/2018 9:21 AM, Anirudh Venkataramanan wrote:
> A control queue is a hardware interface which is used by the driver
> to interact with other subsystems (like firmware, PHY, etc.). It is
> implemented as a producer-consumer ring. More specifically, an
> "admin queue" is a type of control queue used to interact with the
> firmware.
> 
> This patch introduces data structures and functions to initialize
> and tear down control/admin queues. Once the admin queue is initialized,
> the driver uses it to get the firmware version.
> 
> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/Makefile         |   4 +-
>   drivers/net/ethernet/intel/ice/ice.h            |   1 +
>   drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 108 +++
>   drivers/net/ethernet/intel/ice/ice_common.c     | 144 ++++
>   drivers/net/ethernet/intel/ice/ice_common.h     |  39 +
>   drivers/net/ethernet/intel/ice/ice_controlq.c   | 979 ++++++++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_controlq.h   |  97 +++
>   drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  46 ++
>   drivers/net/ethernet/intel/ice/ice_main.c       |  11 +-
>   drivers/net/ethernet/intel/ice/ice_osdep.h      |  86 +++
>   drivers/net/ethernet/intel/ice/ice_status.h     |  35 +
>   drivers/net/ethernet/intel/ice/ice_type.h       |  22 +
>   12 files changed, 1570 insertions(+), 2 deletions(-)
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_common.c
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_common.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.c
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_hw_autogen.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_osdep.h
>   create mode 100644 drivers/net/ethernet/intel/ice/ice_status.h
> 
> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> index 2a177ea21b74..eebf619e84a8 100644
> --- a/drivers/net/ethernet/intel/ice/Makefile
> +++ b/drivers/net/ethernet/intel/ice/Makefile
> @@ -24,4 +24,6 @@
>   
>   obj-$(CONFIG_ICE) += ice.o
>   
> -ice-y := ice_main.o
> +ice-y := ice_main.o	\
> +	 ice_controlq.o	\
> +	 ice_common.o
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index d781027330cc..ea2fb63bb095 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -26,6 +26,7 @@
>   #include <linux/compiler.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/delay.h>
>   #include <linux/bitmap.h>
>   #include "ice_devids.h"
>   #include "ice_type.h"
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> new file mode 100644
> index 000000000000..885fa3c6fec4
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -0,0 +1,108 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#ifndef _ICE_ADMINQ_CMD_H_
> +#define _ICE_ADMINQ_CMD_H_
> +
> +/* This header file defines the Admin Queue commands, error codes and
> + * descriptor format.  It is shared between Firmware and Software.
> + */
> +
> +struct ice_aqc_generic {
> +	__le32 param0;
> +	__le32 param1;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
> +/* Get version (direct 0x0001) */
> +struct ice_aqc_get_ver {
> +	__le32 rom_ver;
> +	__le32 fw_build;
> +	u8 fw_branch;
> +	u8 fw_major;
> +	u8 fw_minor;
> +	u8 fw_patch;
> +	u8 api_branch;
> +	u8 api_major;
> +	u8 api_minor;
> +	u8 api_patch;
> +};
> +
> +/* Queue Shutdown (direct 0x0003) */
> +struct ice_aqc_q_shutdown {
> +#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
> +	__le32 driver_unloading;
> +	u8 reserved[12];
> +};
> +
> +/**
> + * struct ice_aq_desc - Admin Queue (AQ) descriptor
> + * @flags: ICE_AQ_FLAG_* flags
> + * @opcode: AQ command opcode
> + * @datalen: length in bytes of indirect/external data buffer
> + * @retval: return value from firmware
> + * @cookie_high: opaque data high-half
> + * @cookie_low: opaque data low-half
> + * @params: command-specific parameters
> + *
> + * Descriptor format for commands the driver posts on the Admin Transmit Queue
> + * (ATQ).  The firmware writes back onto the command descriptor and returns
> + * the result of the command.  Asynchronous events that are not an immediate
> + * result of the command are written to the Admin Receive Queue (ARQ) using
> + * the same descriptor format.  Descriptors are in little-endian notation with
> + * 32-bit words.
> + */
> +struct ice_aq_desc {
> +	__le16 flags;
> +	__le16 opcode;
> +	__le16 datalen;
> +	__le16 retval;
> +	__le32 cookie_high;
> +	__le32 cookie_low;
> +	union {
> +		u8 raw[16];
> +		struct ice_aqc_generic generic;
> +		struct ice_aqc_get_ver get_ver;
> +		struct ice_aqc_q_shutdown q_shutdown;
> +	} params;
> +};

You might put a compile-time size check on this struct - it helped many 
times in i40e development.

> +
> +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
> +#define ICE_AQ_LG_BUF	512
> +
> +#define ICE_AQ_FLAG_LB_S	9
> +#define ICE_AQ_FLAG_BUF_S	12
> +#define ICE_AQ_FLAG_SI_S	13
> +
> +#define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
> +#define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
> +#define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
> +
> +/* error codes */
> +enum ice_aq_err {
> +	ICE_AQ_RC_OK		= 0,  /* success */
> +};
> +
> +/* Admin Queue command opcodes */
> +enum ice_adminq_opc {
> +	/* AQ commands */
> +	ice_aqc_opc_get_ver				= 0x0001,
> +	ice_aqc_opc_q_shutdown				= 0x0003,
> +};
> +
> +#endif /* _ICE_ADMINQ_CMD_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> new file mode 100644
> index 000000000000..d980f0518744
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#include "ice_common.h"
> +#include "ice_adminq_cmd.h"
> +
> +/**
> + * ice_debug_cq
> + * @hw: pointer to the hardware structure
> + * @mask: debug mask
> + * @desc: pointer to control queue descriptor
> + * @buf: pointer to command buffer
> + * @buf_len: max length of buf
> + *
> + * Dumps debug log about control command with descriptor contents.
> + */
> +void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc,
> +		  void *buf, u16 buf_len)
> +{
> +	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
> +	u16 len;
> +
> +#ifndef CONFIG_DYNAMIC_DEBUG
> +	if (!(mask & hw->debug_mask))
> +		return;
> +#endif
> +
> +	if (!desc)
> +		return;
> +
> +	len = le16_to_cpu(cq_desc->datalen);
> +
> +	ice_debug(hw, mask,
> +		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
> +		  le16_to_cpu(cq_desc->opcode),
> +		  le16_to_cpu(cq_desc->flags),
> +		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
> +	ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
> +		  le32_to_cpu(cq_desc->cookie_high),
> +		  le32_to_cpu(cq_desc->cookie_low));
> +	ice_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
> +		  le32_to_cpu(cq_desc->params.generic.param0),
> +		  le32_to_cpu(cq_desc->params.generic.param1));
> +	ice_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
> +		  le32_to_cpu(cq_desc->params.generic.addr_high),
> +		  le32_to_cpu(cq_desc->params.generic.addr_low));
> +	if (buf && cq_desc->datalen != 0) {
> +		ice_debug(hw, mask, "Buffer:\n");
> +		if (buf_len < len)
> +			len = buf_len;
> +
> +		ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
> +	}
> +}
> +
> +/* FW Admin Queue command wrappers */
> +
> +/**
> + * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
> + * @hw: pointer to the hw struct
> + * @desc: descriptor describing the command
> + * @buf: buffer to use for indirect commands (NULL for direct commands)
> + * @buf_size: size of buffer for indirect commands (0 for direct commands)
> + * @cd: pointer to command details structure
> + *
> + * Helper function to send FW Admin Queue commands to the FW Admin Queue.
> + */
> +enum ice_status
> +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
> +		u16 buf_size, struct ice_sq_cd *cd)
> +{
> +	return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
> +}
> +
> +/**
> + * ice_aq_get_fw_ver
> + * @hw: pointer to the hw struct
> + * @cd: pointer to command details structure or NULL
> + *
> + * Get the firmware version using the admin queue command (0x0001)
> + */
> +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
> +{
> +	struct ice_aqc_get_ver *resp;
> +	struct ice_aq_desc desc;
> +	enum ice_status status;
> +
> +	resp = &desc.params.get_ver;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
> +
> +	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
> +
> +	if (!status) {
> +		hw->fw_branch = resp->fw_branch;
> +		hw->fw_maj_ver = resp->fw_major;
> +		hw->fw_min_ver = resp->fw_minor;
> +		hw->fw_patch = resp->fw_patch;
> +		hw->fw_build = le32_to_cpu(resp->fw_build);
> +		hw->api_branch = resp->api_branch;
> +		hw->api_maj_ver = resp->api_major;
> +		hw->api_min_ver = resp->api_minor;
> +		hw->api_patch = resp->api_patch;
> +	}
> +
> +	return status;
> +}
> +
> +/**
> + * ice_aq_q_shutdown
> + * @hw: pointer to the hw struct
> + * @unloading: is the driver unloading itself
> + *
> + * Tell the Firmware that we're shutting down the AdminQ and whether
> + * or not the driver is unloading as well (0x0003).
> + */
> +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
> +{
> +	struct ice_aqc_q_shutdown *cmd;
> +	struct ice_aq_desc desc;
> +
> +	cmd = &desc.params.q_shutdown;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
> +
> +	if (unloading)
> +		cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING);
> +
> +	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
> new file mode 100644
> index 000000000000..1e3caecc38c6
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_common.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#ifndef _ICE_COMMON_H_
> +#define _ICE_COMMON_H_
> +
> +#include "ice.h"
> +#include "ice_type.h"
> +
> +void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
> +		  u16 buf_len);
> +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
> +void ice_shutdown_all_ctrlq(struct ice_hw *hw);
> +enum ice_status
> +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
> +		struct ice_aq_desc *desc, void *buf, u16 buf_size,
> +		struct ice_sq_cd *cd);
> +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
> +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
> +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
> +enum ice_status
> +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
> +		void *buf, u16 buf_size, struct ice_sq_cd *cd);
> +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
> +#endif /* _ICE_COMMON_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
> new file mode 100644
> index 000000000000..b1143d66d4bd
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
> @@ -0,0 +1,979 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#include "ice_common.h"
> +
> +/**
> + * ice_adminq_init_regs - Initialize AdminQ registers
> + * @hw: pointer to the hardware structure
> + *
> + * This assumes the alloc_sq and alloc_rq functions have already been called
> + */
> +static void ice_adminq_init_regs(struct ice_hw *hw)
> +{
> +	struct ice_ctl_q_info *cq = &hw->adminq;
> +
> +	cq->sq.head = PF_FW_ATQH;
> +	cq->sq.tail = PF_FW_ATQT;
> +	cq->sq.len = PF_FW_ATQLEN;
> +	cq->sq.bah = PF_FW_ATQBAH;
> +	cq->sq.bal = PF_FW_ATQBAL;
> +	cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M;
> +	cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
> +	cq->sq.head_mask = PF_FW_ATQH_ATQH_M;
> +
> +	cq->rq.head = PF_FW_ARQH;
> +	cq->rq.tail = PF_FW_ARQT;
> +	cq->rq.len = PF_FW_ARQLEN;
> +	cq->rq.bah = PF_FW_ARQBAH;
> +	cq->rq.bal = PF_FW_ARQBAL;
> +	cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M;
> +	cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
> +	cq->rq.head_mask = PF_FW_ARQH_ARQH_M;
> +}
> +
> +/**
> + * ice_check_sq_alive
> + * @hw: pointer to the hw struct
> + * @cq: pointer to the specific Control queue
> + *
> + * Returns true if Queue is enabled else false.
> + */
> +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	/* check both queue-length and queue-enable fields */
> +	if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
> +		return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
> +						cq->sq.len_ena_mask)) ==
> +			(cq->num_sq_entries | cq->sq.len_ena_mask);
> +
> +	return false;
> +}
> +
> +/**
> + * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static enum ice_status
> +ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
> +
> +	cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
> +						 &cq->sq.desc_buf.pa,
> +						 GFP_KERNEL | __GFP_ZERO);
> +	if (!cq->sq.desc_buf.va)
> +		return ICE_ERR_NO_MEMORY;
> +	cq->sq.desc_buf.size = size;
> +
> +	cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
> +				      sizeof(struct ice_sq_cd), GFP_KERNEL);
> +	if (!cq->sq.cmd_buf) {
> +		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
> +				   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
> +		cq->sq.desc_buf.va = NULL;
> +		cq->sq.desc_buf.pa = 0;
> +		cq->sq.desc_buf.size = 0;
> +		return ICE_ERR_NO_MEMORY;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static enum ice_status
> +ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
> +
> +	cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
> +						 &cq->rq.desc_buf.pa,
> +						 GFP_KERNEL | __GFP_ZERO);
> +	if (!cq->rq.desc_buf.va)
> +		return ICE_ERR_NO_MEMORY;
> +	cq->rq.desc_buf.size = size;
> +	return 0;
> +}
> +
> +/**
> + * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * This assumes the posted send buffers have already been cleaned
> + * and de-allocated
> + */
> +static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
> +			   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
> +	cq->sq.desc_buf.va = NULL;
> +	cq->sq.desc_buf.pa = 0;
> +	cq->sq.desc_buf.size = 0;
> +}
> +
> +/**
> + * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * This assumes the posted receive buffers have already been cleaned
> + * and de-allocated
> + */
> +static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size,
> +			   cq->rq.desc_buf.va, cq->rq.desc_buf.pa);
> +	cq->rq.desc_buf.va = NULL;
> +	cq->rq.desc_buf.pa = 0;
> +	cq->rq.desc_buf.size = 0;
> +}
> +
> +/**
> + * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static enum ice_status
> +ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	int i;
> +
> +	/* We'll be allocating the buffer info memory first, then we can
> +	 * allocate the mapped buffers for the event processing
> +	 */
> +	cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
> +				       sizeof(cq->rq.desc_buf), GFP_KERNEL);
> +	if (!cq->rq.dma_head)
> +		return ICE_ERR_NO_MEMORY;
> +	cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
> +
> +	/* allocate the mapped buffers */
> +	for (i = 0; i < cq->num_rq_entries; i++) {
> +		struct ice_aq_desc *desc;
> +		struct ice_dma_mem *bi;
> +
> +		bi = &cq->rq.r.rq_bi[i];
> +		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
> +					     cq->rq_buf_size, &bi->pa,
> +					     GFP_KERNEL | __GFP_ZERO);
> +		if (!bi->va)
> +			goto unwind_alloc_rq_bufs;
> +		bi->size = cq->rq_buf_size;
> +
> +		/* now configure the descriptors for use */
> +		desc = ICE_CTL_Q_DESC(cq->rq, i);
> +
> +		desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
> +		if (cq->rq_buf_size > ICE_AQ_LG_BUF)
> +			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
> +		desc->opcode = 0;
> +		/* This is in accordance with Admin queue design, there is no
> +		 * register for buffer size configuration
> +		 */
> +		desc->datalen = cpu_to_le16(bi->size);
> +		desc->retval = 0;
> +		desc->cookie_high = 0;
> +		desc->cookie_low = 0;
> +		desc->params.generic.addr_high =
> +			cpu_to_le32(upper_32_bits(bi->pa));
> +		desc->params.generic.addr_low =
> +			cpu_to_le32(lower_32_bits(bi->pa));
> +		desc->params.generic.param0 = 0;
> +		desc->params.generic.param1 = 0;
> +	}
> +	return 0;
> +
> +unwind_alloc_rq_bufs:
> +	/* don't try to free the one that failed... */
> +	i--;
> +	for (; i >= 0; i--) {
> +		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
> +				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
> +		cq->rq.r.rq_bi[i].va = NULL;
> +		cq->rq.r.rq_bi[i].pa = 0;
> +		cq->rq.r.rq_bi[i].size = 0;
> +	}
> +	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
> +
> +	return ICE_ERR_NO_MEMORY;
> +}
> +
> +/**
> + * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static enum ice_status
> +ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	int i;
> +
> +	/* Allocate the buffer info structures, then the mapped buffers */
> +	cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
> +				       sizeof(cq->sq.desc_buf), GFP_KERNEL);
> +	if (!cq->sq.dma_head)
> +		return ICE_ERR_NO_MEMORY;
> +	cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
> +
> +	/* allocate the mapped buffers */
> +	for (i = 0; i < cq->num_sq_entries; i++) {
> +		struct ice_dma_mem *bi;
> +
> +		bi = &cq->sq.r.sq_bi[i];
> +		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
> +					     cq->sq_buf_size, &bi->pa,
> +					     GFP_KERNEL | __GFP_ZERO);
> +		if (!bi->va)
> +			goto unwind_alloc_sq_bufs;
> +		bi->size = cq->sq_buf_size;
> +	}
> +	return 0;
> +
> +unwind_alloc_sq_bufs:
> +	/* don't try to free the one that failed... */
> +	i--;
> +	for (; i >= 0; i--) {
> +		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
> +				   cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
> +		cq->sq.r.sq_bi[i].va = NULL;
> +		cq->sq.r.sq_bi[i].pa = 0;
> +		cq->sq.r.sq_bi[i].size = 0;
> +	}
> +	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
> +
> +	return ICE_ERR_NO_MEMORY;
> +}
> +
> +/**
> + * ice_free_rq_bufs - Free ARQ buffer info elements
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	int i;
> +
> +	/* free the mapped receive buffers */
> +	for (i = 0; i < cq->num_rq_entries; i++) {
> +		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
> +				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
> +		cq->rq.r.rq_bi[i].va = NULL;
> +		cq->rq.r.rq_bi[i].pa = 0;
> +		cq->rq.r.rq_bi[i].size = 0;
> +	}
> +
> +	/* free the dma header */
> +	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
> +}
> +
> +/**
> + * ice_free_sq_bufs - Free ATQ buffer info elements
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + */
> +static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	int i;
> +
> +	/* only unmap if the address is non-NULL */
> +	for (i = 0; i < cq->num_sq_entries; i++)
> +		if (cq->sq.r.sq_bi[i].pa) {
> +			dmam_free_coherent(ice_hw_to_dev(hw),
> +					   cq->sq.r.sq_bi[i].size,
> +					   cq->sq.r.sq_bi[i].va,
> +					   cq->sq.r.sq_bi[i].pa);
> +			cq->sq.r.sq_bi[i].va = NULL;
> +			cq->sq.r.sq_bi[i].pa = 0;
> +			cq->sq.r.sq_bi[i].size = 0;
> +		}
> +
> +	/* free the command details array */
> +	devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf);
> +
> +	/* free the dma header */
> +	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
> +}
> +
> +/**
> + * ice_cfg_sq_regs - configure Control ATQ registers
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * Configure base address and length registers for the transmit queue
> + */
> +static enum ice_status
> +ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	u32 reg = 0;
> +
> +	/* Clear Head and Tail */
> +	wr32(hw, cq->sq.head, 0);
> +	wr32(hw, cq->sq.tail, 0);
> +
> +	/* set starting point */
> +	wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask));
> +	wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa));
> +	wr32(hw, cq->sq.bah, upper_32_bits(cq->sq.desc_buf.pa));
> +
> +	/* Check one register to verify that config was applied */
> +	reg = rd32(hw, cq->sq.bal);
> +	if (reg != lower_32_bits(cq->sq.desc_buf.pa))
> +		return ICE_ERR_AQ_ERROR;
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_cfg_rq_regs - configure Control ARQ register
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * Configure base address and length registers for the receive (event q)
> + */
> +static enum ice_status
> +ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	u32 reg = 0;
> +
> +	/* Clear Head and Tail */
> +	wr32(hw, cq->rq.head, 0);
> +	wr32(hw, cq->rq.tail, 0);
> +
> +	/* set starting point */
> +	wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask));
> +	wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa));
> +	wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa));
> +
> +	/* Update tail in the HW to post pre-allocated buffers */
> +	wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
> +
> +	/* Check one register to verify that config was applied */
> +	reg = rd32(hw, cq->rq.bal);
> +	if (reg != lower_32_bits(cq->rq.desc_buf.pa))
> +		return ICE_ERR_AQ_ERROR;
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_init_sq - main initialization routine for Control ATQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * This is the main initialization routine for the Control Send Queue
> + * Prior to calling this function, drivers *MUST* set the following fields
> + * in the cq structure:
> + *     - cq->num_sq_entries
> + *     - cq->sq_buf_size
> + *
> + * Do *NOT* hold the lock when calling this as the memory allocation routines
> + * called are not going to be atomic context safe
> + */
> +static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	enum ice_status ret_code;
> +
> +	if (cq->sq.count > 0) {
> +		/* queue already initialized */
> +		ret_code = ICE_ERR_NOT_READY;
> +		goto init_ctrlq_exit;
> +	}
> +
> +	/* verify input for valid configuration */
> +	if (!cq->num_sq_entries || !cq->sq_buf_size) {
> +		ret_code = ICE_ERR_CFG;
> +		goto init_ctrlq_exit;
> +	}
> +
> +	cq->sq.next_to_use = 0;
> +	cq->sq.next_to_clean = 0;
> +
> +	/* allocate the ring memory */
> +	ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_exit;
> +
> +	/* allocate buffers in the rings */
> +	ret_code = ice_alloc_sq_bufs(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_free_rings;
> +
> +	/* initialize base registers */
> +	ret_code = ice_cfg_sq_regs(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_free_rings;
> +
> +	/* success! */
> +	cq->sq.count = cq->num_sq_entries;
> +	goto init_ctrlq_exit;
> +
> +init_ctrlq_free_rings:
> +	ice_free_ctrlq_sq_ring(hw, cq);
> +
> +init_ctrlq_exit:
> +	return ret_code;
> +}
> +
> +/**
> + * ice_init_rq - initialize ARQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * The main initialization routine for the Admin Receive (Event) Queue.
> + * Prior to calling this function, drivers *MUST* set the following fields
> + * in the cq->structure:
> + *     - cq->num_rq_entries
> + *     - cq->rq_buf_size
> + *
> + * Do *NOT* hold the lock when calling this as the memory allocation routines
> + * called are not going to be atomic context safe
> + */
> +static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	enum ice_status ret_code;
> +
> +	if (cq->rq.count > 0) {
> +		/* queue already initialized */
> +		ret_code = ICE_ERR_NOT_READY;
> +		goto init_ctrlq_exit;
> +	}
> +
> +	/* verify input for valid configuration */
> +	if (!cq->num_rq_entries || !cq->rq_buf_size) {
> +		ret_code = ICE_ERR_CFG;
> +		goto init_ctrlq_exit;
> +	}
> +
> +	cq->rq.next_to_use = 0;
> +	cq->rq.next_to_clean = 0;
> +
> +	/* allocate the ring memory */
> +	ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_exit;
> +
> +	/* allocate buffers in the rings */
> +	ret_code = ice_alloc_rq_bufs(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_free_rings;
> +
> +	/* initialize base registers */
> +	ret_code = ice_cfg_rq_regs(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_free_rings;
> +
> +	/* success! */
> +	cq->rq.count = cq->num_rq_entries;
> +	goto init_ctrlq_exit;
> +
> +init_ctrlq_free_rings:
> +	ice_free_ctrlq_rq_ring(hw, cq);
> +
> +init_ctrlq_exit:
> +	return ret_code;
> +}
> +
> +/**
> + * ice_shutdown_sq - shutdown the Control ATQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * The main shutdown routine for the Control Transmit Queue
> + */
> +static enum ice_status
> +ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	enum ice_status ret_code = 0;
> +
> +	mutex_lock(&cq->sq_lock);
> +
> +	if (!cq->sq.count) {
> +		ret_code = ICE_ERR_NOT_READY;
> +		goto shutdown_sq_out;
> +	}
> +
> +	/* Stop firmware AdminQ processing */
> +	wr32(hw, cq->sq.head, 0);
> +	wr32(hw, cq->sq.tail, 0);
> +	wr32(hw, cq->sq.len, 0);
> +	wr32(hw, cq->sq.bal, 0);
> +	wr32(hw, cq->sq.bah, 0);
> +
> +	cq->sq.count = 0;	/* to indicate uninitialized queue */
> +
> +	/* free ring buffers and the ring itself */
> +	ice_free_sq_bufs(hw, cq);
> +	ice_free_ctrlq_sq_ring(hw, cq);
> +
> +shutdown_sq_out:
> +	mutex_unlock(&cq->sq_lock);
> +	return ret_code;
> +}
> +
> +/**
> + * ice_aq_ver_check - Check the reported AQ API version.
> + * @fw_branch: The "branch" of FW, typically describes the device type
> + * @fw_major: The major version of the FW API
> + * @fw_minor: The minor version increment of the FW API
> + *
> + * Checks if the driver should load on a given AQ API version.
> + *
> + * Return: 'true' iff the driver should attempt to load. 'false' otherwise.
> + */
> +static bool ice_aq_ver_check(u8 fw_branch, u8 fw_major, u8 fw_minor)
> +{
> +	if (fw_branch != EXP_FW_API_VER_BRANCH)
> +		return false;
> +	if (fw_major != EXP_FW_API_VER_MAJOR)
> +		return false;
> +	if (fw_minor != EXP_FW_API_VER_MINOR)
> +		return false;
> +	return true;
> +}
> +
> +/**
> + * ice_shutdown_rq - shutdown Control ARQ
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * The main shutdown routine for the Control Receive Queue
> + */
> +static enum ice_status
> +ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	enum ice_status ret_code = 0;
> +
> +	mutex_lock(&cq->rq_lock);
> +
> +	if (!cq->rq.count) {
> +		ret_code = ICE_ERR_NOT_READY;
> +		goto shutdown_rq_out;
> +	}
> +
> +	/* Stop Control Queue processing */
> +	wr32(hw, cq->rq.head, 0);
> +	wr32(hw, cq->rq.tail, 0);
> +	wr32(hw, cq->rq.len, 0);
> +	wr32(hw, cq->rq.bal, 0);
> +	wr32(hw, cq->rq.bah, 0);
> +
> +	/* set rq.count to 0 to indicate uninitialized queue */
> +	cq->rq.count = 0;
> +
> +	/* free ring buffers and the ring itself */
> +	ice_free_rq_bufs(hw, cq);
> +	ice_free_ctrlq_rq_ring(hw, cq);
> +
> +shutdown_rq_out:
> +	mutex_unlock(&cq->rq_lock);
> +	return ret_code;
> +}
> +
> +/**
> + * ice_init_check_adminq - Check version for Admin Queue to know if its alive
> + * @hw: pointer to the hardware structure
> + */
> +static enum ice_status ice_init_check_adminq(struct ice_hw *hw)
> +{
> +	struct ice_ctl_q_info *cq = &hw->adminq;
> +	enum ice_status status;
> +
> +	status = ice_aq_get_fw_ver(hw, NULL);
> +	if (status)
> +		goto init_ctrlq_free_rq;
> +
> +	if (!ice_aq_ver_check(hw->api_branch, hw->api_maj_ver,
> +			      hw->api_min_ver)) {
> +		status = ICE_ERR_FW_API_VER;
> +		goto init_ctrlq_free_rq;
> +	}
> +
> +	return 0;
> +
> +init_ctrlq_free_rq:
> +	ice_shutdown_rq(hw, cq);
> +	ice_shutdown_sq(hw, cq);
> +	mutex_destroy(&cq->sq_lock);
> +	mutex_destroy(&cq->rq_lock);
> +	return status;
> +}
> +
> +/**
> + * ice_init_ctrlq - main initialization routine for any control Queue
> + * @hw: pointer to the hardware structure
> + * @q_type: specific Control queue type
> + *
> + * Prior to calling this function, drivers *MUST* set the following fields
> + * in the cq->structure:
> + *     - cq->num_sq_entries
> + *     - cq->num_rq_entries
> + *     - cq->rq_buf_size
> + *     - cq->sq_buf_size
> + *
> + */
> +static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
> +{
> +	struct ice_ctl_q_info *cq;
> +	enum ice_status ret_code;
> +
> +	switch (q_type) {
> +	case ICE_CTL_Q_ADMIN:
> +		ice_adminq_init_regs(hw);
> +		cq = &hw->adminq;
> +		break;
> +	default:
> +		return ICE_ERR_PARAM;
> +	}
> +	cq->qtype = q_type;
> +
> +	/* verify input for valid configuration */
> +	if (!cq->num_rq_entries || !cq->num_sq_entries ||
> +	    !cq->rq_buf_size || !cq->sq_buf_size) {
> +		return ICE_ERR_CFG;
> +	}
> +	mutex_init(&cq->sq_lock);
> +	mutex_init(&cq->rq_lock);
> +
> +	/* setup SQ command write back timeout */
> +	cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
> +
> +	/* allocate the ATQ */
> +	ret_code = ice_init_sq(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_destroy_locks;
> +
> +	/* allocate the ARQ */
> +	ret_code = ice_init_rq(hw, cq);
> +	if (ret_code)
> +		goto init_ctrlq_free_sq;
> +
> +	/* success! */
> +	return 0;
> +
> +init_ctrlq_free_sq:
> +	ice_shutdown_sq(hw, cq);
> +init_ctrlq_destroy_locks:
> +	mutex_destroy(&cq->sq_lock);
> +	mutex_destroy(&cq->rq_lock);
> +	return ret_code;
> +}
> +
> +/**
> + * ice_init_all_ctrlq - main initialization routine for all control queues
> + * @hw: pointer to the hardware structure
> + *
> + * Prior to calling this function, drivers *MUST* set the following fields
> + * in the cq->structure for all control queues:
> + *     - cq->num_sq_entries
> + *     - cq->num_rq_entries
> + *     - cq->rq_buf_size
> + *     - cq->sq_buf_size
> + */
> +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
> +{
> +	enum ice_status ret_code;
> +
> +	/* Init FW admin queue */
> +	ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
> +	if (ret_code)
> +		return ret_code;
> +
> +	return ice_init_check_adminq(hw);
> +}
> +
> +/**
> + * ice_shutdown_ctrlq - shutdown routine for any control queue
> + * @hw: pointer to the hardware structure
> + * @q_type: specific Control queue type
> + */
> +static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
> +{
> +	struct ice_ctl_q_info *cq;
> +
> +	switch (q_type) {
> +	case ICE_CTL_Q_ADMIN:
> +		cq = &hw->adminq;
> +		if (ice_check_sq_alive(hw, cq))
> +			ice_aq_q_shutdown(hw, true);
> +		break;
> +	default:
> +		return;
> +	}
> +
> +	ice_shutdown_sq(hw, cq);
> +	ice_shutdown_rq(hw, cq);
> +	mutex_destroy(&cq->sq_lock);
> +	mutex_destroy(&cq->rq_lock);
> +}
> +
> +/**
> + * ice_shutdown_all_ctrlq - shutdown routine for all control queues
> + * @hw: pointer to the hardware structure
> + */
> +void ice_shutdown_all_ctrlq(struct ice_hw *hw)
> +{
> +	/* Shutdown FW admin queue */
> +	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
> +}
> +
> +/**
> + * ice_clean_sq - cleans Admin send queue (ATQ)
> + * @hw: pointer to the hardware structure
> + * @cq: pointer to the specific Control queue
> + *
> + * returns the number of free desc
> + */
> +static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	struct ice_ctl_q_ring *sq = &cq->sq;
> +	u16 ntc = sq->next_to_clean;
> +	struct ice_sq_cd *details;
> +	struct ice_aq_desc *desc;
> +
> +	desc = ICE_CTL_Q_DESC(*sq, ntc);
> +	details = ICE_CTL_Q_DETAILS(*sq, ntc);
> +
> +	while (rd32(hw, cq->sq.head) != ntc) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
> +		memset(desc, 0, sizeof(*desc));
> +		memset(details, 0, sizeof(*details));
> +		ntc++;
> +		if (ntc == sq->count)
> +			ntc = 0;
> +		desc = ICE_CTL_Q_DESC(*sq, ntc);
> +		details = ICE_CTL_Q_DETAILS(*sq, ntc);
> +	}
> +
> +	sq->next_to_clean = ntc;
> +
> +	return ICE_CTL_Q_DESC_UNUSED(sq);
> +}
> +
> +/**
> + * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
> + * @hw: pointer to the hw struct
> + * @cq: pointer to the specific Control queue
> + *
> + * Returns true if the firmware has processed all descriptors on the
> + * admin send queue. Returns false if there are still requests pending.
> + */
> +static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
> +{
> +	/* AQ designers suggest use of head for better
> +	 * timing reliability than DD bit
> +	 */
> +	return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
> +}
> +
> +/**
> + * ice_sq_send_cmd - send command to Control Queue (ATQ)
> + * @hw: pointer to the hw struct
> + * @cq: pointer to the specific Control queue
> + * @desc: prefilled descriptor describing the command (non DMA mem)
> + * @buf: buffer to use for indirect commands (or NULL for direct commands)
> + * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
> + * @cd: pointer to command details structure
> + *
> + * This is the main send command routine for the ATQ.  It runs the q,
> + * cleans the queue, etc.
> + */
> +enum ice_status
> +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
> +		struct ice_aq_desc *desc, void *buf, u16 buf_size,
> +		struct ice_sq_cd *cd)
> +{
> +	struct ice_dma_mem *dma_buf = NULL;
> +	struct ice_aq_desc *desc_on_ring;
> +	bool cmd_completed = false;
> +	enum ice_status status = 0;
> +	struct ice_sq_cd *details;
> +	u32 total_delay = 0;
> +	u16 retval = 0;
> +	u32 val = 0;
> +
> +	mutex_lock(&cq->sq_lock);
> +
> +	cq->sq_last_status = ICE_AQ_RC_OK;
> +
> +	if (!cq->sq.count) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "Control Send queue not initialized.\n");
> +		status = ICE_ERR_AQ_EMPTY;
> +		goto sq_send_command_error;
> +	}
> +
> +	if ((buf && !buf_size) || (!buf && buf_size)) {
> +		status = ICE_ERR_PARAM;
> +		goto sq_send_command_error;
> +	}
> +
> +	if (buf) {
> +		if (buf_size > cq->sq_buf_size) {
> +			ice_debug(hw, ICE_DBG_AQ_MSG,
> +				  "Invalid buffer size for Control Send queue: %d.\n",
> +				  buf_size);
> +			status = ICE_ERR_INVAL_SIZE;
> +			goto sq_send_command_error;
> +		}
> +
> +		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
> +		if (buf_size > ICE_AQ_LG_BUF)
> +			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
> +	}
> +
> +	val = rd32(hw, cq->sq.head);
> +	if (val >= cq->num_sq_entries) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "head overrun at %d in the Control Send Queue ring\n",
> +			  val);
> +		status = ICE_ERR_AQ_EMPTY;
> +		goto sq_send_command_error;
> +	}
> +
> +	details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
> +	if (cd)
> +		memcpy(details, cd, sizeof(*details));
> +	else
> +		memset(details, 0, sizeof(*details));
> +
> +	/* Call clean and check queue available function to reclaim the
> +	 * descriptors that were processed by FW/MBX; the function returns the
> +	 * number of desc available. The clean function called here could be
> +	 * called in a separate thread in case of asynchronous completions.
> +	 */
> +	if (ice_clean_sq(hw, cq) == 0) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "Error: Control Send Queue is full.\n");
> +		status = ICE_ERR_AQ_FULL;
> +		goto sq_send_command_error;
> +	}
> +
> +	/* initialize the temp desc pointer with the right desc */
> +	desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
> +
> +	/* if the desc is available copy the temp desc to the right place */
> +	memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
> +
> +	/* if buf is not NULL assume indirect command */
> +	if (buf) {
> +		dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
> +		/* copy the user buf into the respective DMA buf */
> +		memcpy(dma_buf->va, buf, buf_size);
> +		desc_on_ring->datalen = cpu_to_le16(buf_size);
> +
> +		/* Update the address values in the desc with the pa value
> +		 * for respective buffer
> +		 */
> +		desc_on_ring->params.generic.addr_high =
> +			cpu_to_le32(upper_32_bits(dma_buf->pa));
> +		desc_on_ring->params.generic.addr_low =
> +			cpu_to_le32(lower_32_bits(dma_buf->pa));
> +	}
> +
> +	/* Debug desc and buffer */
> +	ice_debug(hw, ICE_DBG_AQ_MSG,
> +		  "ATQ: Control Send queue desc and buffer:\n");
> +
> +	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
> +
> +	(cq->sq.next_to_use)++;
> +	if (cq->sq.next_to_use == cq->sq.count)
> +		cq->sq.next_to_use = 0;
> +	wr32(hw, cq->sq.tail, cq->sq.next_to_use);
> +
> +	do {
> +		if (ice_sq_done(hw, cq))
> +			break;
> +
> +		mdelay(1);
> +		total_delay++;
> +	} while (total_delay < cq->sq_cmd_timeout);
> +
> +	/* if ready, copy the desc back to temp */
> +	if (ice_sq_done(hw, cq)) {
> +		memcpy(desc, desc_on_ring, sizeof(*desc));
> +		if (buf) {
> +			/* get returned length to copy */
> +			u16 copy_size = le16_to_cpu(desc->datalen);
> +
> +			if (copy_size > buf_size) {
> +				ice_debug(hw, ICE_DBG_AQ_MSG,
> +					  "Return len %d > than buf len %d\n",
> +					  copy_size, buf_size);
> +				status = ICE_ERR_AQ_ERROR;
> +			} else {
> +				memcpy(buf, dma_buf->va, copy_size);
> +			}
> +		}
> +		retval = le16_to_cpu(desc->retval);
> +		if (retval) {
> +			ice_debug(hw, ICE_DBG_AQ_MSG,
> +				  "Control Send Queue command completed with error 0x%x\n",
> +				  retval);
> +
> +			/* strip off FW internal code */
> +			retval &= 0xff;
> +		}
> +		cmd_completed = true;
> +		if (!status && retval != ICE_AQ_RC_OK)
> +			status = ICE_ERR_AQ_ERROR;
> +		cq->sq_last_status = (enum ice_aq_err)retval;
> +	}
> +
> +	ice_debug(hw, ICE_DBG_AQ_MSG,
> +		  "ATQ: desc and buffer writeback:\n");
> +
> +	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
> +
> +	/* save writeback AQ if requested */
> +	if (details->wb_desc)
> +		memcpy(details->wb_desc, desc_on_ring,
> +		       sizeof(*details->wb_desc));
> +
> +	/* update the error if time out occurred */
> +	if (!cmd_completed) {
> +		ice_debug(hw, ICE_DBG_AQ_MSG,
> +			  "Control Send Queue Writeback timeout.\n");
> +		status = ICE_ERR_AQ_TIMEOUT;
> +	}
> +
> +sq_send_command_error:
> +	mutex_unlock(&cq->sq_lock);
> +	return status;
> +}
> +
> +/**
> + * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
> + * @desc: pointer to the temp descriptor (non DMA mem)
> + * @opcode: the opcode can be used to decide which flags to turn off or on
> + *
> + * Fill the desc with default values
> + */
> +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
> +{
> +	/* zero out the desc */
> +	memset(desc, 0, sizeof(*desc));
> +	desc->opcode = cpu_to_le16(opcode);
> +	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
> new file mode 100644
> index 000000000000..143578d02aec
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
> @@ -0,0 +1,97 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#ifndef _ICE_CONTROLQ_H_
> +#define _ICE_CONTROLQ_H_
> +
> +#include "ice_adminq_cmd.h"
> +
> +#define ICE_CTL_Q_DESC(R, i) \
> +	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
> +
> +#define ICE_CTL_Q_DESC_UNUSED(R) \
> +	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
> +	      (R)->next_to_clean - (R)->next_to_use - 1)
> +
> +/* Defines that help manage the driver vs FW API checks.
> + * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
> + *
> + */
> +#define EXP_FW_API_VER_BRANCH		0x00
> +#define EXP_FW_API_VER_MAJOR		0x00
> +#define EXP_FW_API_VER_MINOR		0x01
> +
> +/* Different control queue types: These are mainly for SW consumption. */
> +enum ice_ctl_q {
> +	ICE_CTL_Q_UNKNOWN = 0,
> +	ICE_CTL_Q_ADMIN,
> +};
> +
> +/* Control Queue default settings */
> +#define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
> +
> +struct ice_ctl_q_ring {
> +	void *dma_head;			/* Virtual address to dma head */
> +	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
> +	void *cmd_buf;			/* command buffer memory */
> +
> +	union {
> +		struct ice_dma_mem *sq_bi;
> +		struct ice_dma_mem *rq_bi;
> +	} r;
> +
> +	u16 count;		/* Number of descriptors */
> +
> +	/* used for interrupt processing */
> +	u16 next_to_use;
> +	u16 next_to_clean;
> +
> +	/* used for queue tracking */
> +	u32 head;
> +	u32 tail;
> +	u32 len;
> +	u32 bah;
> +	u32 bal;
> +	u32 len_mask;
> +	u32 len_ena_mask;
> +	u32 head_mask;
> +};
> +
> +/* sq transaction details */
> +struct ice_sq_cd {
> +	struct ice_aq_desc *wb_desc;
> +};
> +
> +#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
> +
> +/* Control Queue information */
> +struct ice_ctl_q_info {
> +	enum ice_ctl_q qtype;
> +	struct ice_ctl_q_ring rq;	/* receive queue */
> +	struct ice_ctl_q_ring sq;	/* send queue */
> +	u32 sq_cmd_timeout;		/* send queue cmd write back timeout */
> +	u16 num_rq_entries;		/* receive queue depth */
> +	u16 num_sq_entries;		/* send queue depth */
> +	u16 rq_buf_size;		/* receive queue buffer size */
> +	u16 sq_buf_size;		/* send queue buffer size */
> +	struct mutex sq_lock;		/* Send queue lock */
> +	struct mutex rq_lock;		/* Receive queue lock */
> +	enum ice_aq_err sq_last_status;	/* last status on send queue */
> +	enum ice_aq_err rq_last_status;	/* last status on receive queue */
> +};
> +
> +#endif /* _ICE_CONTROLQ_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> new file mode 100644
> index 000000000000..3d6bb273e4c8
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +/* Machine-generated file */
> +
> +#ifndef _ICE_HW_AUTOGEN_H_
> +#define _ICE_HW_AUTOGEN_H_
> +
> +#define PF_FW_ARQBAH			0x00080180
> +#define PF_FW_ARQBAL			0x00080080
> +#define PF_FW_ARQH			0x00080380
> +#define PF_FW_ARQH_ARQH_S		0
> +#define PF_FW_ARQH_ARQH_M		ICE_M(0x3FF, PF_FW_ARQH_ARQH_S)
> +#define PF_FW_ARQLEN			0x00080280
> +#define PF_FW_ARQLEN_ARQLEN_S		0
> +#define PF_FW_ARQLEN_ARQLEN_M		ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
> +#define PF_FW_ARQLEN_ARQENABLE_S	31
> +#define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
> +#define PF_FW_ARQT			0x00080480
> +#define PF_FW_ATQBAH			0x00080100
> +#define PF_FW_ATQBAL			0x00080000
> +#define PF_FW_ATQH			0x00080300
> +#define PF_FW_ATQH_ATQH_S		0
> +#define PF_FW_ATQH_ATQH_M		ICE_M(0x3FF, PF_FW_ATQH_ATQH_S)
> +#define PF_FW_ATQLEN			0x00080200
> +#define PF_FW_ATQLEN_ATQLEN_S		0
> +#define PF_FW_ATQLEN_ATQLEN_M		ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
> +#define PF_FW_ATQLEN_ATQENABLE_S	31
> +#define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
> +#define PF_FW_ATQT			0x00080400
> +
> +#endif /* _ICE_HW_AUTOGEN_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 0631812aef2b..408ae90d6562 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -34,7 +34,11 @@ MODULE_VERSION(DRV_VERSION);
>   
>   static int debug = -1;
>   module_param(debug, int, 0644);
> -MODULE_PARM_DESC(debug, "netif message level (0=none,...,0x7FFF=all)");
> +#ifndef CONFIG_DYNAMIC_DEBUG
> +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
> +#else
> +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
> +#endif /* !CONFIG_DYNAMIC_DEBUG */
>   
>   /**
>    * ice_probe - Device initialization routine
> @@ -93,6 +97,11 @@ static int ice_probe(struct pci_dev *pdev,
>   	hw->bus.func = PCI_FUNC(pdev->devfn);
>   	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
>   
> +#ifndef CONFIG_DYNAMIC_DEBUG
> +	if (debug < -1)
> +		hw->debug_mask = debug;
> +#endif
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
> new file mode 100644
> index 000000000000..fc6576a3a9d1
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
> @@ -0,0 +1,86 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#ifndef _ICE_OSDEP_H_
> +#define _ICE_OSDEP_H_
> +
> +#include <linux/types.h>
> +#include <linux/io.h>
> +#ifndef CONFIG_64BIT
> +#include <linux/io-64-nonatomic-lo-hi.h>
> +#endif
> +
> +#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
> +#define rd32(a, reg)		readl((a)->hw_addr + (reg))
> +#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
> +#define rd64(a, reg)		readq((a)->hw_addr + (reg))
> +
> +#define ICE_M(m, s)		((m) << (s))
> +
> +struct ice_dma_mem {
> +	void *va;
> +	dma_addr_t pa;
> +	size_t size;
> +};
> +
> +#define ice_hw_to_dev(ptr)	\
> +	(&(container_of((ptr), struct ice_pf, hw))->pdev->dev)
> +
> +#ifdef CONFIG_DYNAMIC_DEBUG
> +#define ice_debug(hw, type, fmt, args...) \
> +	dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
> +
> +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
> +	print_hex_dump_debug(KBUILD_MODNAME " ",		\
> +			     DUMP_PREFIX_OFFSET, rowsize,	\
> +			     groupsize, buf, len, false)
> +#else
> +#define ice_debug(hw, type, fmt, args...)			\
> +do {								\
> +	if ((type) & (hw)->debug_mask)				\
> +		dev_info(ice_hw_to_dev(hw), fmt, ##args);	\
> +} while (0)
> +
> +#ifdef DEBUG
> +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
> +do {								\
> +	if ((type) & (hw)->debug_mask)				\
> +		print_hex_dump_debug(KBUILD_MODNAME,		\
> +				     DUMP_PREFIX_OFFSET,	\
> +				     rowsize, groupsize, buf,	\
> +				     len, false);		\
> +} while (0)
> +#else
> +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
> +do {								\
> +	struct ice_hw *hw_l = hw;				\
> +	if ((type) & (hw_l)->debug_mask) {			\
> +		u16 len_l = len;				\
> +		u8 *buf_l = buf;				\
> +		int i;						\
> +		for (i = 0; i < (len_l - 16); i += 16)		\
> +			ice_debug(hw_l, type, "0x%04X  %16ph\n",\
> +				  i, ((buf_l) + i));		\
> +		if (i < len_l)					\
> +			ice_debug(hw_l, type, "0x%04X  %*ph\n", \
> +				  i, ((len_l) - i), ((buf_l) + i));\
> +	}							\
> +} while (0)
> +#endif /* DEBUG */
> +#endif /* CONFIG_DYNAMIC_DEBUG */
> +
> +#endif /* _ICE_OSDEP_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
> new file mode 100644
> index 000000000000..940d6f57adcf
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/ice/ice_status.h
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Intel(R) Ethernet Connection E800 Series Linux Driver
> + * Copyright (c) 2018, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + */
> +
> +#ifndef _ICE_STATUS_H_
> +#define _ICE_STATUS_H_
> +
> +/* Error Codes */
> +enum ice_status {
> +	ICE_ERR_PARAM				= -1,
> +	ICE_ERR_NOT_READY			= -3,
> +	ICE_ERR_INVAL_SIZE			= -6,
> +	ICE_ERR_FW_API_VER			= -10,
> +	ICE_ERR_NO_MEMORY			= -11,
> +	ICE_ERR_CFG				= -12,
> +	ICE_ERR_AQ_ERROR			= -100,
> +	ICE_ERR_AQ_TIMEOUT			= -101,
> +	ICE_ERR_AQ_FULL				= -102,
> +	ICE_ERR_AQ_EMPTY			= -104,
> +};
> +
> +#endif /* _ICE_STATUS_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
> index ad01e5f73d2c..cfa98e55a33a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_type.h
> +++ b/drivers/net/ethernet/intel/ice/ice_type.h
> @@ -18,6 +18,15 @@
>   #ifndef _ICE_TYPE_H_
>   #define _ICE_TYPE_H_
>   
> +#include "ice_status.h"
> +#include "ice_hw_autogen.h"
> +#include "ice_osdep.h"
> +#include "ice_controlq.h"
> +
> +/* debug masks - set these bits in hw->debug_mask to control output */
> +#define ICE_DBG_AQ_MSG		BIT_ULL(24)
> +#define ICE_DBG_AQ_CMD		BIT_ULL(27)
> +
>   /* Bus parameters */
>   struct ice_bus_info {
>   	u16 device;
> @@ -28,6 +37,7 @@ struct ice_bus_info {
>   struct ice_hw {
>   	u8 __iomem *hw_addr;
>   	void *back;
> +	u64 debug_mask;		/* bitmap for debug mask */
>   
>   	/* pci info */
>   	u16 device_id;
> @@ -37,6 +47,18 @@ struct ice_hw {
>   	u8 revision_id;
>   
>   	struct ice_bus_info bus;
> +	/* Control Queue info */
> +	struct ice_ctl_q_info adminq;
> +
> +	u8 api_branch;		/* API branch version */
> +	u8 api_maj_ver;		/* API major version */
> +	u8 api_min_ver;		/* API minor version */
> +	u8 api_patch;		/* API patch version */
> +	u8 fw_branch;		/* firmware branch version */
> +	u8 fw_maj_ver;		/* firmware major version */
> +	u8 fw_min_ver;		/* firmware minor version */
> +	u8 fw_patch;		/* firmware patch version */
> +	u32 fw_build;		/* firmware build number */

Perhaps you can simply use struct ice_aqc_get_ver here rather than 
redefining these fields?

sln


>   };
>   
>   #endif /* _ICE_TYPE_H_ */
> 

^ permalink raw reply

* Re: [pull request][for-next 00/11] Mellanox, mlx5 IPSec updates 2018-02-28-2 (Part 2)
From: Doug Ledford @ 2018-03-13  1:43 UTC (permalink / raw)
  To: Saeed Mahameed, davem@davemloft.net
  Cc: Jason Gunthorpe, netdev@vger.kernel.org, Matan Barak,
	linux-rdma@vger.kernel.org, Leon Romanovsky, Aviad Yehezkel,
	Boris Pismenny
In-Reply-To: <1520890989.24565.111.camel@mellanox.com>

[-- Attachment #1: Type: text/plain, Size: 1154 bytes --]

On Mon, 2018-03-12 at 21:43 +0000, Saeed Mahameed wrote:
> On Thu, 2018-03-08 at 14:14 -0500, Doug Ledford wrote:
> > On 3/8/2018 1:04 PM, David Miller wrote:
> > > From: Saeed Mahameed <saeedm@mellanox.com>
> > > Date: Wed,  7 Mar 2018 17:26:03 -0800
> > > 
> > > > Hi Dave and Doug,
> > > > 
> > > > This series includes shared code updates (IPSec part2) for mlx5
> > > > core 
> > > > driver for both netdev and rdma subsystems.  This series should
> > > > be pulled
> > > > to both trees so we can continue netdev and rdma specific
> > > > submissions
> > > > separately.
> > > 
> > > Doug, please give this series a quick review.
> > > 
> > > Thank you.
> > > 
> > 
> > I'm good with it.  Pull it as you see fit.
> > 
> 
> Hi Doug, 
> 
> Just FYI, I see that Dave already pulled the series into net-next.
> I think you can pull as well, so Leon will start the mlx5 RDMA
> submissions as planned on top this last pull request.
> 
> thanks,
> Saeed.

Ok, thanks Saeed.

-- 
Doug Ledford <dledford@redhat.com>
    GPG KeyID: B826A3330E572FDD
    Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: remove superfluous wmb() memory barriers
From: David Miller @ 2018-03-13  1:20 UTC (permalink / raw)
  To: niklas.cassel
  Cc: Jose.Abreu, peppe.cavallaro, alexandre.torgue, pavel, netdev,
	linux-kernel
In-Reply-To: <20180312085541.GA406@axis.com>

From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 12 Mar 2018 09:55:42 +0100

> Jose is simply responding to the commit message description of this patch.
> 
> You explained that there is an implicit memory barrier between physical memory
> writes and those to MMIO register space, as long as you used writel().
> 
> I assumed that you meant writel() vs writel_relaxed(), where the latter
> does not do an implicit barrier.
> 
> I also found this from you:
> https://lwn.net/Articles/198995/
> 
> If my assumption was incorrect, please correct me.
> 
> As you seem to possess knowledge regarding this, you are probably the most
> suited person to know if this patch simply needs a commit message rewrite,
> or if it should be dropped completely.

Yes, I have always argued that the non-relaxed {read,write}{b,w,l}()
interfaces should imply barriers wrt. physical memory accesses.

Without that, drivers are harder to write.  Specifically, drivers that
work properly on all architectures will be very hard to write.

But looking at some drivers, probably this isn't fully the case right
now.  Which is unfortunate, but we must code to reality.

For example, looking at drivers/net/ethernet/broadcom/tg3.c, we have
tg3_start_xmit() going:

	write descriptors
	...
	/* Sync BD data before updating mailbox */
	wmb();
	...
		/* Packets are ready, update Tx producer idx on card. */
		tw32_tx_mbox(tnapi->prodmbox, entry);

so it really seems to be necessary.

So this stmmac revert is not valid.

Sorry for all the confusion.  I guess it's a lot of wishful thinking on
my part :-)

^ permalink raw reply

* [PATCH] netfilter: nf_tables: remove VLA usage
From: Gustavo A. R. Silva @ 2018-03-13  1:13 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	David S. Miller
  Cc: netfilter-devel, coreteam, netdev, linux-kernel, Kernel Hardening,
	Kees Cook, Gustavo A. R. Silva

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 net/netfilter/nf_tables_api.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3f815b6..ea76903 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4357,16 +4357,20 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 				       const struct nft_object_type *type,
 				       const struct nlattr *attr)
 {
-	struct nlattr *tb[type->maxattr + 1];
+	struct nlattr **tb;
 	const struct nft_object_ops *ops;
 	struct nft_object *obj;
-	int err;
+	int err = -ENOMEM;
+
+	tb = kcalloc(type->maxattr + 1, sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		goto err1;
 
 	if (attr) {
 		err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
 				       NULL);
 		if (err < 0)
-			goto err1;
+			goto err2;
 	} else {
 		memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
 	}
@@ -4375,7 +4379,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 		ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
 		if (IS_ERR(ops)) {
 			err = PTR_ERR(ops);
-			goto err1;
+			goto err2;
 		}
 	} else {
 		ops = type->ops;
@@ -4383,18 +4387,21 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 
 	err = -ENOMEM;
 	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
-	if (obj == NULL)
-		goto err1;
+	if (!obj)
+		goto err2;
 
 	err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	obj->ops = ops;
 
+	kfree(tb);
 	return obj;
-err2:
+err3:
 	kfree(obj);
+err2:
+	kfree(tb);
 err1:
 	return ERR_PTR(err);
 }
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC PATCH net-next 3/5] bridge: allow switchdev port to handle flooding by itself
From: Andrew Lunn @ 2018-03-13  1:11 UTC (permalink / raw)
  To: Igor Mitsyanko
  Cc: ivecera, jiri, netdev, bridge, sergey.matyukevich.os, ashevchenko,
	smaksimenko, dlebed
In-Reply-To: <c22c1758-e383-2930-34c8-8c8c7b8455c1@quantenna.com>

> The flag was introduced to enable hardware switch capabilities of
> drivers/net/wireless/quantenna/qtnfmac wifi driver. It does not have any
> switchdev functionality in upstream tree at this moment, and this patchset
> was intended as a preparatory change.

O.K. But i suggest you add basic switchdev support first. Then think
about adding new functionality. That way you can learn more about
switchdev, and we can learn more about your hardware.

> qtnfmac driver provides several physical radios (5 GHz and 2.4 GHz), each
> can have up to 8 virtual network interfaces. These interfaces can be bridged
> together in various configurations, and I'm trying to figure out what is the
> most efficient way to handle it from bridging perspective.

I think the first thing to do is get this part correctly represented
by switchdev. I don't think any of us maintainers have thought about
how wireless and switchdev can be combined. The wifi model seems to be
one phy device, with multiple MACs running on top of it, with each MAC
being a single SSID.  So is it one SSID per virtual interface?  Or are
your virtual network interfaces actually virtual phys in the wireless
model, and you can have multiple MACs on top of each virtual phy?

> My assumption was that software FDB and hardware FDB should always
> be in sync with each other. I guess it is a safe assumption if
> handled correctly?  Hardware should send a notification for each new
> FDB it has learned, and switchdev driver should process FDB
> notifications from software bridge.

No, you cannot make this assumption. Take the example of DSA
switches. They are generally connected over an MDIO bus, or an SPI
bus. The bandwidth is small. How long do you think it takes the
hardware to learn 8K MAC addresses with 5x 1Gbps ports receiving 64
byte packets? DSA drivers have no way of keeping up with the
hardware. And there is no need to. Everything works fine with the SW
and the HW bridge having different dynamic FDB entries.

I don't even think your hardware will have the hardware and software
in sync. How fast can your hardware learn new addresses? 'Line' rate?
Or do you prevent the hardware learning a new address until the
software bridge has confirmed it has learnt the previous new address?

> qtnfmac hardware has its own memory and maintains FWT table, so for the best
> efficiency forwarding between virtual interfaces should be handled locally.
> Qtnfmac can handle all the mentioned flooding by itself:
> - unknown unicasts
> - broadcast and unknown multicast
> - known multicasts (does have IGMP snooping)
> - can do multicast-to-unicast translation if required.
> 
> The most important usecase IMO is a multicast transmission, specific example
> being:
> - 2.4GHz x 8 and 5GHz x 8 virtual wifi interfaces, bridged with backbone
> ethernet interface in Linux
> - multicast video streaming from a server behind ethernet
> - multicast clients connected to some wifi interfaces

I agree this makes sense. But we need to ensure the solution is
generic, not something which just works for your hardware/firmware.  I
know somebody who would love to be able to do something like this with
DSA drivers. They would probably sacrifice IGMP snooping and just
flood everywhere, if that is all the hardware could do. But so far,
i've not been able to figure out a way to do this.

     Andrew

^ permalink raw reply

* Re: [RESEND PATCH net-next 1/1] tc-testing: updated gact tests with batch test cases
From: David Miller @ 2018-03-13  1:09 UTC (permalink / raw)
  To: mrv; +Cc: netdev, kernel, jhs, xiyou.wangcong, jiri
In-Reply-To: <1520885222-11496-1-git-send-email-mrv@mojatatu.com>

From: Roman Mashak <mrv@mojatatu.com>
Date: Mon, 12 Mar 2018 16:07:02 -0400

> Signed-off-by: Roman Mashak <mrv@mojatatu.com>

Applied.

> -]
> +]
> \ No newline at end of file

Please fix this.

^ permalink raw reply

* Re: [RESEND PATCH net-next 1/1] tc-testing: add TC vlan action tests
From: David Miller @ 2018-03-13  1:09 UTC (permalink / raw)
  To: mrv; +Cc: netdev, kernel, jhs, xiyou.wangcong, jiri
In-Reply-To: <1520885197-11257-1-git-send-email-mrv@mojatatu.com>

From: Roman Mashak <mrv@mojatatu.com>
Date: Mon, 12 Mar 2018 16:06:37 -0400

> Signed-off-by: Roman Mashak <mrv@mojatatu.com>

Applied.

^ permalink raw reply

* Re: [PATCH net] net: dsa: Fix dsa_is_user_port() test inversion
From: David Miller @ 2018-03-13  1:06 UTC (permalink / raw)
  To: f.fainelli; +Cc: netdev, yxj790222, andrew, vivien.didelot, linux-kernel
In-Reply-To: <1520895640-28139-1-git-send-email-f.fainelli@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 12 Mar 2018 16:00:40 -0700

> During the conversion to dsa_is_user_port(), a condition ended up being
> reversed, which would prevent the creation of any user port when using
> the legacy binding and/or platform data, fix that.
> 
> Fixes: 4a5b85ffe2a0 ("net: dsa: use dsa_is_user_port everywhere")
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

Applied and queued up for -stable, thanks Florian.

^ permalink raw reply

* Re: [PATCH net-next v2 0/4] ibmvnic: Fix VLAN and other device errata
From: Thomas Falcon @ 2018-03-13  1:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, jallen, nfont
In-Reply-To: <20180312.205927.730960247311710768.davem@davemloft.net>

On 03/12/2018 07:59 PM, David Miller wrote:
> From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
> Date: Mon, 12 Mar 2018 17:07:38 -0500
>
>> On 03/12/2018 11:56 AM, David Miller wrote:
>>> From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
>>> Date: Mon, 12 Mar 2018 11:51:01 -0500
>>>
>>>> This patch series contains fixes for VLAN and other backing hardware
>>>> errata. The VLAN fixes are mostly to account for the additional four
>>>> bytes VLAN header in TX descriptors and buffers, when applicable.
>>>>
>>>> The other fixes for device errata are to pad small packets to avoid a
>>>> possible connection error that can occur when some devices attempt to
>>>> transmit small packets. The other fixes are GSO related. Some devices
>>>> cannot handle a smaller MSS or a packet with a single segment, so
>>>> disable GSO in those cases.
>>>>
>>>> v2: Fix style mistake (unneeded brackets) in patch 3/4
>>> Series applied, thanks Thomas.
>>>
>> Really sorry about this, but 3/4 is still wrong.  There is actually a compiler warning caused by it, which I'm surprised wasn't caught by the test robot.  Is there still time to send a v3?
> If it's in my tree you must send a fixup patch.
>
Thanks, I will do that shortly.

^ permalink raw reply

* Re: [PATCH net-next] net: phy: set link state to down when creating the phy_device
From: David Miller @ 2018-03-13  1:00 UTC (permalink / raw)
  To: hkallweit1; +Cc: andrew, f.fainelli, netdev
In-Reply-To: <a48309ab-97ee-153a-8153-98c38117af09@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 11 Mar 2018 15:00:37 +0100

> Currently the link state is initialized to "up" when the phy_device is
> being created. This is not consistent with the phy state being
> initialized to PHY_DOWN.
> 
> Usually this doesn't do any harm because the link state is updated
> once the PHY reaches state PHY_AN. However e.g. if a LAN port isn't
> used and the PHY remains down this inconsistency remains and calls
> to functions like phy_print_status() give false results.
> Therefore change the initialization to link being down.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH net-next v2 0/4] ibmvnic: Fix VLAN and other device errata
From: David Miller @ 2018-03-13  0:59 UTC (permalink / raw)
  To: tlfalcon; +Cc: netdev, jallen, nfont
In-Reply-To: <f0568ee1-2c9a-1d60-3660-50da5844b620@linux.vnet.ibm.com>

From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 17:07:38 -0500

> On 03/12/2018 11:56 AM, David Miller wrote:
>> From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
>> Date: Mon, 12 Mar 2018 11:51:01 -0500
>>
>>> This patch series contains fixes for VLAN and other backing hardware
>>> errata. The VLAN fixes are mostly to account for the additional four
>>> bytes VLAN header in TX descriptors and buffers, when applicable.
>>>
>>> The other fixes for device errata are to pad small packets to avoid a
>>> possible connection error that can occur when some devices attempt to
>>> transmit small packets. The other fixes are GSO related. Some devices
>>> cannot handle a smaller MSS or a packet with a single segment, so
>>> disable GSO in those cases.
>>>
>>> v2: Fix style mistake (unneeded brackets) in patch 3/4
>> Series applied, thanks Thomas.
>>
> Really sorry about this, but 3/4 is still wrong.  There is actually a compiler warning caused by it, which I'm surprised wasn't caught by the test robot.  Is there still time to send a v3?

If it's in my tree you must send a fixup patch.

^ permalink raw reply

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2018-03-13  0:29 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Petr Machata,
	Jiri Pirko, Ido Schimmel

[-- Attachment #1: Type: text/plain, Size: 4339 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got conflicts in:

  drivers/net/ethernet/mellanox/mlxsw/spectrum.h
  drivers/net/ethernet/mellanox/mlxsw/spectrum.c

between commit:

  663f1b26f9c1 ("mlxsw: spectrum: Prevent duplicate mirrors")

from the net tree and commit:

  a629ef210d89 ("mlxsw: spectrum: Move SPAN code to separate module")

from the net-next tree.

I fixed it up (the code changed in the former was moved in the latter -
I applied the below merge fix patch) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 13 Mar 2018 11:25:13 +1100
Subject: [PATCH] mlxsw: merge fix for move of SPAN code

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 28 ++++++++++++++++++----
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |  3 +++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index ae22a3daffbf..4d6ed207b4af 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -600,13 +600,17 @@ int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 }
 
 static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
-				    struct mlxsw_sp_span_entry *span_entry)
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
+				    enum mlxsw_sp_span_type type,
+				    struct mlxsw_sp_port *port,
+				    bool bind)
 {
 	struct mlxsw_sp_span_inspected_port *p;
 
 	list_for_each_entry(p, &span_entry->bound_ports_list, list)
-		if (port->local_port == p->local_port)
+		if (type == p->type &&
+		    port->local_port == p->local_port &&
+		    bind == p->bound)
 			return p;
 	return NULL;
 }
@@ -636,8 +640,22 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
 	struct mlxsw_sp_span_inspected_port *inspected_port;
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
 	char sbib_pl[MLXSW_REG_SBIB_LEN];
+	int i;
 	int err;
 
+	/* A given (source port, direction) can only be bound to one analyzer,
+	 * so if a binding is requested, check for conflicts.
+	 */
+	if (bind)
+		for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+			struct mlxsw_sp_span_entry *curr =
+				&mlxsw_sp->span.entries[i];
+
+			if (mlxsw_sp_span_entry_bound_port_find(curr, type,
+								port, bind))
+				return -EEXIST;
+		}
+
 	/* if it is an egress SPAN, bind a shared buffer to it */
 	if (type == MLXSW_SP_SPAN_EGRESS) {
 		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
@@ -665,6 +683,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
 	}
 	inspected_port->local_port = port->local_port;
 	inspected_port->type = type;
+	inspected_port->bound = bind;
 	list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
 
 	return 0;
@@ -691,7 +710,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
 	char sbib_pl[MLXSW_REG_SBIB_LEN];
 
-	inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+	inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
+							     port, bind);
 	if (!inspected_port)
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 948aceb512c5..4b87ec20e658 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -51,6 +51,9 @@ struct mlxsw_sp_span_inspected_port {
 	struct list_head list;
 	enum mlxsw_sp_span_type type;
 	u8 local_port;
+
+	/* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+	bool bound;
 };
 
 struct mlxsw_sp_span_parms {
-- 
2.16.1

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply related

* Re: [RFC PATCH v0 2/2] skbuff: Notify errors with sk_error_report()
From: Eric Dumazet @ 2018-03-13  0:23 UTC (permalink / raw)
  To: Vinicius Costa Gomes, netdev; +Cc: randy.e.witt, davem
In-Reply-To: <20180312231052.13961-3-vinicius.gomes@intel.com>



On 03/12/2018 04:10 PM, Vinicius Costa Gomes wrote:
> When errors are enqueued to the error queue via sock_queue_err_skb()
> function, it is possible that the correct application is not notified.

Your patch makes sense, thanks.

^ permalink raw reply

* [PATCH] netfilter: nfnetlink_cthelper: Remove VLA usage
From: Gustavo A. R. Silva @ 2018-03-13  0:21 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	David S. Miller
  Cc: netfilter-devel, coreteam, netdev, linux-kernel, Kernel Hardening,
	Kees Cook, Gustavo A. R. Silva

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 net/netfilter/nfnetlink_cthelper.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d33ce6d..4a4b293 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -314,23 +314,30 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
 static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
 					   struct nf_conntrack_helper *helper)
 {
-	struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+	struct nf_conntrack_expect_policy *new_policy;
 	struct nf_conntrack_expect_policy *policy;
-	int i, err;
+	int i, ret = 0;
+
+	new_policy = kmalloc_array(helper->expect_class_max + 1,
+				   sizeof(*new_policy), GFP_KERNEL);
+	if (!new_policy)
+		return -ENOMEM;
 
 	/* Check first that all policy attributes are well-formed, so we don't
 	 * leave things in inconsistent state on errors.
 	 */
 	for (i = 0; i < helper->expect_class_max + 1; i++) {
 
-		if (!tb[NFCTH_POLICY_SET + i])
-			return -EINVAL;
+		if (!tb[NFCTH_POLICY_SET + i]) {
+			ret = -EINVAL;
+			goto err;
+		}
 
-		err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+		ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
 						      &new_policy[i],
 						      tb[NFCTH_POLICY_SET + i]);
-		if (err < 0)
-			return err;
+		if (ret < 0)
+			goto err;
 	}
 	/* Now we can safely update them. */
 	for (i = 0; i < helper->expect_class_max + 1; i++) {
@@ -340,7 +347,9 @@ static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
 		policy->timeout	= new_policy->timeout;
 	}
 
-	return 0;
+err:
+	kfree(new_policy);
+	return ret;
 }
 
 static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
-- 
2.7.4

^ permalink raw reply related

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2018-03-13  0:04 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Brad Mouring,
	Heiner Kallweit

[-- Attachment #1: Type: text/plain, Size: 2935 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  drivers/net/phy/phy.c

between commit:

  a2c054a896b8 ("net: phy: Tell caller result of phy_change()")

from the net tree and commit:

  4fff2d33c707 ("net: phy: remove phy_error from phy_disable_interrupts")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/net/phy/phy.c
index 9aabfa1a455a,c2d9027be863..000000000000
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@@ -617,77 -617,6 +617,68 @@@ static void phy_error(struct phy_devic
  	phy_trigger_machine(phydev, false);
  }
  
 +/**
 + * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
 + * @phydev: target phy_device struct
 + */
 +static int phy_disable_interrupts(struct phy_device *phydev)
 +{
 +	int err;
 +
 +	/* Disable PHY interrupts */
 +	err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
 +	if (err)
- 		goto phy_err;
++		return err;
 +
 +	/* Clear the interrupt */
- 	err = phy_clear_interrupt(phydev);
- 	if (err)
- 		goto phy_err;
- 
- 	return 0;
- 
- phy_err:
- 	phy_error(phydev);
- 
- 	return err;
++	return phy_clear_interrupt(phydev);
 +}
 +
 +/**
 + * phy_change - Called by the phy_interrupt to handle PHY changes
 + * @phydev: phy_device struct that interrupted
 + */
 +static irqreturn_t phy_change(struct phy_device *phydev)
 +{
 +	if (phy_interrupt_is_valid(phydev)) {
 +		if (phydev->drv->did_interrupt &&
 +		    !phydev->drv->did_interrupt(phydev))
 +			return IRQ_NONE;
 +
 +		if (phydev->state == PHY_HALTED)
 +			if (phy_disable_interrupts(phydev))
 +				goto phy_err;
 +	}
 +
 +	mutex_lock(&phydev->lock);
 +	if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
 +		phydev->state = PHY_CHANGELINK;
 +	mutex_unlock(&phydev->lock);
 +
 +	/* reschedule state queue work to run as soon as possible */
 +	phy_trigger_machine(phydev, true);
 +
 +	if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
 +		goto phy_err;
 +	return IRQ_HANDLED;
 +
 +phy_err:
 +	phy_error(phydev);
 +	return IRQ_NONE;
 +}
 +
 +/**
 + * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
 + * @work: work_struct that describes the work to be done
 + */
 +void phy_change_work(struct work_struct *work)
 +{
 +	struct phy_device *phydev =
 +		container_of(work, struct phy_device, phy_queue);
 +
 +	phy_change(phydev);
 +}
 +
  /**
   * phy_interrupt - PHY interrupt handler
   * @irq: interrupt line

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] netfilter: cttimeout: remove VLA usage
From: Joe Perches @ 2018-03-12 23:58 UTC (permalink / raw)
  To: Gustavo A. R. Silva, Pablo Neira Ayuso, Jozsef Kadlecsik,
	Florian Westphal, David S. Miller
  Cc: netfilter-devel, coreteam, netdev, linux-kernel, Kernel Hardening,
	Kees Cook, Gustavo A. R. Silva
In-Reply-To: <20180312231442.GA22071@embeddedgus>

On Mon, 2018-03-12 at 18:14 -0500, Gustavo A. R. Silva wrote:
> In preparation to enabling -Wvla, remove VLA and replace it
> with dynamic memory allocation.
> 
> From a security viewpoint, the use of Variable Length Arrays can be
> a vector for stack overflow attacks. Also, in general, as the code
> evolves it is easy to lose track of how big a VLA can get. Thus, we
> can end up having segfaults that are hard to debug.
> 
> Also, fixed as part of the directive to remove all VLAs from
[]
> diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
[]
> @@ -51,19 +51,27 @@ ctnl_timeout_parse_policy(void *timeouts,
>  			  const struct nf_conntrack_l4proto *l4proto,
>  			  struct net *net, const struct nlattr *attr)
>  {
> +	struct nlattr **tb;
>  	int ret = 0;
>  
> -	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
> -		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
> +	if (!l4proto->ctnl_timeout.nlattr_to_obj)
> +		return 0;

Why not
	if unlikely(!...)
	
>  
> -		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
> -				       attr, l4proto->ctnl_timeout.nla_policy,
> -				       NULL);
> -		if (ret < 0)
> -			return ret;
> +	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
> +		     GFP_KERNEL);

kmalloc_array?

>  
> -		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
> -	}
> +	if (!tb)
> +		return -ENOMEM;
> +
> +	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
> +			       l4proto->ctnl_timeout.nla_policy, NULL);
> +	if (ret < 0)
> +		goto err;
> +
> +	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
> +
> +err:
> +	kfree(tb);
>  	return ret;
>  }
>  

^ permalink raw reply

* Re: [PATCH v3] kernel.h: Skip single-eval logic on literals in min()/max()
From: Linus Torvalds @ 2018-03-12 23:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Kees Cook, Linux Kernel Mailing List, Josh Poimboeuf,
	Rasmus Villemoes, Gustavo A. R. Silva, Tobin C. Harding,
	Steven Rostedt, Jonathan Corbet, Chris Mason, Josef Bacik,
	David Sterba, David S. Miller, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, Ingo Molnar, Peter Zijlstra, Thomas Gleixner,
	Masahiro Yamada, Borislav Petkov, Randy Dunlap
In-Reply-To: <20180312155524.b421f07d7f08f24c57bd1887@linux-foundation.org>

On Mon, Mar 12, 2018 at 3:55 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> Replacing the __builtin_choose_expr() with ?: works of course.

Hmm. That sounds like the right thing to do. We were so myopically
staring at the __builtin_choose_expr() problem that we overlooked the
obvious solution.

Using __builtin_constant_p() together with a ?: is in fact our common
pattern, so that should be fine. The only real reason to use
__builtin_choose_expr() is if you want to get the *type* to vary
depending on which side you choose, but that's not an issue for
min/max.

> What will be the runtime effects?

There should be none. Gcc will turn the conditional for the ?: into a
constant, and DTRT.

              Linus

^ permalink raw reply

* Re: [PATCH 1/1] net: check dev->reg_state before deref of napi netdev_ops
From: Cong Wang @ 2018-03-12 23:17 UTC (permalink / raw)
  To: Josh Elsasser
  Cc: David Miller, Greg Kroah-Hartman, Eric Dumazet, Sasha Levin,
	Willem de Bruijn, Alexander Potapenko, Michal Kubeček,
	Linux Kernel Network Developers, LKML
In-Reply-To: <20180311192322.101598-2-jelsasser@appneta.com>

On Sun, Mar 11, 2018 at 12:22 PM, Josh Elsasser <jelsasser@appneta.com> wrote:
> init_dummy_netdev() leaves its netdev_ops pointer zeroed. This leads
> to a NULL pointer dereference when sk_busy_loop fires against an iwlwifi
> wireless adapter and checks napi->dev->netdev_ops->ndo_busy_poll.
>
> Avoid this by ensuring that napi->dev is not a dummy device before
> dereferencing napi dev's netdev_ops, preventing the following panic:

Hmm, how about just checking ->netdev_ops? Checking reg_state looks
odd, although it works.

^ permalink raw reply

* [PATCH] netfilter: cttimeout: remove VLA usage
From: Gustavo A. R. Silva @ 2018-03-12 23:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	David S. Miller
  Cc: netfilter-devel, coreteam, netdev, linux-kernel, Kernel Hardening,
	Kees Cook, Gustavo A. R. Silva

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 net/netfilter/nfnetlink_cttimeout.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 6819300..dcd7bd3 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -51,19 +51,27 @@ ctnl_timeout_parse_policy(void *timeouts,
 			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
+	struct nlattr **tb;
 	int ret = 0;
 
-	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
-		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
+	if (!l4proto->ctnl_timeout.nlattr_to_obj)
+		return 0;
 
-		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-				       attr, l4proto->ctnl_timeout.nla_policy,
-				       NULL);
-		if (ret < 0)
-			return ret;
+	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+		     GFP_KERNEL);
 
-		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
-	}
+	if (!tb)
+		return -ENOMEM;
+
+	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
+			       l4proto->ctnl_timeout.nla_policy, NULL);
+	if (ret < 0)
+		goto err;
+
+	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+
+err:
+	kfree(tb);
 	return ret;
 }
 
-- 
2.7.4

^ permalink raw reply related

* [RFC PATCH v0 2/2] skbuff: Notify errors with sk_error_report()
From: Vinicius Costa Gomes @ 2018-03-12 23:10 UTC (permalink / raw)
  To: netdev; +Cc: Vinicius Costa Gomes, randy.e.witt, davem
In-Reply-To: <20180312231052.13961-1-vinicius.gomes@intel.com>

When errors are enqueued to the error queue via sock_queue_err_skb()
function, it is possible that the correct application is not notified.

Reported-by: Randy E. Witt <randy.e.witt@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 715c13495ba6..6def3534f509 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4181,7 +4181,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 
 	skb_queue_tail(&sk->sk_error_queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk);
+		sk->sk_error_report(sk);
 	return 0;
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
-- 
2.16.2

^ permalink raw reply related

* [RFC PATCH v0 1/2] selftests/txtimestamp: Add more configurable parameters
From: Vinicius Costa Gomes @ 2018-03-12 23:10 UTC (permalink / raw)
  To: netdev; +Cc: Vinicius Costa Gomes, randy.e.witt, davem
In-Reply-To: <20180312231052.13961-1-vinicius.gomes@intel.com>

Add a way to configure whether poll() should wait forever for an event,
the number of packets that should be sent for each test, and whether
there should be any delay between packets.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
 .../selftests/networking/timestamping/txtimestamp.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
index 5df07047ca86..5190b1dd78b1 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -68,9 +68,11 @@ static int cfg_num_pkts = 4;
 static int do_ipv4 = 1;
 static int do_ipv6 = 1;
 static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
 static bool cfg_show_payload;
 static bool cfg_do_pktinfo;
 static bool cfg_loop_nodata;
+static bool cfg_no_delay;
 static uint16_t dest_port = 9000;
 
 static struct sockaddr_in daddr;
@@ -171,7 +173,7 @@ static void __poll(int fd)
 
 	memset(&pollfd, 0, sizeof(pollfd));
 	pollfd.fd = fd;
-	ret = poll(&pollfd, 1, 100);
+	ret = poll(&pollfd, 1, cfg_poll_timeout);
 	if (ret != 1)
 		error(1, errno, "poll");
 }
@@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt)
 			error(1, errno, "send");
 
 		/* wait for all errors to be queued, else ACKs arrive OOO */
-		usleep(50 * 1000);
+		if (!cfg_no_delay)
+			usleep(50 * 1000);
 
 		__poll(fd);
 
@@ -397,6 +400,9 @@ static void __attribute__((noreturn)) usage(const char *filepath)
 			"  -n:   set no-payload option\n"
 			"  -r:   use raw\n"
 			"  -R:   use raw (IP_HDRINCL)\n"
+			"  -D:   no delay between packets\n"
+			"  -F:   poll() waits forever for an event\n"
+			"  -c N: number of packets for each test\n"
 			"  -p N: connect to port N\n"
 			"  -u:   use udp\n"
 			"  -x:   show payload (up to 70 bytes)\n",
@@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv)
 	int proto_count = 0;
 	char c;
 
-	while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+	while ((c = getopt(argc, argv, "46hIl:np:rRuxc:DF")) != -1) {
 		switch (c) {
 		case '4':
 			do_ipv6 = 0;
@@ -447,6 +453,15 @@ static void parse_opt(int argc, char **argv)
 		case 'x':
 			cfg_show_payload = true;
 			break;
+		case 'c':
+			cfg_num_pkts = strtoul(optarg, NULL, 10);
+			break;
+		case 'D':
+			cfg_no_delay = true;
+			break;
+		case 'F':
+			cfg_poll_timeout = -1;
+			break;
 		case 'h':
 		default:
 			usage(argv[0]);
-- 
2.16.2

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox