* [PATCH 11/20] octeontx2-af: Add support for stripping STAG/CTAG
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Tomasz Duszynski, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Tomasz Duszynski <tduszynski@marvell.com>
This works by shadowing existing UCAST MCAM entry
with a new one additionally matching either NPC_LT_LB_CTAG
or NPC_LT_LB_STAG. For this to fully work one needs to
send properly configured NIX_VTAG_CFG message afterwards i.e with
strip and capture enabled and type set to 0.
On receiving tagged packet NIX will remove outer VLAN and capture
TCI in NIX_RX_PARSE_S.
Also simplified RX Vtag configuration flow
With this setting STRIP/CAPTURE VTAG actions separately would be
possible. Following combinations are possible: STRIP,
STRIP and CAPTURE, CAPTURE or nothing (0 disables respective actions).
Signed-off-by: Tomasz Duszynski <tduszynski@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 6 +-
drivers/net/ethernet/marvell/octeontx2/af/npc.h | 30 ++++++++
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 8 +++
.../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 83 +++++++++++++++++-----
.../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 46 +++++++++++-
5 files changed, 152 insertions(+), 21 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 737dbc9..f2bf77d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -181,7 +181,8 @@ M(NIX_SET_MAC_ADDR, 0x800a, nix_set_mac_addr, msg_rsp) \
M(NIX_SET_RX_MODE, 0x800b, nix_rx_mode, msg_rsp) \
M(NIX_SET_HW_FRS, 0x800c, nix_frs_cfg, msg_rsp) \
M(NIX_LF_START_RX, 0x800d, msg_req, msg_rsp) \
-M(NIX_LF_STOP_RX, 0x800e, msg_req, msg_rsp)
+M(NIX_LF_STOP_RX, 0x800e, msg_req, msg_rsp) \
+M(NIX_RXVLAN_ALLOC, 0x8012, msg_req, msg_rsp)
/* Messages initiated by AF (range 0xC00 - 0xDFF) */
#define MBOX_UP_CGX_MESSAGES \
@@ -499,6 +500,7 @@ struct nix_txschq_config {
struct nix_vtag_config {
struct mbox_msghdr hdr;
+ /* '0' for 4 octet VTAG, '1' for 8 octet VTAG */
u8 vtag_size;
/* cfg_type is '0' for tx vlan cfg
* cfg_type is '1' for rx vlan cfg
@@ -519,7 +521,7 @@ struct nix_vtag_config {
/* valid when cfg_type is '1' */
struct {
- /* rx vtag type index */
+ /* rx vtag type index, valid values are in 0..7 range */
u8 vtag_type;
/* rx vtag strip */
u8 strip_vtag :1;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index f98b011..3f7e5e6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -259,4 +259,34 @@ struct nix_rx_action {
#endif
};
+struct nix_rx_vtag_action {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 rsvd_63_48 :16;
+ u64 vtag1_valid :1;
+ u64 vtag1_type :3;
+ u64 rsvd_43 :1;
+ u64 vtag1_lid :3;
+ u64 vtag1_relptr :8;
+ u64 rsvd_31_16 :16;
+ u64 vtag0_valid :1;
+ u64 vtag0_type :3;
+ u64 rsvd_11 :1;
+ u64 vtag0_lid :3;
+ u64 vtag0_relptr :8;
+#else
+ u64 vtag0_relptr :8;
+ u64 vtag0_lid :3;
+ u64 rsvd_11 :1;
+ u64 vtag0_type :3;
+ u64 vtag0_valid :1;
+ u64 rsvd_31_16 :16;
+ u64 vtag1_relptr :8;
+ u64 vtag1_lid :3;
+ u64 rsvd_43 :1;
+ u64 vtag1_type :3;
+ u64 vtag1_valid :1;
+ u64 rsvd_63_48 :16;
+#endif
+};
+
#endif /* NPC_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 12fbdba..e213bf4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -142,6 +142,11 @@ struct rvu_pfvf {
/* Broadcast pkt replication info */
u16 bcast_mce_idx;
struct nix_mce_list bcast_mce_list;
+
+ /* VLAN offload */
+ struct mcam_entry entry;
+ int rxvlan_index;
+ bool rxvlan;
};
struct nix_txsch {
@@ -356,6 +361,8 @@ int rvu_mbox_handler_NIX_STATS_RST(struct rvu *rvu, struct msg_req *req,
int rvu_mbox_handler_NIX_VTAG_CFG(struct rvu *rvu,
struct nix_vtag_config *req,
struct msg_rsp *rsp);
+int rvu_mbox_handler_NIX_RXVLAN_ALLOC(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
int rvu_mbox_handler_NIX_RSS_FLOWKEY_CFG(struct rvu *rvu,
struct nix_rss_flowkey_cfg *req,
struct msg_rsp *rsp);
@@ -384,6 +391,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
int nixlf, u64 chan);
+int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf);
void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 5853af4..70a2997 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -192,6 +192,7 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf)
pfvf->maxlen = 0;
pfvf->minlen = 0;
+ pfvf->rxvlan = false;
/* Remove this PF_FUNC from bcast pkt replication list */
err = nix_update_bcast_mce_list(rvu, pcifunc, false);
@@ -1209,28 +1210,15 @@ int rvu_mbox_handler_NIX_TXSCHQ_CFG(struct rvu *rvu,
static int nix_rx_vtag_cfg(struct rvu *rvu, int nixlf, int blkaddr,
struct nix_vtag_config *req)
{
- u64 regval = 0;
+ u64 regval = req->vtag_size;
-#define NIX_VTAGTYPE_MAX 0x8ull
-#define NIX_VTAGSIZE_MASK 0x7ull
-#define NIX_VTAGSTRIP_CAP_MASK 0x30ull
-
- if (req->rx.vtag_type >= NIX_VTAGTYPE_MAX ||
- req->vtag_size > VTAGSIZE_T8)
+ if (req->rx.vtag_type > 7 || req->vtag_size > VTAGSIZE_T8)
return -EINVAL;
- regval = rvu_read64(rvu, blkaddr,
- NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, req->rx.vtag_type));
-
- if (req->rx.strip_vtag && req->rx.capture_vtag)
- regval |= BIT_ULL(4) | BIT_ULL(5);
- else if (req->rx.strip_vtag)
+ if (req->rx.capture_vtag)
+ regval |= BIT_ULL(5);
+ if (req->rx.strip_vtag)
regval |= BIT_ULL(4);
- else
- regval &= ~(BIT_ULL(4) | BIT_ULL(5));
-
- regval &= ~NIX_VTAGSIZE_MASK;
- regval |= req->vtag_size & NIX_VTAGSIZE_MASK;
rvu_write64(rvu, blkaddr,
NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, req->rx.vtag_type), regval);
@@ -1770,6 +1758,9 @@ int rvu_mbox_handler_NIX_SET_MAC_ADDR(struct rvu *rvu,
rvu_npc_install_ucast_entry(rvu, pcifunc, nixlf,
pfvf->rx_chan_base, req->mac_addr);
+
+ rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+
return 0;
}
@@ -1803,6 +1794,9 @@ int rvu_mbox_handler_NIX_SET_RX_MODE(struct rvu *rvu, struct nix_rx_mode *req,
else
rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
pfvf->rx_chan_base, allmulti);
+
+ rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+
return 0;
}
@@ -1941,6 +1935,59 @@ int rvu_mbox_handler_NIX_SET_HW_FRS(struct rvu *rvu, struct nix_frs_cfg *req,
return 0;
}
+int rvu_mbox_handler_NIX_RXVLAN_ALLOC(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ struct npc_mcam_alloc_entry_req alloc_req = { };
+ struct npc_mcam_alloc_entry_rsp alloc_rsp = { };
+ struct npc_mcam_free_entry_req free_req = { };
+ u16 pcifunc = req->hdr.pcifunc;
+ int blkaddr, nixlf, err;
+ struct rvu_pfvf *pfvf;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ if (pfvf->rxvlan)
+ return 0;
+
+ /* alloc new mcam entry */
+ alloc_req.hdr.pcifunc = pcifunc;
+ alloc_req.count = 1;
+
+ err = rvu_mbox_handler_NPC_MCAM_ALLOC_ENTRY(rvu, &alloc_req,
+ &alloc_rsp);
+ if (err)
+ return err;
+
+ /* update entry to enable rxvlan offload */
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (blkaddr < 0) {
+ err = NIX_AF_ERR_AF_LF_INVALID;
+ goto free_entry;
+ }
+
+ nixlf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], pcifunc, 0);
+ if (nixlf < 0) {
+ err = NIX_AF_ERR_AF_LF_INVALID;
+ goto free_entry;
+ }
+
+ pfvf->rxvlan_index = alloc_rsp.entry_list[0];
+ /* all it means is that rxvlan_index is valid */
+ pfvf->rxvlan = true;
+
+ err = rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+ if (err)
+ goto free_entry;
+
+ return 0;
+free_entry:
+ free_req.hdr.pcifunc = pcifunc;
+ free_req.entry = alloc_rsp.entry_list[0];
+ rvu_mbox_handler_NPC_MCAM_FREE_ENTRY(rvu, &free_req, rsp);
+ pfvf->rxvlan = false;
+ return err;
+}
+
static void nix_link_config(struct rvu *rvu, int blkaddr)
{
struct rvu_hwinfo *hw = rvu->hw;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 100ce29..5dbb5cd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -306,7 +306,9 @@ static u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam,
void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int nixlf, u64 chan, u8 *mac_addr)
{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
struct npc_mcam *mcam = &rvu->hw->mcam;
+ struct nix_rx_vtag_action vtag_action;
struct mcam_entry entry = { {0} };
struct nix_rx_action action;
int blkaddr, index, kwi;
@@ -345,6 +347,20 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
entry.action = *(u64 *)&action;
npc_config_mcam_entry(rvu, mcam, blkaddr, index,
NIX_INTF_RX, &entry, true);
+
+ /* add VLAN matching, setup action and save entry back for later */
+ entry.kw[0] |= (NPC_LT_LB_STAG | NPC_LT_LB_CTAG) << 20;
+ entry.kw_mask[0] |= (NPC_LT_LB_STAG & NPC_LT_LB_CTAG) << 20;
+
+ *(u64 *)&vtag_action = 0;
+ vtag_action.vtag0_valid = 1;
+ /* must match type set in NIX_VTAG_CFG */
+ vtag_action.vtag0_type = 0;
+ vtag_action.vtag0_lid = NPC_LID_LA;
+ vtag_action.vtag0_relptr = 12;
+ entry.vtag_action = *(u64 *)&vtag_action;
+
+ memcpy(&pfvf->entry, &entry, sizeof(entry));
}
void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
@@ -352,7 +368,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
{
struct npc_mcam *mcam = &rvu->hw->mcam;
struct mcam_entry entry = { {0} };
- struct nix_rx_action action;
+ struct nix_rx_action action = { };
int blkaddr, index, kwi;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -521,6 +537,8 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action);
+
+ rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
}
static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
@@ -560,6 +578,8 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf);
else
rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
+
+ rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
}
void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
@@ -2018,3 +2038,27 @@ int rvu_mbox_handler_NPC_GET_KEX_CFG(struct rvu *rvu, struct msg_req *req,
}
return 0;
}
+
+int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+ int blkaddr, index;
+ bool enable;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+ if (blkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ if (!pfvf->rxvlan)
+ return 0;
+
+ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
+ NIXLF_UCAST_ENTRY);
+ pfvf->entry.action = npc_get_mcam_action(rvu, mcam, blkaddr, index);
+ enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, index);
+ npc_config_mcam_entry(rvu, mcam, blkaddr, pfvf->rxvlan_index,
+ NIX_INTF_RX, &pfvf->entry, enable);
+
+ return 0;
+}
--
2.7.4
^ permalink raw reply related
* [PATCH 12/20] octeontx2-af: Verify NPA/SSO/NIX PF_FUNC mapping
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Sunil Goutham <sgoutham@marvell.com>
While mapping a NIX LF to a NPA LF attached PF_FUNC or
SSO LF attached PF_FUNC, verify if PF_FUNC is valid and
if that PF_FUNC has a LF of that block attached to it or not.
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 2 ++
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 38 ++++++++++++++++++++++
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 +
.../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 32 +++++++++++-------
4 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index f2bf77d..96dd59e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -389,6 +389,8 @@ enum nix_af_status {
NIX_AF_INVAL_TXSCHQ_CFG = -412,
NIX_AF_SMQ_FLUSH_FAILED = -413,
NIX_AF_ERR_LF_RESET = -414,
+ NIX_AF_INVAL_NPA_PF_FUNC = -419,
+ NIX_AF_INVAL_SSO_PF_FUNC = -420,
};
/* For NIX LF context alloc and init */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index c7f00895..bd1df1c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -337,6 +337,28 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc)
return &rvu->pf[rvu_get_pf(pcifunc)];
}
+static bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc)
+{
+ int pf, vf, nvfs;
+ u64 cfg;
+
+ pf = rvu_get_pf(pcifunc);
+ if (pf >= rvu->hw->total_pfs)
+ return false;
+
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+ return true;
+
+ /* Check if VF is within number of VFs attached to this PF */
+ vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+ nvfs = (cfg >> 12) & 0xFF;
+ if (vf >= nvfs)
+ return false;
+
+ return true;
+}
+
bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr)
{
struct rvu_block *block;
@@ -860,6 +882,22 @@ static u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blktype)
return 0;
}
+bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype)
+{
+ struct rvu_pfvf *pfvf;
+
+ if (!is_pf_func_valid(rvu, pcifunc))
+ return false;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+
+ /* Check if this PFFUNC has a LF of type blktype attached */
+ if (!rvu_get_rsrc_mapcount(pfvf, blktype))
+ return false;
+
+ return true;
+}
+
static int rvu_lookup_rsrc(struct rvu *rvu, struct rvu_block *block,
int pcifunc, int slot)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index e213bf4..d54db21 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -255,6 +255,7 @@ int rvu_get_pf(u16 pcifunc);
struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc);
void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf);
bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr);
+bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype);
int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot);
int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf);
int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 70a2997..d8d8947 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -694,6 +694,24 @@ int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
if (nixlf < 0)
return NIX_AF_ERR_AF_LF_INVALID;
+ /* Check if requested 'NIXLF <=> NPALF' mapping is valid */
+ if (req->npa_func) {
+ /* If default, use 'this' NIXLF's PFFUNC */
+ if (req->npa_func == RVU_DEFAULT_PF_FUNC)
+ req->npa_func = pcifunc;
+ if (!is_pffunc_map_valid(rvu, req->npa_func, BLKTYPE_NPA))
+ return NIX_AF_INVAL_NPA_PF_FUNC;
+ }
+
+ /* Check if requested 'NIXLF <=> SSOLF' mapping is valid */
+ if (req->sso_func) {
+ /* If default, use 'this' NIXLF's PFFUNC */
+ if (req->sso_func == RVU_DEFAULT_PF_FUNC)
+ req->sso_func = pcifunc;
+ if (!is_pffunc_map_valid(rvu, req->sso_func, BLKTYPE_SSO))
+ return NIX_AF_INVAL_SSO_PF_FUNC;
+ }
+
/* If RSS is being enabled, check if requested config is valid.
* RSS table size should be power of two, otherwise
* RSS_GRP::OFFSET + adder might go beyond that group or
@@ -798,18 +816,10 @@ int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
/* Enable LMTST for this NIX LF */
rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0));
- /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC
- * If requester has sent a 'RVU_DEFAULT_PF_FUNC' use this NIX LF's
- * PCIFUNC itself.
- */
- if (req->npa_func == RVU_DEFAULT_PF_FUNC)
- cfg = pcifunc;
- else
+ /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC */
+ if (req->npa_func)
cfg = req->npa_func;
-
- if (req->sso_func == RVU_DEFAULT_PF_FUNC)
- cfg |= (u64)pcifunc << 16;
- else
+ if (req->sso_func)
cfg |= (u64)req->sso_func << 16;
cfg |= (u64)req->xqe_sz << 33;
--
2.7.4
^ permalink raw reply related
* [PATCH 13/20] octeontx2-af: Add FLR interrupt handler
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Geetha sowjanya, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Geetha sowjanya <gakula@marvell.com>
All RVU PF's upon receiving a FLR will trigger an IRQ to RVU AF.
This patch enables all RVU PF's FLR interrupt and registers a
handler. Upon receiving an IRQ a workqueue is scheduled to
cleanup all RVU blocks being used by the PF/VF which received
the FLR.
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 117 +++++++++++++++++++++++-
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 5 +
2 files changed, 121 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index bd1df1c..0f5923a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1601,6 +1601,44 @@ static void rvu_enable_mbox_intr(struct rvu *rvu)
INTR_MASK(hw->total_pfs) & ~1ULL);
}
+static void rvu_flr_handler(struct work_struct *work)
+{
+ struct rvu_work *flrwork = container_of(work, struct rvu_work, work);
+ struct rvu *rvu = flrwork->rvu;
+ u16 pf;
+
+ pf = flrwork - rvu->flr_wrk;
+
+ /* Signal FLR finish */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFTRPEND, BIT_ULL(pf));
+
+ /* Enable interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S, BIT_ULL(pf));
+}
+
+static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
+{
+ struct rvu *rvu = (struct rvu *)rvu_irq;
+ u64 intr;
+ u8 pf;
+
+ intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT);
+
+ for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+ if (intr & (1ULL << pf)) {
+ /* PF is already dead do only AF related operations */
+ queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
+ /* clear interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT,
+ BIT_ULL(pf));
+ /* Disable the interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
+ BIT_ULL(pf));
+ }
+ }
+ return IRQ_HANDLED;
+}
+
static void rvu_unregister_interrupts(struct rvu *rvu)
{
int irq;
@@ -1609,6 +1647,10 @@ static void rvu_unregister_interrupts(struct rvu *rvu)
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT_ENA_W1C,
INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+ /* Disable the PF FLR interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
+ INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
for (irq = 0; irq < rvu->num_vec; irq++) {
if (rvu->irq_allocated[irq])
free_irq(pci_irq_vector(rvu->pdev, irq), rvu);
@@ -1660,6 +1702,27 @@ static int rvu_register_interrupts(struct rvu *rvu)
/* Enable mailbox interrupts from all PFs */
rvu_enable_mbox_intr(rvu);
+ /* Register FLR interrupt handler */
+ sprintf(&rvu->irq_name[RVU_AF_INT_VEC_PFFLR * NAME_SIZE],
+ "RVUAF FLR");
+ ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_PFFLR),
+ rvu_flr_intr_handler, 0,
+ &rvu->irq_name[RVU_AF_INT_VEC_PFFLR * NAME_SIZE],
+ rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for FLR\n");
+ goto fail;
+ }
+ rvu->irq_allocated[RVU_AF_INT_VEC_PFFLR] = true;
+
+ /* Enable FLR interrupt for all PFs*/
+ rvu_write64(rvu, BLKADDR_RVUM,
+ RVU_AF_PFFLR_INT, INTR_MASK(rvu->hw->total_pfs));
+
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S,
+ INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
return 0;
fail:
@@ -1667,6 +1730,51 @@ static int rvu_register_interrupts(struct rvu *rvu)
return ret;
}
+static void rvu_flr_wq_destroy(struct rvu *rvu)
+{
+ if (rvu->flr_wq) {
+ flush_workqueue(rvu->flr_wq);
+ destroy_workqueue(rvu->flr_wq);
+ rvu->flr_wq = NULL;
+ }
+ kfree(rvu->flr_wrk);
+}
+
+static int rvu_flr_init(struct rvu *rvu)
+{
+ u64 cfg;
+ int pf;
+
+ /* Enable FLR for all PFs*/
+ for (pf = 1; pf < rvu->hw->total_pfs; pf++) {
+ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf),
+ cfg | BIT_ULL(22));
+ }
+
+ rvu->flr_wq = alloc_workqueue("rvu_afpf_flr",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+ 1);
+ if (!rvu->flr_wq)
+ return -ENOMEM;
+
+ rvu->flr_wrk = devm_kcalloc(rvu->dev, rvu->hw->total_pfs,
+ sizeof(struct rvu_work), GFP_KERNEL);
+ if (!rvu->flr_wrk) {
+ destroy_workqueue(rvu->flr_wq);
+ return -ENOMEM;
+ }
+
+ for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+ rvu->flr_wrk[pf].rvu = rvu;
+ INIT_WORK(&rvu->flr_wrk[pf].work, rvu_flr_handler);
+ }
+
+ mutex_init(&rvu->flr_lock);
+
+ return 0;
+}
+
static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
@@ -1737,11 +1845,17 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_mbox;
- err = rvu_register_interrupts(rvu);
+ err = rvu_flr_init(rvu);
if (err)
goto err_cgx;
+ err = rvu_register_interrupts(rvu);
+ if (err)
+ goto err_flr;
+
return 0;
+err_flr:
+ rvu_flr_wq_destroy(rvu);
err_cgx:
rvu_cgx_wq_destroy(rvu);
err_mbox:
@@ -1765,6 +1879,7 @@ static void rvu_remove(struct pci_dev *pdev)
struct rvu *rvu = pci_get_drvdata(pdev);
rvu_unregister_interrupts(rvu);
+ rvu_flr_wq_destroy(rvu);
rvu_cgx_wq_destroy(rvu);
rvu_mbox_destroy(rvu);
rvu_reset_all_blocks(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index d54db21..510ae33 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -200,6 +200,11 @@ struct rvu {
struct rvu_work *mbox_wrk_up;
struct workqueue_struct *mbox_wq;
+ /* PF FLR */
+ struct rvu_work *flr_wrk;
+ struct workqueue_struct *flr_wq;
+ struct mutex flr_lock; /* Serialize FLRs */
+
/* MSI-X */
u16 num_vec;
char *irq_name;
--
2.7.4
^ permalink raw reply related
* [PATCH 14/20] octeontx2-af: Teardown NPA, NIX LF upon receiving FLR
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem
Cc: arnd, linux-soc, Geetha sowjanya, Stanislaw Kardach,
Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Geetha sowjanya <gakula@marvell.com>
Upon receiving FLR IRQ for a RVU PF, teardown or cleanup
resources held by that PF_FUNC. This patch cleans up,
NIX LF
- Stop ingress/egress traffic
- Disable NPC MCAM entries being used.
- Free Tx scheduler queues
- Disable RQ/SQ/CQ HW contexts
NPA LF
- Disable Pool/Aura HW contexts
In future teardown of SSO/SSOW/TIM/CPT will be added.
Also added a mailbox message for a RVU PF to request
AF, to perform FLR for a RVU VF under it.
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Stanislaw Kardach <skardach@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 10 ++-
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 82 +++++++++++++++++++++-
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 2 +
.../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 48 +++++++++++++
.../net/ethernet/marvell/octeontx2/af/rvu_npa.c | 17 +++++
5 files changed, 157 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 96dd59e..4ea7efd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -123,7 +123,8 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox,
M(READY, 0x001, msg_req, ready_msg_rsp) \
M(ATTACH_RESOURCES, 0x002, rsrc_attach, msg_rsp) \
M(DETACH_RESOURCES, 0x003, rsrc_detach, msg_rsp) \
-M(MSIX_OFFSET, 0x004, msg_req, msix_offset_rsp) \
+M(MSIX_OFFSET, 0x005, msg_req, msix_offset_rsp) \
+M(VF_FLR, 0x006, msg_req, msg_rsp) \
/* CGX mbox IDs (range 0x200 - 0x3FF) */ \
M(CGX_START_RXTX, 0x200, msg_req, msg_rsp) \
M(CGX_STOP_RXTX, 0x201, msg_req, msg_rsp) \
@@ -213,6 +214,13 @@ struct msg_rsp {
struct mbox_msghdr hdr;
};
+/* RVU mailbox error codes
+ * Range 256 - 300.
+ */
+enum rvu_af_status {
+ RVU_INVALID_VF_ID = -256,
+};
+
struct ready_msg_rsp {
struct mbox_msghdr hdr;
u16 sclk_feq; /* SCLK frequency */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 0f5923a..60340dc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -29,6 +29,7 @@ static void rvu_set_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
struct rvu_block *block, int lf);
static void rvu_clear_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
struct rvu_block *block, int lf);
+static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc);
/* Supported devices */
static const struct pci_device_id rvu_id_table[] = {
@@ -1320,6 +1321,26 @@ static int rvu_mbox_handler_MSIX_OFFSET(struct rvu *rvu, struct msg_req *req,
return 0;
}
+static int rvu_mbox_handler_VF_FLR(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ u16 pcifunc = req->hdr.pcifunc;
+ u16 vf, numvfs;
+ u64 cfg;
+
+ vf = pcifunc & RVU_PFVF_FUNC_MASK;
+ cfg = rvu_read64(rvu, BLKADDR_RVUM,
+ RVU_PRIV_PFX_CFG(rvu_get_pf(pcifunc)));
+ numvfs = (cfg >> 12) & 0xFF;
+
+ if (vf && vf <= numvfs)
+ __rvu_flr_handler(rvu, pcifunc);
+ else
+ return RVU_INVALID_VF_ID;
+
+ return 0;
+}
+
static int rvu_process_mbox_msg(struct rvu *rvu, int devid,
struct mbox_msghdr *req)
{
@@ -1601,14 +1622,73 @@ static void rvu_enable_mbox_intr(struct rvu *rvu)
INTR_MASK(hw->total_pfs) & ~1ULL);
}
+static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr)
+{
+ struct rvu_block *block;
+ int slot, lf, num_lfs;
+ int err;
+
+ block = &rvu->hw->block[blkaddr];
+ num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc),
+ block->type);
+ if (!num_lfs)
+ return;
+ for (slot = 0; slot < num_lfs; slot++) {
+ lf = rvu_get_lf(rvu, block, pcifunc, slot);
+ if (lf < 0)
+ continue;
+
+ /* Cleanup LF and reset it */
+ if (block->addr == BLKADDR_NIX0)
+ rvu_nix_lf_teardown(rvu, pcifunc, block->addr, lf);
+ else if (block->addr == BLKADDR_NPA)
+ rvu_npa_lf_teardown(rvu, pcifunc, lf);
+
+ err = rvu_lf_reset(rvu, block, lf);
+ if (err) {
+ dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n",
+ block->addr, lf);
+ }
+ }
+}
+
+static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
+{
+ mutex_lock(&rvu->flr_lock);
+ /* Reset order should reflect inter-block dependencies:
+ * 1. Reset any packet/work sources (NIX, CPT, TIM)
+ * 2. Flush and reset SSO/SSOW
+ * 3. Cleanup pools (NPA)
+ */
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NIX0);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_CPT0);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_TIM);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSOW);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSO);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NPA);
+ rvu_detach_rsrcs(rvu, NULL, pcifunc);
+ mutex_unlock(&rvu->flr_lock);
+}
+
static void rvu_flr_handler(struct work_struct *work)
{
struct rvu_work *flrwork = container_of(work, struct rvu_work, work);
struct rvu *rvu = flrwork->rvu;
- u16 pf;
+ u16 pcifunc, numvfs, vf;
+ u64 cfg;
+ int pf;
pf = flrwork - rvu->flr_wrk;
+ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+ numvfs = (cfg >> 12) & 0xFF;
+ pcifunc = pf << RVU_PFVF_PF_SHIFT;
+
+ for (vf = 0; vf < numvfs; vf++)
+ __rvu_flr_handler(rvu, (pcifunc | (vf + 1)));
+
+ __rvu_flr_handler(rvu, pcifunc);
+
/* Signal FLR finish */
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFTRPEND, BIT_ULL(pf));
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 510ae33..9e3843e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -325,6 +325,7 @@ int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req,
/* NPA APIs */
int rvu_npa_init(struct rvu *rvu);
void rvu_npa_freemem(struct rvu *rvu);
+void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf);
int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu,
struct npa_aq_enq_req *req,
struct npa_aq_enq_rsp *rsp);
@@ -342,6 +343,7 @@ bool is_nixlf_attached(struct rvu *rvu, u16 pcifunc);
int rvu_nix_init(struct rvu *rvu);
void rvu_nix_freemem(struct rvu *rvu);
int rvu_get_nixlf_count(struct rvu *rvu);
+void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf);
int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
struct nix_lf_alloc_req *req,
struct nix_lf_alloc_rsp *rsp);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d8d8947..ad99cf7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -105,6 +105,17 @@ static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
return NULL;
}
+static void nix_rx_sync(struct rvu *rvu, int blkaddr)
+{
+ int err;
+
+ /*Sync all in flight RX packets to LLC/DRAM */
+ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+ err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+ if (err)
+ dev_err(rvu->dev, "NIX RX software sync failed\n");
+}
+
static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
int lvl, u16 pcifunc, u16 schq)
{
@@ -2281,3 +2292,40 @@ int rvu_mbox_handler_NIX_LF_STOP_RX(struct rvu *rvu, struct msg_req *req,
rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
return 0;
}
+
+void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ struct hwctx_disable_req ctx_req;
+ int err;
+
+ ctx_req.hdr.pcifunc = pcifunc;
+
+ /* Cleanup NPC MCAM entries, free Tx scheduler queues being used */
+ nix_interface_deinit(rvu, pcifunc, nixlf);
+ nix_rx_sync(rvu, blkaddr);
+ nix_txschq_free(rvu, pcifunc);
+
+ if (pfvf->sq_ctx) {
+ ctx_req.ctype = NIX_AQ_CTYPE_SQ;
+ err = nix_lf_hwctx_disable(rvu, &ctx_req);
+ if (err)
+ dev_err(rvu->dev, "SQ ctx disable failed\n");
+ }
+
+ if (pfvf->rq_ctx) {
+ ctx_req.ctype = NIX_AQ_CTYPE_RQ;
+ err = nix_lf_hwctx_disable(rvu, &ctx_req);
+ if (err)
+ dev_err(rvu->dev, "RQ ctx disable failed\n");
+ }
+
+ if (pfvf->cq_ctx) {
+ ctx_req.ctype = NIX_AQ_CTYPE_CQ;
+ err = nix_lf_hwctx_disable(rvu, &ctx_req);
+ if (err)
+ dev_err(rvu->dev, "CQ ctx disable failed\n");
+ }
+
+ nix_ctx_free(rvu, pfvf);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
index 7531fdc..887daaa 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -470,3 +470,20 @@ void rvu_npa_freemem(struct rvu *rvu)
block = &hw->block[blkaddr];
rvu_aq_free(rvu, block->aq);
}
+
+void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ struct hwctx_disable_req ctx_req;
+
+ /* Disable all pools */
+ ctx_req.hdr.pcifunc = pcifunc;
+ ctx_req.ctype = NPA_AQ_CTYPE_POOL;
+ npa_lf_hwctx_disable(rvu, &ctx_req);
+
+ /* Disable all auras */
+ ctx_req.ctype = NPA_AQ_CTYPE_AURA;
+ npa_lf_hwctx_disable(rvu, &ctx_req);
+
+ npa_ctx_free(rvu, pfvf);
+}
--
2.7.4
^ permalink raw reply related
* [PATCH 15/20] octeontx2-af: Mbox communication support btw AF and it's VFs
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem
Cc: arnd, linux-soc, Tomasz Duszynski, Marko Kallio, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Tomasz Duszynski <tduszynski@marvell.com>
VFs attached to PFs other than AF can not communicate with AF
directly. Instead they are supposed to first send message to
the PF they are residing on and PF forwards it to the AF.
Responses to messages are handled in the reverse order.
On the other hand if VFs are on AF (PF0) itself then direct mailbox
communication is possible since there's no other PF in the way.
This patch addresses this particular case and adds support for
handling it.
Signed-off-by: Tomasz Duszynski <tduszynski@marvell.com>
Signed-off-by: Marko Kallio <mkallio@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 295 +++++++++++++++------
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 17 +-
.../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 6 +-
3 files changed, 223 insertions(+), 95 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 60340dc..cf4df08 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -31,6 +31,15 @@ static void rvu_clear_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
struct rvu_block *block, int lf);
static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc);
+static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ int type, int num,
+ void (mbox_handler)(struct work_struct *),
+ void (mbox_up_handler)(struct work_struct *));
+enum {
+ TYPE_AFVF,
+ TYPE_AFPF,
+};
+
/* Supported devices */
static const struct pci_device_id rvu_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_AF) },
@@ -1341,9 +1350,11 @@ static int rvu_mbox_handler_VF_FLR(struct rvu *rvu, struct msg_req *req,
return 0;
}
-static int rvu_process_mbox_msg(struct rvu *rvu, int devid,
+static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid,
struct mbox_msghdr *req)
{
+ struct rvu *rvu = pci_get_drvdata(mbox->pdev);
+
/* Check if valid, if not reply with a invalid msg */
if (req->sig != OTX2_MBOX_REQ_SIG)
goto bad_message;
@@ -1355,8 +1366,15 @@ static int rvu_process_mbox_msg(struct rvu *rvu, int devid,
int err; \
\
rsp = (struct _rsp_type *)otx2_mbox_alloc_msg( \
- &rvu->mbox, devid, \
+ mbox, devid, \
sizeof(struct _rsp_type)); \
+ /* some handlers should complete even if reply */ \
+ /* could not be allocated */ \
+ if (!rsp && \
+ _id != MBOX_MSG_DETACH_RESOURCES && \
+ _id != MBOX_MSG_NIX_TXSCH_FREE && \
+ _id != MBOX_MSG_VF_FLR) \
+ return -ENOMEM; \
if (rsp) { \
rsp->hdr.id = _id; \
rsp->hdr.sig = OTX2_MBOX_RSP_SIG; \
@@ -1374,29 +1392,38 @@ static int rvu_process_mbox_msg(struct rvu *rvu, int devid,
}
MBOX_MESSAGES
#undef M
- break;
+
bad_message:
default:
- otx2_reply_invalid_msg(&rvu->mbox, devid, req->pcifunc,
- req->id);
+ otx2_reply_invalid_msg(mbox, devid, req->pcifunc, req->id);
return -ENODEV;
}
}
-static void rvu_mbox_handler(struct work_struct *work)
+static void __rvu_mbox_handler(struct rvu_work *mwork, int type)
{
- struct rvu_work *mwork = container_of(work, struct rvu_work, work);
struct rvu *rvu = mwork->rvu;
+ int offset, err, id, devid;
struct otx2_mbox_dev *mdev;
struct mbox_hdr *req_hdr;
struct mbox_msghdr *msg;
+ struct mbox_wq_info *mw;
struct otx2_mbox *mbox;
- int offset, id, err;
- u16 pf;
- mbox = &rvu->mbox;
- pf = mwork - rvu->mbox_wrk;
- mdev = &mbox->dev[pf];
+ switch (type) {
+ case TYPE_AFPF:
+ mw = &rvu->afpf_wq_info;
+ break;
+ case TYPE_AFVF:
+ mw = &rvu->afvf_wq_info;
+ break;
+ default:
+ return;
+ }
+
+ devid = mwork - mw->mbox_wrk;
+ mbox = &mw->mbox;
+ mdev = &mbox->dev[devid];
/* Process received mbox messages */
req_hdr = mdev->mbase + mbox->rx_start;
@@ -1408,10 +1435,21 @@ static void rvu_mbox_handler(struct work_struct *work)
for (id = 0; id < req_hdr->num_msgs; id++) {
msg = mdev->mbase + offset;
- /* Set which PF sent this message based on mbox IRQ */
- msg->pcifunc &= ~(RVU_PFVF_PF_MASK << RVU_PFVF_PF_SHIFT);
- msg->pcifunc |= (pf << RVU_PFVF_PF_SHIFT);
- err = rvu_process_mbox_msg(rvu, pf, msg);
+ /* Set which PF/VF sent this message based on mbox IRQ */
+ switch (type) {
+ case TYPE_AFPF:
+ msg->pcifunc &=
+ ~(RVU_PFVF_PF_MASK << RVU_PFVF_PF_SHIFT);
+ msg->pcifunc |= (devid << RVU_PFVF_PF_SHIFT);
+ break;
+ case TYPE_AFVF:
+ msg->pcifunc &=
+ ~(RVU_PFVF_FUNC_MASK << RVU_PFVF_FUNC_SHIFT);
+ msg->pcifunc |= (devid << RVU_PFVF_FUNC_SHIFT) + 1;
+ break;
+ }
+
+ err = rvu_process_mbox_msg(mbox, devid, msg);
if (!err) {
offset = mbox->rx_start + msg->next_msgoff;
continue;
@@ -1419,31 +1457,57 @@ static void rvu_mbox_handler(struct work_struct *work)
if (msg->pcifunc & RVU_PFVF_FUNC_MASK)
dev_warn(rvu->dev, "Error %d when processing message %s (0x%x) from PF%d:VF%d\n",
- err, otx2_mbox_id2name(msg->id), msg->id, pf,
+ err, otx2_mbox_id2name(msg->id),
+ msg->id, devid,
(msg->pcifunc & RVU_PFVF_FUNC_MASK) - 1);
else
dev_warn(rvu->dev, "Error %d when processing message %s (0x%x) from PF%d\n",
- err, otx2_mbox_id2name(msg->id), msg->id, pf);
+ err, otx2_mbox_id2name(msg->id),
+ msg->id, devid);
}
- /* Send mbox responses to PF */
- otx2_mbox_msg_send(mbox, pf);
+ /* Send mbox responses to VF/PF */
+ otx2_mbox_msg_send(mbox, devid);
}
-static void rvu_mbox_up_handler(struct work_struct *work)
+static inline void rvu_afpf_mbox_handler(struct work_struct *work)
{
struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+ __rvu_mbox_handler(mwork, TYPE_AFPF);
+}
+
+static inline void rvu_afvf_mbox_handler(struct work_struct *work)
+{
+ struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+ __rvu_mbox_handler(mwork, TYPE_AFVF);
+}
+
+static void __rvu_mbox_up_handler(struct rvu_work *mwork, int type)
+{
struct rvu *rvu = mwork->rvu;
struct otx2_mbox_dev *mdev;
struct mbox_hdr *rsp_hdr;
struct mbox_msghdr *msg;
+ struct mbox_wq_info *mw;
struct otx2_mbox *mbox;
- int offset, id;
- u16 pf;
+ int offset, id, devid;
+
+ switch (type) {
+ case TYPE_AFPF:
+ mw = &rvu->afpf_wq_info;
+ break;
+ case TYPE_AFVF:
+ mw = &rvu->afvf_wq_info;
+ break;
+ default:
+ return;
+ }
- mbox = &rvu->mbox_up;
- pf = mwork - rvu->mbox_wrk_up;
- mdev = &mbox->dev[pf];
+ devid = mwork - mw->mbox_wrk_up;
+ mbox = &mw->mbox_up;
+ mdev = &mbox->dev[devid];
rsp_hdr = mdev->mbase + mbox->rx_start;
if (rsp_hdr->num_msgs == 0) {
@@ -1484,128 +1548,182 @@ static void rvu_mbox_up_handler(struct work_struct *work)
mdev->msgs_acked++;
}
- otx2_mbox_reset(mbox, 0);
+ otx2_mbox_reset(mbox, devid);
}
-static int rvu_mbox_init(struct rvu *rvu)
+static inline void rvu_afpf_mbox_up_handler(struct work_struct *work)
{
- struct rvu_hwinfo *hw = rvu->hw;
- void __iomem *hwbase = NULL;
+ struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+ __rvu_mbox_up_handler(mwork, TYPE_AFPF);
+}
+
+static inline void rvu_afvf_mbox_up_handler(struct work_struct *work)
+{
+ struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+ __rvu_mbox_up_handler(mwork, TYPE_AFVF);
+}
+
+static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ int type, int num,
+ void (mbox_handler)(struct work_struct *),
+ void (mbox_up_handler)(struct work_struct *))
+{
+ void __iomem *hwbase = NULL, *reg_base;
+ int err, i, dir, dir_up;
struct rvu_work *mwork;
+ const char *name;
u64 bar4_addr;
- int err, pf;
- rvu->mbox_wq = alloc_workqueue("rvu_afpf_mailbox",
- WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
- hw->total_pfs);
- if (!rvu->mbox_wq)
+ switch (type) {
+ case TYPE_AFPF:
+ name = "rvu_afpf_mailbox";
+ bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR);
+ dir = MBOX_DIR_AFPF;
+ dir_up = MBOX_DIR_AFPF_UP;
+ reg_base = rvu->afreg_base;
+ break;
+ case TYPE_AFVF:
+ name = "rvu_afvf_mailbox";
+ bar4_addr = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR);
+ dir = MBOX_DIR_PFVF;
+ dir_up = MBOX_DIR_PFVF_UP;
+ reg_base = rvu->pfreg_base;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ mw->mbox_wq = alloc_workqueue(name,
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+ num);
+ if (!mw->mbox_wq)
return -ENOMEM;
- rvu->mbox_wrk = devm_kcalloc(rvu->dev, hw->total_pfs,
- sizeof(struct rvu_work), GFP_KERNEL);
- if (!rvu->mbox_wrk) {
+ mw->mbox_wrk = devm_kcalloc(rvu->dev, num,
+ sizeof(struct rvu_work), GFP_KERNEL);
+ if (!mw->mbox_wrk) {
err = -ENOMEM;
goto exit;
}
- rvu->mbox_wrk_up = devm_kcalloc(rvu->dev, hw->total_pfs,
- sizeof(struct rvu_work), GFP_KERNEL);
- if (!rvu->mbox_wrk_up) {
+ mw->mbox_wrk_up = devm_kcalloc(rvu->dev, num,
+ sizeof(struct rvu_work), GFP_KERNEL);
+ if (!mw->mbox_wrk_up) {
err = -ENOMEM;
goto exit;
}
- /* Map mbox region shared with PFs */
- bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR);
/* Mailbox is a reserved memory (in RAM) region shared between
* RVU devices, shouldn't be mapped as device memory to allow
* unaligned accesses.
*/
- hwbase = ioremap_wc(bar4_addr, MBOX_SIZE * hw->total_pfs);
+ hwbase = ioremap_wc(bar4_addr, MBOX_SIZE * num);
if (!hwbase) {
dev_err(rvu->dev, "Unable to map mailbox region\n");
err = -ENOMEM;
goto exit;
}
- err = otx2_mbox_init(&rvu->mbox, hwbase, rvu->pdev, rvu->afreg_base,
- MBOX_DIR_AFPF, hw->total_pfs);
+ err = otx2_mbox_init(&mw->mbox, hwbase, rvu->pdev, reg_base, dir, num);
if (err)
goto exit;
- err = otx2_mbox_init(&rvu->mbox_up, hwbase, rvu->pdev, rvu->afreg_base,
- MBOX_DIR_AFPF_UP, hw->total_pfs);
+ err = otx2_mbox_init(&mw->mbox_up, hwbase, rvu->pdev,
+ reg_base, dir_up, num);
if (err)
goto exit;
- for (pf = 0; pf < hw->total_pfs; pf++) {
- mwork = &rvu->mbox_wrk[pf];
+ for (i = 0; i < num; i++) {
+ mwork = &mw->mbox_wrk[i];
mwork->rvu = rvu;
- INIT_WORK(&mwork->work, rvu_mbox_handler);
- }
+ INIT_WORK(&mwork->work, mbox_handler);
- for (pf = 0; pf < hw->total_pfs; pf++) {
- mwork = &rvu->mbox_wrk_up[pf];
+ mwork = &mw->mbox_wrk_up[i];
mwork->rvu = rvu;
- INIT_WORK(&mwork->work, rvu_mbox_up_handler);
+ INIT_WORK(&mwork->work, mbox_up_handler);
}
return 0;
exit:
if (hwbase)
iounmap((void __iomem *)hwbase);
- destroy_workqueue(rvu->mbox_wq);
+ destroy_workqueue(mw->mbox_wq);
return err;
}
-static void rvu_mbox_destroy(struct rvu *rvu)
+static void rvu_mbox_destroy(struct mbox_wq_info *mw)
{
- if (rvu->mbox_wq) {
- flush_workqueue(rvu->mbox_wq);
- destroy_workqueue(rvu->mbox_wq);
- rvu->mbox_wq = NULL;
+ if (mw->mbox_wq) {
+ flush_workqueue(mw->mbox_wq);
+ destroy_workqueue(mw->mbox_wq);
+ mw->mbox_wq = NULL;
}
- if (rvu->mbox.hwbase)
- iounmap((void __iomem *)rvu->mbox.hwbase);
+ if (mw->mbox.hwbase)
+ iounmap((void __iomem *)mw->mbox.hwbase);
- otx2_mbox_destroy(&rvu->mbox);
- otx2_mbox_destroy(&rvu->mbox_up);
+ otx2_mbox_destroy(&mw->mbox);
+ otx2_mbox_destroy(&mw->mbox_up);
}
-static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
+static void rvu_queue_work(struct mbox_wq_info *mw, int first,
+ int mdevs, u64 intr)
{
- struct rvu *rvu = (struct rvu *)rvu_irq;
struct otx2_mbox_dev *mdev;
struct otx2_mbox *mbox;
struct mbox_hdr *hdr;
+ int i;
+
+ for (i = first; i < mdevs; i++) {
+ /* start from 0 */
+ if (!(intr & BIT_ULL(i - first)))
+ continue;
+
+ mbox = &mw->mbox;
+ mdev = &mbox->dev[i];
+ hdr = mdev->mbase + mbox->rx_start;
+ if (hdr->num_msgs)
+ queue_work(mw->mbox_wq, &mw->mbox_wrk[i].work);
+
+ mbox = &mw->mbox_up;
+ mdev = &mbox->dev[i];
+ hdr = mdev->mbase + mbox->rx_start;
+ if (hdr->num_msgs)
+ queue_work(mw->mbox_wq, &mw->mbox_wrk_up[i].work);
+ }
+}
+
+static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
+{
+ struct rvu *rvu = (struct rvu *)rvu_irq;
+ int vfs = pci_num_vf(rvu->pdev);
u64 intr;
- u8 pf;
intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT);
/* Clear interrupts */
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT, intr);
/* Sync with mbox memory region */
- smp_wmb();
+ rmb();
- for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
- if (intr & (1ULL << pf)) {
- mbox = &rvu->mbox;
- mdev = &mbox->dev[pf];
- hdr = mdev->mbase + mbox->rx_start;
- if (hdr->num_msgs)
- queue_work(rvu->mbox_wq,
- &rvu->mbox_wrk[pf].work);
- mbox = &rvu->mbox_up;
- mdev = &mbox->dev[pf];
- hdr = mdev->mbase + mbox->rx_start;
- if (hdr->num_msgs)
- queue_work(rvu->mbox_wq,
- &rvu->mbox_wrk_up[pf].work);
- }
+ rvu_queue_work(&rvu->afpf_wq_info, 0, rvu->hw->total_pfs, intr);
+
+ /* Handle VF interrupts */
+ if (vfs > 64) {
+ intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(1));
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), intr);
+
+ rvu_queue_work(&rvu->afvf_wq_info, 64, vfs, intr);
+ vfs -= 64;
}
+ intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(0));
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), intr);
+
+ rvu_queue_work(&rvu->afvf_wq_info, 0, vfs, intr);
+
return IRQ_HANDLED;
}
@@ -1917,7 +2035,10 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_release_regions;
- err = rvu_mbox_init(rvu);
+ /* Init mailbox btw AF and PFs */
+ err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF,
+ rvu->hw->total_pfs, rvu_afpf_mbox_handler,
+ rvu_afpf_mbox_up_handler);
if (err)
goto err_hwsetup;
@@ -1939,7 +2060,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err_cgx:
rvu_cgx_wq_destroy(rvu);
err_mbox:
- rvu_mbox_destroy(rvu);
+ rvu_mbox_destroy(&rvu->afpf_wq_info);
err_hwsetup:
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
@@ -1961,7 +2082,7 @@ static void rvu_remove(struct pci_dev *pdev)
rvu_unregister_interrupts(rvu);
rvu_flr_wq_destroy(rvu);
rvu_cgx_wq_destroy(rvu);
- rvu_mbox_destroy(rvu);
+ rvu_mbox_destroy(&rvu->afpf_wq_info);
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 9e3843e..b491760 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -183,6 +183,16 @@ struct rvu_hwinfo {
struct npc_mcam mcam;
};
+struct mbox_wq_info {
+ struct otx2_mbox mbox;
+ struct rvu_work *mbox_wrk;
+
+ struct otx2_mbox mbox_up;
+ struct rvu_work *mbox_wrk_up;
+
+ struct workqueue_struct *mbox_wq;
+};
+
struct rvu {
void __iomem *afreg_base;
void __iomem *pfreg_base;
@@ -194,11 +204,8 @@ struct rvu {
struct mutex rsrc_lock; /* Serialize resource alloc/free */
/* Mbox */
- struct otx2_mbox mbox;
- struct rvu_work *mbox_wrk;
- struct otx2_mbox mbox_up;
- struct rvu_work *mbox_wrk_up;
- struct workqueue_struct *mbox_wq;
+ struct mbox_wq_info afpf_wq_info;
+ struct mbox_wq_info afvf_wq_info;
/* PF FLR */
struct rvu_work *flr_wrk;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 188185c..d341ba0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -27,7 +27,7 @@ static struct _req_type __maybe_unused \
struct _req_type *req; \
\
req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \
- &rvu->mbox_up, devid, sizeof(struct _req_type), \
+ &rvu->afpf_wq_info.mbox_up, devid, sizeof(struct _req_type), \
sizeof(struct _rsp_type)); \
if (!req) \
return NULL; \
@@ -181,8 +181,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
if (!msg)
continue;
msg->link_info = *linfo;
- otx2_mbox_msg_send(&rvu->mbox_up, pfid);
- err = otx2_mbox_wait_for_rsp(&rvu->mbox_up, pfid);
+ otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pfid);
+ err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid);
if (err)
dev_warn(rvu->dev, "notification to pf %d failed\n",
pfid);
--
2.7.4
^ permalink raw reply related
* [PATCH 16/20] octeontx2-af: Enable sriov on AF to create VFs
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Tomasz Duszynski, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Tomasz Duszynski <tduszynski@marvell.com>
Enable all AF VFs during probe. Since AF's VFs work in pairs
(eg: Pkts sent on VF0 are received by VF1 and viceversa),
enable only even number of VFs out of totalVFs, which should
again be less than number of loopback (LBK) channels.
Also enable VF's mailbox interrupts.
Signed-off-by: Tomasz Duszynski <tduszynski@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 185 +++++++++++++++++++++++-
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 3 +-
2 files changed, 185 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index cf4df08..8e51f4e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1698,7 +1698,7 @@ static void rvu_queue_work(struct mbox_wq_info *mw, int first,
static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
{
struct rvu *rvu = (struct rvu *)rvu_irq;
- int vfs = pci_num_vf(rvu->pdev);
+ int vfs = rvu->vfs;
u64 intr;
intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT);
@@ -1858,9 +1858,25 @@ static void rvu_unregister_interrupts(struct rvu *rvu)
rvu->num_vec = 0;
}
+static int rvu_afvf_msix_vectors_num_ok(struct rvu *rvu)
+{
+ struct rvu_pfvf *pfvf = &rvu->pf[0];
+ int offset;
+
+ pfvf = &rvu->pf[0];
+ offset = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
+
+ /* Make sure there are enough MSIX vectors configured so that
+ * VF interrupts can be handled. Offset equal to zero means
+ * that PF vectors are not configured and overlapping AF vectors.
+ */
+ return (pfvf->msix.max >= RVU_AF_INT_VEC_CNT + RVU_PF_INT_VEC_CNT) &&
+ offset;
+}
+
static int rvu_register_interrupts(struct rvu *rvu)
{
- int ret;
+ int ret, offset;
rvu->num_vec = pci_msix_vec_count(rvu->pdev);
@@ -1921,6 +1937,40 @@ static int rvu_register_interrupts(struct rvu *rvu)
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S,
INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+ if (!rvu_afvf_msix_vectors_num_ok(rvu))
+ return 0;
+
+ /* Get PF MSIX vectors offset. */
+ offset = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
+
+ /* Register MBOX0 interrupt. */
+ offset += RVU_PF_INT_VEC_VFPF_MBOX0;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox0");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_mbox_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE],
+ rvu);
+ if (ret)
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for Mbox0\n");
+
+ rvu->irq_allocated[offset] = true;
+
+ /* Register MBOX1 interrupt. MBOX1 IRQ number follows MBOX0 so
+ * simply increment current offset by 1.
+ */
+ offset += 1;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox1");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_mbox_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE],
+ rvu);
+ if (ret)
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for Mbox1\n");
+
+ rvu->irq_allocated[offset] = true;
+
return 0;
fail:
@@ -1973,6 +2023,129 @@ static int rvu_flr_init(struct rvu *rvu)
return 0;
}
+static void rvu_disable_afvf_mbox_intr(struct rvu *rvu)
+{
+ int vfs = rvu->vfs;
+
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), INTR_MASK(vfs));
+ if (vfs > 64)
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1),
+ INTR_MASK(vfs - 64));
+}
+
+static void rvu_enable_afvf_mbox_intr(struct rvu *rvu)
+{
+ int vfs = rvu->vfs;
+
+ /* Clear any pending interrupts and enable AF VF interrupts for
+ * the first 64 VFs.
+ */
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), INTR_MASK(vfs));
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), INTR_MASK(vfs));
+
+ /* Same for remaining VFs, if any. */
+ if (vfs <= 64)
+ return;
+
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1),
+ INTR_MASK(vfs - 64));
+}
+
+#define PCI_DEVID_OCTEONTX2_LBK 0xA061
+
+static int lbk_get_num_chans(void)
+{
+ struct pci_dev *pdev;
+ void __iomem *base;
+ int ret = -EIO;
+
+ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_LBK,
+ NULL);
+ if (!pdev)
+ goto err;
+
+ base = pci_ioremap_bar(pdev, 0);
+ if (!base)
+ goto err_put;
+
+ /* Read number of available LBK channels from LBK(0)_CONST register. */
+ ret = (readq(base + 0x10) >> 32) & 0xffff;
+ iounmap(base);
+err_put:
+ pci_dev_put(pdev);
+err:
+ return ret;
+}
+
+static int rvu_enable_sriov(struct rvu *rvu)
+{
+ struct pci_dev *pdev = rvu->pdev;
+ int err, chans, vfs;
+
+ if (!rvu_afvf_msix_vectors_num_ok(rvu)) {
+ dev_warn(&pdev->dev,
+ "Skipping SRIOV enablement since not enough IRQs are available\n");
+ return 0;
+ }
+
+ chans = lbk_get_num_chans();
+ if (chans < 0)
+ return chans;
+
+ vfs = pci_sriov_get_totalvfs(pdev);
+
+ /* Limit VFs in case we have more VFs than LBK channels available. */
+ if (vfs > chans)
+ vfs = chans;
+
+ /* AF's VFs work in pairs and talk over consecutive loopback channels.
+ * Thus we want to enable maximum even number of VFs. In case
+ * odd number of VFs are available then the last VF on the list
+ * remains disabled.
+ */
+ if (vfs & 0x1) {
+ dev_warn(&pdev->dev,
+ "Number of VFs should be even. Enabling %d out of %d.\n",
+ vfs - 1, vfs);
+ vfs--;
+ }
+
+ if (!vfs)
+ return 0;
+
+ /* Save VFs number for reference in VF interrupts handlers.
+ * Since interrupts might start arriving during SRIOV enablement
+ * ordinary API cannot be used to get number of enabled VFs.
+ */
+ rvu->vfs = vfs;
+
+ err = rvu_mbox_init(rvu, &rvu->afvf_wq_info, TYPE_AFVF, vfs,
+ rvu_afvf_mbox_handler, rvu_afvf_mbox_up_handler);
+ if (err)
+ return err;
+
+ rvu_enable_afvf_mbox_intr(rvu);
+ /* Make sure IRQs are enabled before SRIOV. */
+ mb();
+
+ err = pci_enable_sriov(pdev, vfs);
+ if (err) {
+ rvu_disable_afvf_mbox_intr(rvu);
+ rvu_mbox_destroy(&rvu->afvf_wq_info);
+ return err;
+ }
+
+ return 0;
+}
+
+static void rvu_disable_sriov(struct rvu *rvu)
+{
+ rvu_disable_afvf_mbox_intr(rvu);
+ rvu_mbox_destroy(&rvu->afvf_wq_info);
+ pci_disable_sriov(rvu->pdev);
+}
+
static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
@@ -2054,7 +2227,14 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_flr;
+ /* Enable AF's VFs (if any) */
+ err = rvu_enable_sriov(rvu);
+ if (err)
+ goto err_irq;
+
return 0;
+err_irq:
+ rvu_unregister_interrupts(rvu);
err_flr:
rvu_flr_wq_destroy(rvu);
err_cgx:
@@ -2083,6 +2263,7 @@ static void rvu_remove(struct pci_dev *pdev)
rvu_flr_wq_destroy(rvu);
rvu_cgx_wq_destroy(rvu);
rvu_mbox_destroy(&rvu->afpf_wq_info);
+ rvu_disable_sriov(rvu);
rvu_reset_all_blocks(rvu);
rvu_free_hw_resources(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index b491760..ffed649 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -201,7 +201,8 @@ struct rvu {
struct rvu_hwinfo *hw;
struct rvu_pfvf *pf;
struct rvu_pfvf *hwvf;
- struct mutex rsrc_lock; /* Serialize resource alloc/free */
+ struct mutex rsrc_lock; /* Serialize resource alloc/free */
+ int vfs; /* Number of VFs attached to RVU */
/* Mbox */
struct mbox_wq_info afpf_wq_info;
--
2.7.4
^ permalink raw reply related
* [PATCH 17/20] octeontx2-af: Configure AF VFs to talk over LBK channels
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Tomasz Duszynski, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Tomasz Duszynski <tduszynski@marvell.com>
Configure AF VFs such that they are able to talk over consecutive
loopback channels.
If 8 VFs are attached to AF then communication will work as below:
TX RX
lbk0 -> lbk1
lbk1 -> lbk0
lbk2 -> lbk3
lbk3 -> lbk2
lbk4 -> lbk5
lbk5 -> lbk4
lbk6 -> lbk7
lbk7 -> lbk6
Signed-off-by: Tomasz Duszynski <tduszynski@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/common.h | 2 ++
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 5 +++++
.../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 25 +++++++++++++++++++---
.../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 12 +++++++----
4 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index a8c89df..9bddb03 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -174,7 +174,9 @@ enum nix_scheduler {
#define MAX_LMAC_PKIND 12
#define NIX_LINK_CGX_LMAC(a, b) (0 + 4 * (a) + (b))
+#define NIX_LINK_LBK(a) (12 + (a))
#define NIX_CHAN_CGX_LMAC_CHX(a, b, c) (0x800 + 0x100 * (a) + 0x10 * (b) + (c))
+#define NIX_CHAN_LBK_CHX(a, b) (0 + 0x100 * (a) + (b))
/* NIX LSO format indices.
* As of now TSO is the only one using, so statically assigning indices.
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index ffed649..cf9139e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -258,6 +258,11 @@ static inline u64 rvupf_read64(struct rvu *rvu, u64 offset)
/* Function Prototypes
* RVU
*/
+static inline int is_afvf(u16 pcifunc)
+{
+ return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
+}
+
int rvu_alloc_bitmap(struct rsrc_bmap *rsrc);
int rvu_alloc_rsrc(struct rsrc_bmap *rsrc);
void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index ad99cf7..1f73c05 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -144,7 +144,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
{
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
u8 cgx_id, lmac_id;
- int pkind, pf;
+ int pkind, pf, vf;
int err;
pf = rvu_get_pf(pcifunc);
@@ -170,6 +170,14 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
rvu_npc_set_pkind(rvu, pkind, pfvf);
break;
case NIX_INTF_TYPE_LBK:
+ vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+ pfvf->rx_chan_base = NIX_CHAN_LBK_CHX(0, vf);
+ pfvf->tx_chan_base = vf & 0x1 ? NIX_CHAN_LBK_CHX(0, vf - 1) :
+ NIX_CHAN_LBK_CHX(0, vf + 1);
+ pfvf->rx_chan_cnt = 1;
+ pfvf->tx_chan_cnt = 1;
+ rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+ pfvf->rx_chan_base, false);
break;
}
@@ -684,7 +692,7 @@ int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
struct nix_lf_alloc_req *req,
struct nix_lf_alloc_rsp *rsp)
{
- int nixlf, qints, hwctx_size, err, rc = 0;
+ int nixlf, qints, hwctx_size, intf, err, rc = 0;
struct rvu_hwinfo *hw = rvu->hw;
u16 pcifunc = req->hdr.pcifunc;
struct rvu_block *block;
@@ -839,7 +847,8 @@ int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
/* Config Rx pkt length, csum checks and apad enable / disable */
rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg);
- err = nix_interface_init(rvu, pcifunc, NIX_INTF_TYPE_CGX, nixlf);
+ intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+ err = nix_interface_init(rvu, pcifunc, intf, nixlf);
if (err)
goto free_mem;
@@ -1354,6 +1363,10 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
struct rvu_pfvf *pfvf;
int blkaddr;
+ /* Broadcast pkt replication is not needed for AF's VFs, hence skip */
+ if (is_afvf(pcifunc))
+ return 0;
+
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
if (blkaddr < 0)
return 0;
@@ -1966,6 +1979,12 @@ int rvu_mbox_handler_NIX_RXVLAN_ALLOC(struct rvu *rvu, struct msg_req *req,
int blkaddr, nixlf, err;
struct rvu_pfvf *pfvf;
+ /* LBK VFs do not have separate MCAM UCAST entry hence
+ * skip allocating rxvlan for them
+ */
+ if (is_afvf(pcifunc))
+ return 0;
+
pfvf = rvu_get_pfvf(rvu, pcifunc);
if (pfvf->rxvlan)
return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 5dbb5cd..e61f5b3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -314,6 +314,10 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int blkaddr, index, kwi;
u64 mac = 0;
+ /* AF's VFs work in promiscuous mode */
+ if (is_afvf(pcifunc))
+ return;
+
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0)
return;
@@ -371,12 +375,12 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
struct nix_rx_action action = { };
int blkaddr, index, kwi;
- blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
- if (blkaddr < 0)
+ /* Only PF or AF VF can add a promiscuous entry */
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc))
return;
- /* Only PF or AF VF can add a promiscuous entry */
- if (pcifunc & RVU_PFVF_FUNC_MASK)
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+ if (blkaddr < 0)
return;
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
--
2.7.4
^ permalink raw reply related
* [PATCH 18/20] octeontx2-af: Add FLR handling support for AF's VFs
From: sunil.kovvuri @ 2018-11-08 18:35 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Sunil Goutham <sgoutham@marvell.com>
Added support to handle FLR for AF's VFs (i.e LBK VFs).
Just the FLR interrupt enable/disable, handler registration
etc, actual HW resource cleanup or LFs teardown logic is
already there.
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 129 ++++++++++++++++++++----
1 file changed, 110 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 8e51f4e..1ef104a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1788,6 +1788,23 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
mutex_unlock(&rvu->flr_lock);
}
+static void rvu_afvf_flr_handler(struct rvu *rvu, int vf)
+{
+ int reg = 0;
+
+ /* pcifunc = 0(PF0) | (vf + 1) */
+ __rvu_flr_handler(rvu, vf + 1);
+
+ if (vf >= 64) {
+ reg = 1;
+ vf = vf - 64;
+ }
+
+ /* Signal FLR finish and enable IRQ */
+ rvupf_write64(rvu, RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
+}
+
static void rvu_flr_handler(struct work_struct *work)
{
struct rvu_work *flrwork = container_of(work, struct rvu_work, work);
@@ -1797,6 +1814,10 @@ static void rvu_flr_handler(struct work_struct *work)
int pf;
pf = flrwork - rvu->flr_wrk;
+ if (pf >= rvu->hw->total_pfs) {
+ rvu_afvf_flr_handler(rvu, pf - rvu->hw->total_pfs);
+ return;
+ }
cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
numvfs = (cfg >> 12) & 0xFF;
@@ -1814,6 +1835,29 @@ static void rvu_flr_handler(struct work_struct *work)
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S, BIT_ULL(pf));
}
+static void rvu_afvf_queue_flr_work(struct rvu *rvu, int start_vf, int numvfs)
+{
+ int dev, vf, reg = 0;
+ u64 intr;
+
+ if (start_vf >= 64)
+ reg = 1;
+
+ intr = rvupf_read64(rvu, RVU_PF_VFFLR_INTX(reg));
+ if (!intr)
+ return;
+
+ for (vf = 0; vf < numvfs; vf++) {
+ if (!(intr & BIT_ULL(vf)))
+ continue;
+ dev = vf + start_vf + rvu->hw->total_pfs;
+ queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
+ /* Clear and disable the interrupt */
+ rvupf_write64(rvu, RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(reg), BIT_ULL(vf));
+ }
+}
+
static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
{
struct rvu *rvu = (struct rvu *)rvu_irq;
@@ -1821,6 +1865,8 @@ static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
u8 pf;
intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT);
+ if (!intr)
+ goto afvf_flr;
for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
if (intr & (1ULL << pf)) {
@@ -1834,6 +1880,12 @@ static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
BIT_ULL(pf));
}
}
+
+afvf_flr:
+ rvu_afvf_queue_flr_work(rvu, 0, 64);
+ if (rvu->vfs > 64)
+ rvu_afvf_queue_flr_work(rvu, 64, rvu->vfs - 64);
+
return IRQ_HANDLED;
}
@@ -1876,7 +1928,7 @@ static int rvu_afvf_msix_vectors_num_ok(struct rvu *rvu)
static int rvu_register_interrupts(struct rvu *rvu)
{
- int ret, offset;
+ int ret, offset, pf_vec_start;
rvu->num_vec = pci_msix_vec_count(rvu->pdev);
@@ -1941,10 +1993,11 @@ static int rvu_register_interrupts(struct rvu *rvu)
return 0;
/* Get PF MSIX vectors offset. */
- offset = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
+ pf_vec_start = rvu_read64(rvu, BLKADDR_RVUM,
+ RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
/* Register MBOX0 interrupt. */
- offset += RVU_PF_INT_VEC_VFPF_MBOX0;
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFPF_MBOX0;
sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox0");
ret = request_irq(pci_irq_vector(rvu->pdev, offset),
rvu_mbox_intr_handler, 0,
@@ -1959,7 +2012,7 @@ static int rvu_register_interrupts(struct rvu *rvu)
/* Register MBOX1 interrupt. MBOX1 IRQ number follows MBOX0 so
* simply increment current offset by 1.
*/
- offset += 1;
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFPF_MBOX1;
sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox1");
ret = request_irq(pci_irq_vector(rvu->pdev, offset),
rvu_mbox_intr_handler, 0,
@@ -1971,10 +2024,35 @@ static int rvu_register_interrupts(struct rvu *rvu)
rvu->irq_allocated[offset] = true;
+ /* Register FLR interrupt handler for AF's VFs */
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFFLR0;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF FLR0");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_flr_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE], rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for RVUAFVF FLR0\n");
+ goto fail;
+ }
+ rvu->irq_allocated[offset] = true;
+
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFFLR1;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF FLR1");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_flr_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE], rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for RVUAFVF FLR1\n");
+ goto fail;
+ }
+ rvu->irq_allocated[offset] = true;
+
return 0;
fail:
- pci_free_irq_vectors(rvu->pdev);
+ rvu_unregister_interrupts(rvu);
return ret;
}
@@ -1985,16 +2063,16 @@ static void rvu_flr_wq_destroy(struct rvu *rvu)
destroy_workqueue(rvu->flr_wq);
rvu->flr_wq = NULL;
}
- kfree(rvu->flr_wrk);
}
static int rvu_flr_init(struct rvu *rvu)
{
+ int dev, num_devs;
u64 cfg;
int pf;
/* Enable FLR for all PFs*/
- for (pf = 1; pf < rvu->hw->total_pfs; pf++) {
+ for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
rvu_write64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf),
cfg | BIT_ULL(22));
@@ -2006,16 +2084,17 @@ static int rvu_flr_init(struct rvu *rvu)
if (!rvu->flr_wq)
return -ENOMEM;
- rvu->flr_wrk = devm_kcalloc(rvu->dev, rvu->hw->total_pfs,
+ num_devs = rvu->hw->total_pfs + pci_sriov_get_totalvfs(rvu->pdev);
+ rvu->flr_wrk = devm_kcalloc(rvu->dev, num_devs,
sizeof(struct rvu_work), GFP_KERNEL);
if (!rvu->flr_wrk) {
destroy_workqueue(rvu->flr_wq);
return -ENOMEM;
}
- for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
- rvu->flr_wrk[pf].rvu = rvu;
- INIT_WORK(&rvu->flr_wrk[pf].work, rvu_flr_handler);
+ for (dev = 0; dev < num_devs; dev++) {
+ rvu->flr_wrk[dev].rvu = rvu;
+ INIT_WORK(&rvu->flr_wrk[dev].work, rvu_flr_handler);
}
mutex_init(&rvu->flr_lock);
@@ -2023,26 +2102,35 @@ static int rvu_flr_init(struct rvu *rvu)
return 0;
}
-static void rvu_disable_afvf_mbox_intr(struct rvu *rvu)
+static void rvu_disable_afvf_intr(struct rvu *rvu)
{
int vfs = rvu->vfs;
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), INTR_MASK(vfs));
- if (vfs > 64)
- rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1),
- INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(vfs));
+ if (vfs <= 64)
+ return;
+
+ rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1),
+ INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(vfs - 64));
}
-static void rvu_enable_afvf_mbox_intr(struct rvu *rvu)
+static void rvu_enable_afvf_intr(struct rvu *rvu)
{
int vfs = rvu->vfs;
/* Clear any pending interrupts and enable AF VF interrupts for
* the first 64 VFs.
*/
+ /* Mbox */
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), INTR_MASK(vfs));
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), INTR_MASK(vfs));
+ /* FLR */
+ rvupf_write64(rvu, RVU_PF_VFFLR_INTX(0), INTR_MASK(vfs));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(vfs));
+
/* Same for remaining VFs, if any. */
if (vfs <= 64)
return;
@@ -2050,6 +2138,9 @@ static void rvu_enable_afvf_mbox_intr(struct rvu *rvu)
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), INTR_MASK(vfs - 64));
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1),
INTR_MASK(vfs - 64));
+
+ rvupf_write64(rvu, RVU_PF_VFFLR_INTX(1), INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(vfs - 64));
}
#define PCI_DEVID_OCTEONTX2_LBK 0xA061
@@ -2125,13 +2216,13 @@ static int rvu_enable_sriov(struct rvu *rvu)
if (err)
return err;
- rvu_enable_afvf_mbox_intr(rvu);
+ rvu_enable_afvf_intr(rvu);
/* Make sure IRQs are enabled before SRIOV. */
mb();
err = pci_enable_sriov(pdev, vfs);
if (err) {
- rvu_disable_afvf_mbox_intr(rvu);
+ rvu_disable_afvf_intr(rvu);
rvu_mbox_destroy(&rvu->afvf_wq_info);
return err;
}
@@ -2141,7 +2232,7 @@ static int rvu_enable_sriov(struct rvu *rvu)
static void rvu_disable_sriov(struct rvu *rvu)
{
- rvu_disable_afvf_mbox_intr(rvu);
+ rvu_disable_afvf_intr(rvu);
rvu_mbox_destroy(&rvu->afvf_wq_info);
pci_disable_sriov(rvu->pdev);
}
--
2.7.4
^ permalink raw reply related
* [PATCH 19/20] octeontx2-af: Add interrupt handlers for Master Enable event
From: sunil.kovvuri @ 2018-11-08 18:36 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Linu Cherian, Sunil Goutham
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Linu Cherian <lcherian@marvell.com>
- Add interrupt handlers for Master Enable events from PFs
and Master Enable events from VFs of AF
- Master Enable is required for the MSIX delivery to work
- Master Enable bit trap handler doesn't have to do any anything
other than clearing the TRPEND bit, since the enable/disable
requirements are already taken care using mbox requests/flr handler.
Signed-off-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 113 ++++++++++++++++++++++++
1 file changed, 113 insertions(+)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 1ef104a..395819d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1889,6 +1889,67 @@ static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
return IRQ_HANDLED;
}
+static void rvu_me_handle_vfset(struct rvu *rvu, int idx, u64 intr)
+{
+ int vf;
+
+ /* Nothing to be done here other than clearing the
+ * TRPEND bit.
+ */
+ for (vf = 0; vf < 64; vf++) {
+ if (intr & (1ULL << vf)) {
+ /* clear the trpend due to ME(master enable) */
+ rvupf_write64(rvu, RVU_PF_VFTRPENDX(idx), BIT_ULL(vf));
+ /* clear interrupt */
+ rvupf_write64(rvu, RVU_PF_VFME_INTX(idx), BIT_ULL(vf));
+ }
+ }
+}
+
+/* Handles ME interrupts from VFs of AF */
+static irqreturn_t rvu_me_vf_intr_handler(int irq, void *rvu_irq)
+{
+ struct rvu *rvu = (struct rvu *)rvu_irq;
+ int vfset;
+ u64 intr;
+
+ intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT);
+
+ for (vfset = 0; vfset <= 1; vfset++) {
+ intr = rvupf_read64(rvu, RVU_PF_VFME_INTX(vfset));
+ if (intr)
+ rvu_me_handle_vfset(rvu, vfset, intr);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* Handles ME interrupts from PFs */
+static irqreturn_t rvu_me_pf_intr_handler(int irq, void *rvu_irq)
+{
+ struct rvu *rvu = (struct rvu *)rvu_irq;
+ u64 intr;
+ u8 pf;
+
+ intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT);
+
+ /* Nothing to be done here other than clearing the
+ * TRPEND bit.
+ */
+ for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+ if (intr & (1ULL << pf)) {
+ /* clear the trpend due to ME(master enable) */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFTRPEND,
+ BIT_ULL(pf));
+ /* clear interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT,
+ BIT_ULL(pf));
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
static void rvu_unregister_interrupts(struct rvu *rvu)
{
int irq;
@@ -1901,6 +1962,10 @@ static void rvu_unregister_interrupts(struct rvu *rvu)
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+ /* Disable the PF ME interrupt */
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT_ENA_W1C,
+ INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
for (irq = 0; irq < rvu->num_vec; irq++) {
if (rvu->irq_allocated[irq])
free_irq(pci_irq_vector(rvu->pdev, irq), rvu);
@@ -1989,6 +2054,26 @@ static int rvu_register_interrupts(struct rvu *rvu)
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S,
INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+ /* Register ME interrupt handler */
+ sprintf(&rvu->irq_name[RVU_AF_INT_VEC_PFME * NAME_SIZE],
+ "RVUAF ME");
+ ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_PFME),
+ rvu_me_pf_intr_handler, 0,
+ &rvu->irq_name[RVU_AF_INT_VEC_PFME * NAME_SIZE],
+ rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for ME\n");
+ }
+ rvu->irq_allocated[RVU_AF_INT_VEC_PFME] = true;
+
+ /* Enable ME interrupt for all PFs*/
+ rvu_write64(rvu, BLKADDR_RVUM,
+ RVU_AF_PFME_INT, INTR_MASK(rvu->hw->total_pfs));
+
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT_ENA_W1S,
+ INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
if (!rvu_afvf_msix_vectors_num_ok(rvu))
return 0;
@@ -2049,6 +2134,30 @@ static int rvu_register_interrupts(struct rvu *rvu)
}
rvu->irq_allocated[offset] = true;
+ /* Register ME interrupt handler for AF's VFs */
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFME0;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF ME0");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_me_vf_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE], rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for RVUAFVF ME0\n");
+ goto fail;
+ }
+ rvu->irq_allocated[offset] = true;
+
+ offset = pf_vec_start + RVU_PF_INT_VEC_VFME1;
+ sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF ME1");
+ ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+ rvu_me_vf_intr_handler, 0,
+ &rvu->irq_name[offset * NAME_SIZE], rvu);
+ if (ret) {
+ dev_err(rvu->dev,
+ "RVUAF: IRQ registration failed for RVUAFVF ME1\n");
+ goto fail;
+ }
+ rvu->irq_allocated[offset] = true;
return 0;
fail:
@@ -2108,12 +2217,14 @@ static void rvu_disable_afvf_intr(struct rvu *rvu)
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), INTR_MASK(vfs));
rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(vfs));
+ rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(vfs));
if (vfs <= 64)
return;
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1),
INTR_MASK(vfs - 64));
rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(vfs - 64));
}
static void rvu_enable_afvf_intr(struct rvu *rvu)
@@ -2130,6 +2241,7 @@ static void rvu_enable_afvf_intr(struct rvu *rvu)
/* FLR */
rvupf_write64(rvu, RVU_PF_VFFLR_INTX(0), INTR_MASK(vfs));
rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(vfs));
+ rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(vfs));
/* Same for remaining VFs, if any. */
if (vfs <= 64)
@@ -2141,6 +2253,7 @@ static void rvu_enable_afvf_intr(struct rvu *rvu)
rvupf_write64(rvu, RVU_PF_VFFLR_INTX(1), INTR_MASK(vfs - 64));
rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(vfs - 64));
+ rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(vfs - 64));
}
#define PCI_DEVID_OCTEONTX2_LBK 0xA061
--
2.7.4
^ permalink raw reply related
* [PATCH 20/20] octeontx2-af: Workarounds for HW errata
From: sunil.kovvuri @ 2018-11-08 18:36 UTC (permalink / raw)
To: netdev, davem; +Cc: arnd, linux-soc, Sunil Goutham, Jerin Jacob
In-Reply-To: <1541702161-30673-1-git-send-email-sunil.kovvuri@gmail.com>
From: Sunil Goutham <sgoutham@marvell.com>
Errata 35038
Software sets NIX_AF_RX_SW_SYNC[ENA] to sync (flush) in-flight packets
the RX data path before configuration changes (e.g. disabling one or
more RQs). Hardware clears [ENA] to indicate sync is done
An issue exists whereby NIX may clear NIX_AF_RX_SW_SYNC [ENA] too
early.
Errata 35057
NIX may corrupt internal state when conditional clocks turn off.
So turnon all clocks by default.
Errata 35786
Parse nibble enable NPC configuration for KEY generation has to be
identical for both Rx and Tx interfaces.
Also corrected endianness configuration for NIX i.e NIX_AF_CFG[AF_BE]
is bit8 and not bit1.
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Jerin Jacob <jerinj@marvell.com>
---
drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 12 ++++++++++++
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 18 ++++++++++++++++--
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 12 +++++++++---
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index cf9139e..aa9f8c2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -11,6 +11,7 @@
#ifndef RVU_H
#define RVU_H
+#include <linux/pci.h>
#include "rvu_struct.h"
#include "common.h"
#include "mbox.h"
@@ -18,6 +19,9 @@
/* PCI device IDs */
#define PCI_DEVID_OCTEONTX2_RVU_AF 0xA065
+/* Subsystem Device ID */
+#define PCI_SUBSYS_DEVID_96XX 0xB200
+
/* PCI BAR nos */
#define PCI_AF_REG_BAR_NUM 0
#define PCI_PF_REG_BAR_NUM 2
@@ -255,6 +259,14 @@ static inline u64 rvupf_read64(struct rvu *rvu, u64 offset)
return readq(rvu->pfreg_base + offset);
}
+static inline bool is_rvu_9xxx_A0(struct rvu *rvu)
+{
+ struct pci_dev *pdev = rvu->pdev;
+
+ return (pdev->revision == 0x00) &&
+ (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+}
+
/* Function Prototypes
* RVU
*/
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 1f73c05..63df40d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -114,6 +114,12 @@ static void nix_rx_sync(struct rvu *rvu, int blkaddr)
err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
if (err)
dev_err(rvu->dev, "NIX RX software sync failed\n");
+
+ /* As per a HW errata in 9xxx A0 silicon, HW may clear SW_SYNC[ENA]
+ * bit too early. Hence wait for 50us more.
+ */
+ if (is_rvu_9xxx_A0(rvu))
+ usleep_range(50, 60);
}
static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
@@ -2135,10 +2141,10 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
/* Set admin queue endianness */
cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG);
#ifdef __BIG_ENDIAN
- cfg |= BIT_ULL(1);
+ cfg |= BIT_ULL(8);
rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
#else
- cfg &= ~BIT_ULL(1);
+ cfg &= ~BIT_ULL(8);
rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
#endif
@@ -2175,6 +2181,14 @@ int rvu_nix_init(struct rvu *rvu)
return 0;
block = &hw->block[blkaddr];
+ /* As per a HW errata in 9xxx A0 silicon, NIX may corrupt
+ * internal state when conditional clocks are turned off.
+ * Hence enable them.
+ */
+ if (is_rvu_9xxx_A0(rvu))
+ rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+ rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x5EULL);
+
/* Calibrate X2P bus to check if CGX/LBK links are fine */
err = nix_calibrate_x2p(rvu, blkaddr);
if (err)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index e61f5b3..bff253c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -973,7 +973,7 @@ int rvu_npc_init(struct rvu *rvu)
struct npc_pkind *pkind = &rvu->hw->pkind;
u64 keyz = NPC_MCAM_KEY_X2;
int blkaddr, entry, bank, err;
- u64 cfg;
+ u64 cfg, nibble_ena;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0) {
@@ -1022,10 +1022,16 @@ int rvu_npc_init(struct rvu *rvu)
/* Set RX and TX side MCAM search key size.
* LA..LD (ltype only) + Channel
*/
+ nibble_ena = 0x49247;
rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX),
- ((keyz & 0x3) << 32) | 0x49247);
+ ((keyz & 0x3) << 32) | nibble_ena);
+ /* Due to an errata (35786) in A0 pass silicon, parse nibble enable
+ * configuration has to be identical for both Rx and Tx interfaces.
+ */
+ if (!is_rvu_9xxx_A0(rvu))
+ nibble_ena = (1ULL << 19) - 1;
rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX),
- ((keyz & 0x3) << 32) | ((1ULL << 19) - 1));
+ ((keyz & 0x3) << 32) | nibble_ena);
err = npc_mcam_rsrcs_init(rvu, blkaddr);
if (err)
--
2.7.4
^ permalink raw reply related
* Re: [RFC PATCH ethtool v2 00/23] ethtool netlink interface (userspace side) (WiP)
From: Michal Kubecek @ 2018-11-08 18:30 UTC (permalink / raw)
To: Woojung.Huh; +Cc: netdev
In-Reply-To: <BN6PR1101MB2130A3A92409FBF732174764E7C40@BN6PR1101MB2130.namprd11.prod.outlook.com>
On Wed, Nov 07, 2018 at 11:57:03PM +0000, Woojung.Huh@microchip.com wrote:
> > -----Original Message-----
> > From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On Behalf Of Michal
> > Kubecek
> > Sent: Monday, July 30, 2018 8:56 AM
> > To: netdev@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org; John W. Linville <linville@tuxdriver.com>
> > Subject: [RFC PATCH ethtool v2 00/23] ethtool netlink interface (userspace side) (WiP)
> >
> > (The series is based on v4.17)
> Can you help me find exact version of version I can apply patch?
> v4.17 from either net-next.git or linux.git shows some failure while patching.
Based on the subject you quoted, you tried to apply the patch series for
ethtool (userspace utility). Kernel patch series was sent shortly before
this one with subject "[RFC PATCH net-next v2 00/17] ethtool netlink
interface (WiP)":
http://patchwork.ozlabs.org/project/netdev/list/?series=58290&state=*
But if you want to try it, I would rather suggest updated version from
https://github.com/mkubecek/ethnl
It has kernel series based on current net-next (this morning) and
ethtool series based on v4.19.
I would like to send a v3 to the list soon but in the last few weeks
I was quite busy with higher priority work.
Michal Kubecek
^ permalink raw reply
* RE: [RFC PATCH ethtool v2 00/23] ethtool netlink interface (userspace side) (WiP)
From: Woojung.Huh @ 2018-11-08 18:53 UTC (permalink / raw)
To: mkubecek; +Cc: netdev
In-Reply-To: <20181108183019.GD24800@unicorn.suse.cz>
Hi Michal,
> Based on the subject you quoted, you tried to apply the patch series for
> ethtool (userspace utility). Kernel patch series was sent shortly before
> this one with subject "[RFC PATCH net-next v2 00/17] ethtool netlink
> interface (WiP)":
Oh.. You are right. I picked up wrong email header. :(
> But if you want to try it, I would rather suggest updated version from
>
> https://github.com/mkubecek/ethnl
>
> It has kernel series based on current net-next (this morning) and
> ethtool series based on v4.19.
Thanks for this link. It would be clean tree.
> I would like to send a v3 to the list soon but in the last few weeks
> I was quite busy with higher priority work.
Great news. Will wait new series.
Regards,
Woojung
^ permalink raw reply
* Re: [PATCH v2 net-next 0/4] net: ethernet: ti: cpsw: fix vlan mcast
From: David Miller @ 2018-11-09 4:32 UTC (permalink / raw)
To: ivan.khoronzhuk
Cc: grygorii.strashko, linux-omap, netdev, linux-kernel,
alexander.h.duyck, bjorn
In-Reply-To: <20181108202757.30110-1-ivan.khoronzhuk@linaro.org>
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Thu, 8 Nov 2018 22:27:53 +0200
> The cpsw holds separate mcast entires for vlan entries. At this moment
> driver adds only not vlan mcast addresses, omitting vlan/mcast entries.
> As result mcast for vlans doesn't work. It can be fixed by adding same
> mcast entries for every created vlan, but this patchseries uses more
> sophisticated way and allows to create mcast entries only for vlans
> that really require it. Generic functions from this series can be
> reused for fixing vlan and macvlan unicast.
>
> Simple example of ALE table before and after this series, having same
> mcast entries as for vlan 100 as for real device (reserved vlan 2),
> and one mcast address only for vlan 100 - 01:1b:19:00:00:00.
...
> Based on net-next/master
>
> v2..v1:
> net: ethernet: ti: cpsw: fix vlan mcast
> - removed limit for legacy switch cpsw mode
Series applied.
^ permalink raw reply
* Re: [PATCH v3 bpf-next 4/4] bpftool: support loading flow dissector
From: Stanislav Fomichev @ 2018-11-08 19:01 UTC (permalink / raw)
To: Quentin Monnet
Cc: Stanislav Fomichev, netdev, linux-kselftest, ast, daniel, shuah,
jakub.kicinski, guro, jiong.wang, bhole_prashant_q7,
john.fastabend, jbenc, treeze.taeung, yhs, osk, sandipan
In-Reply-To: <d61e20cb-ccb1-1502-3119-8f71fb8d3570@netronome.com>
On 11/08, Quentin Monnet wrote:
> 2018-11-08 10:01 UTC-0800 ~ Stanislav Fomichev <sdf@fomichev.me>
> > On 11/08, Quentin Monnet wrote:
> >> Hi Stanislav, thanks for the changes! More comments below.
> > Thank you for another round of review!
> >
> >> 2018-11-07 21:39 UTC-0800 ~ Stanislav Fomichev <sdf@google.com>
> >>> This commit adds support for loading/attaching/detaching flow
> >>> dissector program. The structure of the flow dissector program is
> >>> assumed to be the same as in the selftests:
> >>>
> >>> * flow_dissector section with the main entry point
> >>> * a bunch of tail call progs
> >>> * a jmp_table map that is populated with the tail call progs
> >>>
> >>> When `bpftool load` is called with a flow_dissector prog (i.e. when the
> >>> first section is flow_dissector of 'type flow_dissector' argument is
> >>> passed), we load and pin all the programs/maps. User is responsible to
> >>> construct the jump table for the tail calls.
> >>>
> >>> The last argument of `bpftool attach` is made optional for this use
> >>> case.
> >>>
> >>> Example:
> >>> bpftool prog load tools/testing/selftests/bpf/bpf_flow.o \
> >>> /sys/fs/bpf/flow type flow_dissector
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 0 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/IP
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 1 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/IPV6
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 2 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/IPV6OP
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 3 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/IPV6FR
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 4 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/MPLS
> >>>
> >>> bpftool map update pinned /sys/fs/bpf/flow/jmp_table \
> >>> key 5 0 0 0 \
> >>> value pinned /sys/fs/bpf/flow/VLAN
> >>>
> >>> bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector flow_dissector
> >>>
> >>> Tested by using the above lines to load the prog in
> >>> the test_flow_dissector.sh selftest.
> >>>
> >>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> >>> ---
> >>> .../bpftool/Documentation/bpftool-prog.rst | 36 ++++--
> >>> tools/bpf/bpftool/bash-completion/bpftool | 6 +-
> >>> tools/bpf/bpftool/common.c | 30 ++---
> >>> tools/bpf/bpftool/main.h | 1 +
> >>> tools/bpf/bpftool/prog.c | 112 +++++++++++++-----
> >>> 5 files changed, 126 insertions(+), 59 deletions(-)
> >>>
> >>> diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
> >>> index ac4e904b10fb..0374634c3087 100644
> >>> --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
> >>> +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
> >>> @@ -15,7 +15,8 @@ SYNOPSIS
> >>> *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
> >>> *COMMANDS* :=
> >>> - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
> >>> + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
> >>> + | **loadall** | **help** }
> >>> MAP COMMANDS
> >>> =============
> >>> @@ -24,9 +25,9 @@ MAP COMMANDS
> >>> | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
> >>> | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
> >>> | **bpftool** **prog pin** *PROG* *FILE*
> >>> -| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
> >>> -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
> >>> -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
> >>> +| **bpftool** **prog { load | loadall }** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
> >>> +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
> >>> +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
> >>> | **bpftool** **prog help**
> >>> |
> >>> | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
> >>> @@ -39,7 +40,9 @@ MAP COMMANDS
> >>> | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
> >>> | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
> >>> | }
> >>> -| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
> >>> +| *ATTACH_TYPE* := {
> >>> +| **msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector**
> >>> +| }
> >>> DESCRIPTION
> >>> @@ -79,8 +82,11 @@ DESCRIPTION
> >>> contain a dot character ('.'), which is reserved for future
> >>> extensions of *bpffs*.
> >>> - **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
> >>> + **bpftool prog { load | loadall }** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
> >>> Load bpf program from binary *OBJ* and pin as *FILE*.
> >>> + **bpftool prog load** will pin only the first bpf program
> >>> + from the *OBJ*, **bpftool prog loadall** will pin all maps
> >>> + and programs from the *OBJ*.
> >>
> >> This could be improved regarding maps: with "bpftool prog load" I think we
> >> also load and pin all maps, but your description implies this is only the
> >> case with "loadall"
> > I don't think we pin any maps with `bpftool prog load`, we certainly load
> > them, but we don't pin any afaict. Can you point me to the code where we
> > pin the maps?
> >
>
> My bad. I read "pin" but thought "load". It does not pin them indeed,
> sorry about that.
>
> >>> **type** is optional, if not specified program type will be
> >>> inferred from section names.
> >>> By default bpftool will create new maps as declared in the ELF
> >>> @@ -97,13 +103,17 @@ DESCRIPTION
> >>> contain a dot character ('.'), which is reserved for future
> >>> extensions of *bpffs*.
> >>> - **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
> >>> - Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
> >>> - to the map *MAP*.
> >>> -
> >>> - **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
> >>> - Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
> >>> - from the map *MAP*.
> >>> + **bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
> >>> + Attach bpf program *PROG* (with type specified by
> >>> + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
> >>> + parameter, with the exception of *flow_dissector* which is
> >>> + attached to current networking name space.
> >>> +
> >>> + **bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
> >>> + Detach bpf program *PROG* (with type specified by
> >>> + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP*
> >>> + parameter, with the exception of *flow_dissector* which is
> >>> + detached from the current networking name space.
> >>
> >> While at it could you please fix those two paragraphs to use tabs for
> >> indentation, as the rest of the doc? Thanks!
> > Time to teach my vim to use tabs in .rst files. Sorry about that.
>
> Those paragraphs were using spaces already, so you didn't introduce that
> :). But all others use tabs so its a good occasion to fix it.
>
> >>> **bpftool prog help**
> >>> Print short help message.
> >>> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
> >>> index 3f78e6404589..ad0fc919f7ec 100644
> >>> --- a/tools/bpf/bpftool/bash-completion/bpftool
> >>> +++ b/tools/bpf/bpftool/bash-completion/bpftool
> >>> @@ -243,7 +243,7 @@ _bpftool()
> >>> # Completion depends on object and command in use
> >>> case $object in
> >>> prog)
> >>> - if [[ $command != "load" ]]; then
> >>> + if [[ $command != "load" && $command != "loadall" ]]; then
> >>> case $prev in
> >>> id)
> >>> _bpftool_get_prog_ids
> >>> @@ -299,7 +299,7 @@ _bpftool()
> >>> fi
> >>> if [[ ${#words[@]} == 6 ]]; then
> >>> - COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
> >>> + COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse flow_dissector" -- "$cur" ) )
> >>> return 0
> >>> fi
> >>> @@ -309,7 +309,7 @@ _bpftool()
> >>> fi
> >>> return 0
> >>> ;;
> >>> - load)
> >>> + load|loadall)
> >>> local obj
> >>> if [[ ${#words[@]} -lt 6 ]]; then
> >>
> >> You also want to update completion for the program types, at line 341 or so.
> >> Feel free to split that list on several lines, by the way :).
> > Will do, thanks!
> >
> >>> diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
> >>> index 25af85304ebe..f671a921dec5 100644
> >>> --- a/tools/bpf/bpftool/common.c
> >>> +++ b/tools/bpf/bpftool/common.c
> >>> @@ -169,34 +169,24 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
> >>> return fd;
> >>> }
> >>> -int do_pin_fd(int fd, const char *name)
> >>> +int mount_bpffs_for_pin(const char *name)
> >>> {
> >>> char err_str[ERR_MAX_LEN];
> >>> char *file;
> >>> char *dir;
> >>> int err = 0;
> >>> - err = bpf_obj_pin(fd, name);
> >>> - if (!err)
> >>> - goto out;
> >>> -
> >>> file = malloc(strlen(name) + 1);
> >>> strcpy(file, name);
> >>> dir = dirname(file);
> >>> - if (errno != EPERM || is_bpffs(dir)) {
> >>> - p_err("can't pin the object (%s): %s", name, strerror(errno));
> >>> + if (is_bpffs(dir)) {
> >>> + /* nothing to do if already mounted */
> >>> goto out_free;
> >>> }
> >>
> >> Nitpick: unnecessary brackets.
> > Ack.
> >
> >>> - /* Attempt to mount bpffs, then retry pinning. */
> >>> err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
> >>> - if (!err) {
> >>> - err = bpf_obj_pin(fd, name);
> >>> - if (err)
> >>> - p_err("can't pin the object (%s): %s", name,
> >>> - strerror(errno));
> >>> - } else {
> >>> + if (err) {
> >>> err_str[ERR_MAX_LEN - 1] = '\0';
> >>> p_err("can't mount BPF file system to pin the object (%s): %s",
> >>> name, err_str);
> >>> @@ -204,10 +194,20 @@ int do_pin_fd(int fd, const char *name)
> >>> out_free:
> >>> free(file);
> >>> -out:
> >>> return err;
> >>> }
> >>> +int do_pin_fd(int fd, const char *name)
> >>> +{
> >>> + int err;
> >>> +
> >>> + err = mount_bpffs_for_pin(name);
> >>> + if (err)
> >>> + return err;
> >>> +
> >>> + return bpf_obj_pin(fd, name);
> >>> +}
> >>> +
> >>> int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
> >>> {
> >>> unsigned int id;
> >>> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
> >>> index 28322ace2856..1383824c9baf 100644
> >>> --- a/tools/bpf/bpftool/main.h
> >>> +++ b/tools/bpf/bpftool/main.h
> >>> @@ -129,6 +129,7 @@ const char *get_fd_type_name(enum bpf_obj_type type);
> >>> char *get_fdinfo(int fd, const char *key);
> >>> int open_obj_pinned(char *path);
> >>> int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
> >>> +int mount_bpffs_for_pin(const char *name);
> >>> int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
> >>> int do_pin_fd(int fd, const char *name);
> >>> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
> >>> index 5302ee282409..a4346dd673b1 100644
> >>> --- a/tools/bpf/bpftool/prog.c
> >>> +++ b/tools/bpf/bpftool/prog.c
> >>> @@ -81,6 +81,7 @@ static const char * const attach_type_strings[] = {
> >>> [BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
> >>> [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
> >>> [BPF_SK_MSG_VERDICT] = "msg_verdict",
> >>> + [BPF_FLOW_DISSECTOR] = "flow_dissector",
> >>> [__MAX_BPF_ATTACH_TYPE] = NULL,
> >>> };
> >>> @@ -724,10 +725,11 @@ int map_replace_compar(const void *p1, const void *p2)
> >>> static int do_attach(int argc, char **argv)
> >>> {
> >>> enum bpf_attach_type attach_type;
> >>> - int err, mapfd, progfd;
> >>> + int err, progfd;
> >>> + int mapfd = 0;
> >>> - if (!REQ_ARGS(5)) {
> >>> - p_err("too few parameters for map attach");
> >>> + if (!REQ_ARGS(3)) {
> >>> + p_err("too few parameters for attach");
> >>> return -EINVAL;
> >>> }
> >>> @@ -740,11 +742,17 @@ static int do_attach(int argc, char **argv)
> >>> p_err("invalid attach type");
> >>> return -EINVAL;
> >>> }
> >>> - NEXT_ARG();
> >>> + if (attach_type != BPF_FLOW_DISSECTOR) {
> >>> + NEXT_ARG();
> >>> + if (!REQ_ARGS(2)) {
> >>> + p_err("too few parameters for map attach");
> >>> + return -EINVAL;
> >>> + }
> >>> - mapfd = map_parse_fd(&argc, &argv);
> >>> - if (mapfd < 0)
> >>> - return mapfd;
> >>> + mapfd = map_parse_fd(&argc, &argv);
> >>> + if (mapfd < 0)
> >>> + return mapfd;
> >>> + }
> >>> err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
> >>> if (err) {
> >>> @@ -760,10 +768,11 @@ static int do_attach(int argc, char **argv)
> >>> static int do_detach(int argc, char **argv)
> >>> {
> >>> enum bpf_attach_type attach_type;
> >>> - int err, mapfd, progfd;
> >>> + int err, progfd;
> >>> + int mapfd = 0;
> >>> - if (!REQ_ARGS(5)) {
> >>> - p_err("too few parameters for map detach");
> >>> + if (!REQ_ARGS(3)) {
> >>> + p_err("too few parameters for detach");
> >>> return -EINVAL;
> >>> }
> >>> @@ -776,11 +785,17 @@ static int do_detach(int argc, char **argv)
> >>> p_err("invalid attach type");
> >>> return -EINVAL;
> >>> }
> >>> - NEXT_ARG();
> >>> + if (attach_type != BPF_FLOW_DISSECTOR) {
> >>> + NEXT_ARG();
> >>> + if (!REQ_ARGS(2)) {
> >>> + p_err("too few parameters for map detach");
> >>> + return -EINVAL;
> >>> + }
> >>
> >> Would that make sense to factor argument checks or parsing for do_attach()
> >> and do_detach() to some extent? In order to reduce the number of
> >> attach-type-based exceptions to add in the code if we have other attach
> >> types that do not take maps in the future.
> > I can move all argument parsing into a new function and use it from both
> > do_attach and do_detach.
>
> Sounds good to me, thanks!
>
> >>> - mapfd = map_parse_fd(&argc, &argv);
> >>> - if (mapfd < 0)
> >>> - return mapfd;
> >>> + mapfd = map_parse_fd(&argc, &argv);
> >>> + if (mapfd < 0)
> >>> + return mapfd;
> >>> + }
> >>> err = bpf_prog_detach2(progfd, mapfd, attach_type);
> >>> if (err) {
> >>> @@ -792,15 +807,16 @@ static int do_detach(int argc, char **argv)
> >>> jsonw_null(json_wtr);
> >>> return 0;
> >>> }
> >>> -static int do_load(int argc, char **argv)
> >>> +
> >>> +static int load_with_options(int argc, char **argv, bool first_prog_only)
> >>> {
> >>> enum bpf_attach_type expected_attach_type;
> >>> struct bpf_object_open_attr attr = {
> >>> .prog_type = BPF_PROG_TYPE_UNSPEC,
> >>> };
> >>> struct map_replace *map_replace = NULL;
> >>> + struct bpf_program *prog = NULL, *pos;
> >>> unsigned int old_map_fds = 0;
> >>> - struct bpf_program *prog;
> >>> struct bpf_object *obj;
> >>> struct bpf_map *map;
> >>> const char *pinfile;
> >>> @@ -918,14 +934,20 @@ static int do_load(int argc, char **argv)
> >>> goto err_free_reuse_maps;
> >>> }
> >>> - prog = bpf_program__next(NULL, obj);
> >>> - if (!prog) {
> >>> - p_err("object file doesn't contain any bpf program");
> >>> - goto err_close_obj;
> >>> + if (first_prog_only) {
> >>> + prog = bpf_program__next(NULL, obj);
> >>> + if (!prog) {
> >>> + p_err("object file doesn't contain any bpf program");
> >>> + goto err_close_obj;
> >>> + }
> >>> }
> >>> - bpf_program__set_ifindex(prog, ifindex);
> >>> if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
> >>> + if (!prog) {
> >>> + p_err("can not guess program type when loading all programs\n");
> >>> + goto err_close_obj;
> >>> + }
> >>> +
> >>> const char *sec_name = bpf_program__title(prog, false);
> >>> err = libbpf_prog_type_by_name(sec_name, &attr.prog_type,
> >>> @@ -936,8 +958,13 @@ static int do_load(int argc, char **argv)
> >>> goto err_close_obj;
> >>> }
> >>> }
> >>> - bpf_program__set_type(prog, attr.prog_type);
> >>> - bpf_program__set_expected_attach_type(prog, expected_attach_type);
> >>> +
> >>> + bpf_object__for_each_program(pos, obj) {
> >>> + bpf_program__set_ifindex(pos, ifindex);
> >>> + bpf_program__set_type(pos, attr.prog_type);
> >>> + bpf_program__set_expected_attach_type(pos,
> >>> + expected_attach_type);
> >>> + }
> >>
> >> I still believe you can have programs of different types here, and be able
> >> to load them. I tried it and managed to have it working fine. If no type is
> >> provided from command line we can retrieve types for each program from its
> >> section name. If a type is provided on the command line, we can do the same,
> >> but I am not sure we should do it, or impose that type for all programs
> >> instead.
> > I can move auto-detection into this new bpf_object__for_each_program
> > loop. So if no type is specified, try to infer the type from each prog
> > section name, otherwise, use the provided one for all progs. Do we want
> > something like that?
>
> This is what I have in mind. But others may disagree.
Hm, I did another look and here is the problem.
Current **load** implementation actually loads all progs (and maps) from the
file: do_load -> bpf_object__load -> bpf_object__load_progs. It pins only the
first prog, but loads them all. That's why I don't understand how multiprog
loading can work currently (because we don't set correct
prog_type/expected_attach_type for all progs, only the first one).
Should we have the following functionality instead:
Both **load** and **loadall** load all maps and programs from the *OBJ* and
differ only in pinning. **load** pins only the first program from the *OBJ*
as *FILE*. **loadall** pins all programs and maps from the *OBJ* under
*FILE* directory.
If we want **load** to load only the first prog, we should probably
amend bpf_object__load to be able to filter the progs we (don't) want.
Thoughts?
> > Btw, do you have some existing real life example of where it's needed so
> > I can test this new implementation? (maybe something under samples/ ?)
>
> I thought about an ELF file containing both an XDP and a TC classifier
> program for example. XDP can mark programs for TC, then TC process them
> with all the facilities we have for skbs. It does not _have_ to be in
> the same ELF file, but could be.
>
> I haven't searched samples/bpf/ in depth, but a grep on SEC shows a
> couple of files with several types (kprobe/kretprobe, classifier/xdp).
> samples/bpf/xdp2skb_meta_kern.c looks like a good candidate. Or actually
> for testing purposes, I simply used the following:
>
> #define SEC(NAME) __attribute__((section(NAME), used))
>
> int _version SEC("version") = 1;
>
> SEC("classifier")
> int func()
> {
> return 1;
> }
>
> SEC("xdp")
> int funcbar()
> {
> return 0;
> }
Thanks, I just found socket_cookie_prog.c in selftests which has
cgroup/connect6 and sockops sections, I can probably use that for
testing.
> >>> qsort(map_replace, old_map_fds, sizeof(*map_replace),
> >>> map_replace_compar);
> >>> @@ -1001,9 +1028,25 @@ static int do_load(int argc, char **argv)
> >>> goto err_close_obj;
> >>> }
> >>> - if (do_pin_fd(bpf_program__fd(prog), pinfile))
> >>> + err = mount_bpffs_for_pin(pinfile);
> >>> + if (err)
> >>> goto err_close_obj;
> >>> + if (prog) {
> >>
> >> Nit: Maybe "if (first_prog_only) {" instead? If I understand correctly, at
> >> this stage it should be equivalent, but in my opinion it would make it
> >> easier to understand why we have two cases here.
> > Sure, I can do that if you think that's more readable, I don't have a
> > preference.
>
> Thanks!
> Quentin
>
^ permalink raw reply
* Re: [PATCH net-next 1/2] net: phy: use phy_id_mask value zero for exact match
From: Andrew Lunn @ 2018-11-08 18:34 UTC (permalink / raw)
To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <e69ac41d-7c1a-78dd-06b2-cb7bffab9e80@gmail.com>
On Wed, Nov 07, 2018 at 09:53:32PM +0100, Heiner Kallweit wrote:
> @@ -500,7 +500,7 @@ struct phy_driver {
> struct mdio_driver_common mdiodrv;
> u32 phy_id;
> char *name;
> - u32 phy_id_mask;
> + u32 phy_id_mask; /* value 0 means exact match */
Hi Heiner
Please make this comment part of the kerneldoc which is just above:
* phy_id: The result of reading the UID registers of this PHY
* type, and ANDing them with the phy_id_mask.
Andrew
^ permalink raw reply
* Re: [PATCH net-next 2/2] net: phy: realtek: remove boilerplate code from driver configs
From: Andrew Lunn @ 2018-11-08 18:37 UTC (permalink / raw)
To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <7f3b2935-f6d3-f7f6-fb42-4fe40d381cc6@gmail.com>
> {
> .phy_id = 0x00008201,
> .name = "RTL8201CP Ethernet",
> - .phy_id_mask = 0x0000ffff,
> .features = PHY_BASIC_FEATURES,
> .flags = PHY_HAS_INTERRUPT,
> }, {
> .phy_id = 0x001cc816,
> .name = "RTL8201F Fast Ethernet",
> - .phy_id_mask = 0x001fffff,
Hi Heiner
"RTL8201CP Ethernet" has a mask of 0x0000ffff, where as all the others
use 0x001fffff. Is this correct?
Andrew
^ permalink raw reply
* [PATCH mlx5-next 00/10] Collection of ODP fixes
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
From: Leon Romanovsky <leonro@mellanox.com>
Hi,
This is collection of fixes to mlx5_core and mlx5_ib ODP logic.
There are two main reasons why we decided to forward it to mlx5-next
and not to rdma-rc or net like our other fixes.
1. We have large number of patches exactly in that area that are ready
for submission and we don't want to create extra merge work for
maintainers due to that. Among those future series (already ready and
tested): internal change of mlx5_core pagefaults handling, moving ODP
code to RDMA, change in EQ mechanism, simplification and moving SRQ QP
to RDMA, extra fixes to mlx5_ib ODP and ODP SRQ support.
2. Most of those fixes are for old bugs and there is no rush to fix them
right now (in -rc1/-rc2).
Thanks
Moni Shoua (10):
net/mlx5: Release resource on error flow
IB/mlx5: Avoid hangs due to a missing completion
net/mlx5: Add interface to hold and release core resources
net/mlx5: Enumerate page fault types
IB/mlx5: Lock QP during page fault handling
net/mlx5: Return success for PAGE_FAULT_RESUME in internal error state
net/mlx5: Use multi threaded workqueue for page fault handling
IB/mlx5: Call PAGE_FAULT_RESUME command asynchronously
net/mlx5: Remove unused function
IB/mlx5: Improve ODP debugging messages
drivers/infiniband/core/umem_odp.c | 14 +-
drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
drivers/infiniband/hw/mlx5/mr.c | 15 +-
drivers/infiniband/hw/mlx5/odp.c | 158 ++++++++++++++----
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 21 +--
drivers/net/ethernet/mellanox/mlx5/core/qp.c | 20 ++-
include/linux/mlx5/device.h | 7 +
include/linux/mlx5/driver.h | 7 +-
include/linux/mlx5/qp.h | 5 +
10 files changed, 191 insertions(+), 59 deletions(-)
^ permalink raw reply
* [PATCH mlx5-next 01/10] net/mlx5: Release resource on error flow
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Fix reference counting leakage when the event handler aborts due to an
unsupported event for the resource type.
Fixes: a14c2d4beee5 ("net/mlx5_core: Warn on unsupported events of QP/RQ/SQ")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 91b8139a388d..690dc1dd9391 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -132,7 +132,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
event_type, rsn);
- return;
+ goto out;
}
switch (common->res) {
@@ -150,7 +150,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
-
+out:
mlx5_core_put_rsc(common);
}
--
2.19.1
^ permalink raw reply related
* [PATCH mlx5-next 02/10] IB/mlx5: Avoid hangs due to a missing completion
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Fix 2 flows that may cause a process to hang on wait_for_completion():
1. When callback for create MKEY command returns with bad status
2. When callback for create MKEY command is called before executer of
command calls wait_for_completion()
The following call trace might be triggered in the above flows:
INFO: task echo_server:1655 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
echo_server D ffff880813fb6898 0 1655 1 0x00000004
ffff880423f5b880 0000000000000086 ffff880402290fd0 ffff880423f5bfd8
ffff880423f5bfd8 ffff880423f5bfd8 ffff880402290fd0 ffff880813fb68a0
7fffffffffffffff ffff880813fb6898 ffff880402290fd0 ffff880813fb6898
Call Trace:
[<ffffffff816a94c9>] schedule+0x29/0x70
[<ffffffff816a6fd9>] schedule_timeout+0x239/0x2c0
[<ffffffffc07309e2>] ? mlx5_cmd_exec_cb+0x22/0x30 [mlx5_core]
[<ffffffffc073e697>] ? mlx5_core_create_mkey_cb+0xb7/0x220 [mlx5_core]
[<ffffffff811b94b7>] ? mm_drop_all_locks+0xd7/0x110
[<ffffffff816a987d>] wait_for_completion+0xfd/0x140
[<ffffffff810c4810>] ? wake_up_state+0x20/0x20
[<ffffffffc08fd308>] mlx5_mr_cache_alloc+0xa8/0x170 [mlx5_ib]
[<ffffffffc0909626>] implicit_mr_alloc+0x36/0x190 [mlx5_ib]
[<ffffffffc090a26e>] mlx5_ib_alloc_implicit_mr+0x4e/0xa0 [mlx5_ib]
[<ffffffffc08ff2f3>] mlx5_ib_reg_user_mr+0x93/0x6a0 [mlx5_ib]
[<ffffffffc0907410>] ? mlx5_ib_exp_query_device+0xab0/0xbc0 [mlx5_ib]
[<ffffffffc04998be>] ib_uverbs_exp_reg_mr+0x2fe/0x550 [ib_uverbs]
[<ffffffff811edaff>] ? do_huge_pmd_anonymous_page+0x2bf/0x530
[<ffffffffc048f6cc>] ib_uverbs_write+0x3ec/0x490 [ib_uverbs]
[<ffffffff81200d2d>] vfs_write+0xbd/0x1e0
[<ffffffff81201b3f>] SyS_write+0x7f/0xe0
[<ffffffff816b4fc9>] system_call_fastpath+0x16/0x1b
Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
drivers/infiniband/hw/mlx5/mr.c | 15 ++++++++++++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b651a7a6fde9..cd9335e368bd 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -644,6 +644,7 @@ struct mlx5_cache_ent {
struct delayed_work dwork;
int pending;
struct completion compl;
+ atomic_t do_complete;
};
struct mlx5_mr_cache {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9b195d65a13e..259dd49c6874 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -143,7 +143,7 @@ static void reg_mr_callback(int status, void *context)
kfree(mr);
dev->fill_delay = 1;
mod_timer(&dev->delay_timer, jiffies + HZ);
- return;
+ goto do_complete;
}
mr->mmkey.type = MLX5_MKEY_MR;
@@ -167,8 +167,13 @@ static void reg_mr_callback(int status, void *context)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
- if (!completion_done(&ent->compl))
+do_complete:
+ spin_lock_irqsave(&ent->lock, flags);
+ if (atomic_read(&ent->do_complete)) {
complete(&ent->compl);
+ atomic_dec(&ent->do_complete);
+ }
+ spin_unlock_irqrestore(&ent->lock, flags);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -476,9 +481,12 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
+ atomic_inc(&ent->do_complete);
err = add_keys(dev, entry, 1);
- if (err && err != -EAGAIN)
+ if (err && err != -EAGAIN) {
+ atomic_dec(&ent->do_complete);
return ERR_PTR(err);
+ }
wait_for_completion(&ent->compl);
} else {
@@ -687,6 +695,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->order = i + 2;
ent->dev = dev;
ent->limit = 0;
+ atomic_set(&ent->do_complete, 0);
init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
^ permalink raw reply related
* [PATCH mlx5-next 03/10] net/mlx5: Add interface to hold and release core resources
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Sometimes upper layers may want to prevent the destruction of a core
resource for a period of time while work on that resource is in
progress. Add API to support this.
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/qp.c | 16 ++++++++++++++++
include/linux/mlx5/qp.h | 5 +++++
2 files changed, 21 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 690dc1dd9391..cba4a435043a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -670,3 +670,19 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+
+ return mlx5_get_rsc(dev, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index fbe322c966bc..b26ea9077384 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
int reset, void *out, int out_size);
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
static inline const char *mlx5_qp_type_str(int type)
{
switch (type) {
--
2.19.1
^ permalink raw reply related
* [PATCH mlx5-next 04/10] net/mlx5: Enumerate page fault types
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Give meaningful names to type of WQE page faults.
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
include/linux/mlx5/device.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b4c0457fbebd..e326524bafcc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -212,6 +212,13 @@ enum {
MLX5_PFAULT_SUBTYPE_RDMA = 1,
};
+enum wqe_page_fault_type {
+ MLX5_WQE_PF_TYPE_RMP = 0,
+ MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1,
+ MLX5_WQE_PF_TYPE_RESP = 2,
+ MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3,
+};
+
enum {
MLX5_PERM_LOCAL_READ = 1 << 2,
MLX5_PERM_LOCAL_WRITE = 1 << 3,
--
2.19.1
^ permalink raw reply related
* [PATCH mlx5-next 05/10] IB/mlx5: Lock QP during page fault handling
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
When page fault event for a WQE arrives, the event data contains the
resource (e.g. QP) number which will later be used by the page fault
handler to retrieve the resource. Meanwhile, another context can destroy
the resource and cause use-after-free. To avoid that, take a reference on the
resource when handler starts and release it when it ends.
Page fault events for RDMA operations don't need to be protected because
the driver doesn't need to access the QP in the page fault handler.
Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/odp.c | 46 +++++++++++++++++++++++++-------
1 file changed, 37 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b04eb6775326..abce55b8b9ba 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1016,16 +1016,31 @@ static int mlx5_ib_mr_responder_pfault_handler(
return 0;
}
-static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
- u32 wq_num)
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+ u32 wq_num, int pf_type)
{
- struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+ enum mlx5_res_type res_type;
- if (!mqp) {
- mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+ switch (pf_type) {
+ case MLX5_WQE_PF_TYPE_RMP:
+ res_type = MLX5_RES_SRQ;
+ break;
+ case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+ case MLX5_WQE_PF_TYPE_RESP:
+ case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+ res_type = MLX5_RES_QP;
+ break;
+ default:
return NULL;
}
+ return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+}
+
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
return to_mibqp(mqp);
}
@@ -1039,18 +1054,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+ struct mlx5_core_rsc_common *res;
struct mlx5_ib_qp *qp;
+ res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+ if (!res) {
+ mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+ return;
+ }
+
+ switch (res->res) {
+ case MLX5_RES_QP:
+ qp = res_to_qp(res);
+ break;
+ default:
+ mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
+ goto resolve_page_fault;
+ }
+
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
- qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
- if (!qp)
- goto resolve_page_fault;
-
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
@@ -1090,6 +1117,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->wqe.wq_num, resume_with_error,
pfault->type);
+ mlx5_core_res_put(res);
free_page((unsigned long)buffer);
}
^ permalink raw reply related
* [PATCH mlx5-next 07/10] net/mlx5: Use multi threaded workqueue for page fault handling
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Page fault events are processed in a workqueue context. Since each QP
can have up to two concurrent unrelated page-faults, one for requester
and one for responder, page-fault handling can be done in parallel.
Achieve this by changing the workqueue to be multi-threaded.
The number of threads is the same as the number of command interface
channels to avoid command interface bottlenecks.
In addition to multi-threads, change the workqueue flags to give it high
priority.
Stress benchmark shows that before this change 85% of page faults were
waiting in queue 8 seconds or more while after the change 98% of page
faults were waiting in queue 64 milliseconds or less. The number of threads
was chosen as the number of channels to the command interface.
Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16a9b07..aeab0c4f60f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -356,8 +356,9 @@ static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
spin_lock_init(&pf_ctx->lock);
INIT_WORK(&pf_ctx->work, eq_pf_action);
- pf_ctx->wq = alloc_ordered_workqueue(name,
- WQ_MEM_RECLAIM);
+ pf_ctx->wq = alloc_workqueue(name,
+ WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+ MLX5_NUM_CMD_EQE);
if (!pf_ctx->wq)
return -ENOMEM;
--
2.19.1
^ permalink raw reply related
* [PATCH mlx5-next 08/10] IB/mlx5: Call PAGE_FAULT_RESUME command asynchronously
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
Telling the HCA that page fault handling is done and QP can resume
its flow is done in the context of the page fault handler. This blocks
the handling of the next work in queue without a need.
Call the PAGE_FAULT_RESUME command in an asynchronous manner and free
the workqueue to pick the next work item for handling. All tasks that
were executed after PAGE_FAULT_RESUME need to be done now
in the callback of the asynchronous command mechanism.
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/odp.c | 110 +++++++++++++++++++++++++------
include/linux/mlx5/driver.h | 3 +
2 files changed, 94 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index abce55b8b9ba..0c4f469cdd5b 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -298,20 +298,78 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return;
}
+struct pfault_resume_cb_ctx {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_core_rsc_common *res;
+ struct mlx5_pagefault *pfault;
+};
+
+static void page_fault_resume_callback(int status, void *context)
+{
+ struct pfault_resume_cb_ctx *ctx = context;
+ struct mlx5_pagefault *pfault = ctx->pfault;
+
+ if (status)
+ mlx5_ib_err(ctx->dev, "Resolve the page fault failed with status %d\n",
+ status);
+
+ if (ctx->res)
+ mlx5_core_res_put(ctx->res);
+ kfree(pfault);
+ kfree(ctx);
+}
+
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
+ struct mlx5_core_rsc_common *res,
struct mlx5_pagefault *pfault,
- int error)
+ int error,
+ bool async)
{
+ int ret = 0;
+ u32 *out = pfault->out_pf_resume;
+ u32 *in = pfault->in_pf_resume;
+ u32 token = pfault->token;
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
- pfault->wqe.wq_num : pfault->token;
- int ret = mlx5_core_page_fault_resume(dev->mdev,
- pfault->token,
- wq_num,
- pfault->type,
- error);
- if (ret)
- mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
- wq_num);
+ pfault->wqe.wq_num : pfault->token;
+ u8 type = pfault->type;
+ struct pfault_resume_cb_ctx *ctx = NULL;
+
+ if (async)
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+ if (ctx) {
+ ctx->dev = dev;
+ ctx->res = res;
+ ctx->pfault = pfault;
+ }
+
+ memset(out, 0, MLX5_ST_SZ_BYTES(page_fault_resume_out));
+ memset(in, 0, MLX5_ST_SZ_BYTES(page_fault_resume_in));
+
+ MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, token, token);
+
+ if (ctx)
+ ret = mlx5_cmd_exec_cb(dev->mdev,
+ in, MLX5_ST_SZ_BYTES(page_fault_resume_in),
+ out, MLX5_ST_SZ_BYTES(page_fault_resume_out),
+ page_fault_resume_callback, ctx);
+ else
+ ret = mlx5_cmd_exec(dev->mdev,
+ in, MLX5_ST_SZ_BYTES(page_fault_resume_in),
+ out, MLX5_ST_SZ_BYTES(page_fault_resume_out));
+ if (ret || !ctx) {
+ if (ret)
+ mlx5_ib_err(dev, "Failed to resume the page fault (%d)\n", ret);
+ if (res)
+ mlx5_core_res_put(res);
+ if (async)
+ kfree(pfault);
+ kfree(ctx);
+ }
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
@@ -1045,7 +1103,8 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
}
static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
- struct mlx5_pagefault *pfault)
+ struct mlx5_pagefault *pfault,
+ bool async_resume)
{
int ret;
void *wqe, *wqe_end;
@@ -1113,11 +1172,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
resume_with_error = 0;
resolve_page_fault:
- mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
+ mlx5_ib_page_fault_resume(dev, res, pfault, resume_with_error, async_resume);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->wqe.wq_num, resume_with_error,
pfault->type);
- mlx5_core_res_put(res);
free_page((unsigned long)buffer);
}
@@ -1128,7 +1186,8 @@ static int pages_in_range(u64 address, u32 length)
}
static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
- struct mlx5_pagefault *pfault)
+ struct mlx5_pagefault *pfault,
+ bool async_resume)
{
u64 address;
u32 length;
@@ -1166,14 +1225,14 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
} else if (ret < 0 || pages_in_range(address, length) > ret) {
- mlx5_ib_page_fault_resume(dev, pfault, 1);
+ mlx5_ib_page_fault_resume(dev, NULL, pfault, 1, async_resume);
if (ret != -ENOENT)
mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
ret, pfault->token, pfault->type);
return;
}
- mlx5_ib_page_fault_resume(dev, pfault, 0);
+ mlx5_ib_page_fault_resume(dev, NULL, pfault, 0, async_resume);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
pfault->token, pfault->type,
prefetch_activated);
@@ -1201,18 +1260,31 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
{
struct mlx5_ib_dev *dev = context;
u8 event_subtype = pfault->event_subtype;
+ struct mlx5_pagefault *pfault_copy;
+ struct mlx5_pagefault *pfault_arg;
+ bool async_resume;
+
+ pfault_copy = kmalloc(sizeof(*pfault_copy), GFP_KERNEL);
+ if (pfault_copy) {
+ memcpy(pfault_copy, pfault, sizeof(*pfault_copy));
+ async_resume = true;
+ pfault_arg = pfault_copy;
+ } else {
+ async_resume = false;
+ pfault_arg = pfault;
+ }
switch (event_subtype) {
case MLX5_PFAULT_SUBTYPE_WQE:
- mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
+ mlx5_ib_mr_wqe_pfault_handler(dev, pfault_arg, async_resume);
break;
case MLX5_PFAULT_SUBTYPE_RDMA:
- mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
+ mlx5_ib_mr_rdma_pfault_handler(dev, pfault_arg, async_resume);
break;
default:
mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
event_subtype);
- mlx5_ib_page_fault_resume(dev, pfault, 1);
+ mlx5_ib_page_fault_resume(dev, NULL, pfault, 1, false);
}
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index aa5963b5d38e..85275612a473 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -738,6 +738,9 @@ enum mlx5_pagefault_type_flags {
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
+ u32 out_pf_resume[MLX5_ST_SZ_DW(page_fault_resume_out)];
+ u32 in_pf_resume[MLX5_ST_SZ_DW(page_fault_resume_in)];
+
u32 bytes_committed;
u32 token;
u8 event_subtype;
--
2.19.1
^ permalink raw reply related
* [PATCH mlx5-next 09/10] net/mlx5: Remove unused function
From: Leon Romanovsky @ 2018-11-08 19:10 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Artemy Kovalyov, Majd Dibbiny,
Moni Shoua, Saeed Mahameed, linux-netdev
In-Reply-To: <20181108191017.21891-1-leon@kernel.org>
From: Moni Shoua <monis@mellanox.com>
No callers to the function mlx5_core_page_fault_resume() so it is OK to
delete it.
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 16 ----------------
include/linux/mlx5/driver.h | 4 ----
2 files changed, 20 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index aeab0c4f60f4..22eabb80e122 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -373,22 +373,6 @@ static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
return -ENOMEM;
}
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error)
-{
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
- MLX5_SET(page_fault_resume_in, in, opcode,
- MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, token, token);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
static void general_event_handler(struct mlx5_core_dev *dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 85275612a473..1de1e8e91382 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1141,10 +1141,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error);
-#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
--
2.19.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox