* [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events
@ 2024-10-29 12:36 Aleksandr Loktionov
2024-11-04 15:40 ` Michal Schmidt
2024-11-04 17:19 ` Tony Nguyen
0 siblings, 2 replies; 5+ messages in thread
From: Aleksandr Loktionov @ 2024-10-29 12:36 UTC (permalink / raw)
To: intel-wired-lan, anthony.l.nguyen, aleksandr.loktionov
Cc: netdev, Jan Sokolowski, Padraig J Connolly
Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs.
This flag is also used in other network adapters like ICE.
Usage:
- "on" - The problematic VF will be automatically reset
if a malformed descriptor is detected.
- "off" - The problematic VF will be disabled.
In cases where a VF sends malformed packets classified as malicious, it can
cause the Tx queue to freeze, rendering it unusable for several minutes. When
an MDD event occurs, this new implementation allows for a graceful VF reset to
quickly restore operational state.
Currently, VF iqueues are disabled if an MDD event occurs. This patch adds the
ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD
event logging throttling to avoid dmesg pollution and unifies the format of
Tx and Rx MDD messages.
Note: Standard message rate limiting functions like dev_info_ratelimited()
do not meet our requirements. Custom rate limiting is implemented,
please see the code for details.
Co-developed-by: Jan Sokolowski <jan.sokolowski@intel.com>
Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Co-developed-by: Padraig J Connolly <padraig.j.connolly@intel.com>
Signed-off-by: Padraig J Connolly <padraig.j.connolly@intel.com>
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e.h | 4 +-
.../net/ethernet/intel/i40e/i40e_debugfs.c | 2 +-
.../net/ethernet/intel/i40e/i40e_ethtool.c | 2 +
drivers/net/ethernet/intel/i40e/i40e_main.c | 105 ++++++++++++++++--
.../ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
.../ethernet/intel/i40e/i40e_virtchnl_pf.h | 11 +-
6 files changed, 111 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d546567..6d6683c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -87,6 +87,7 @@ enum i40e_state {
__I40E_SERVICE_SCHED,
__I40E_ADMINQ_EVENT_PENDING,
__I40E_MDD_EVENT_PENDING,
+ __I40E_MDD_VF_PRINT_PENDING,
__I40E_VFLR_EVENT_PENDING,
__I40E_RESET_RECOVERY_PENDING,
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -190,6 +191,7 @@ enum i40e_pf_flags {
*/
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
I40E_FLAG_VF_VLAN_PRUNING_ENA,
+ I40E_FLAG_MDD_AUTO_RESET_VF,
I40E_PF_FLAGS_NBITS, /* must be last */
};
@@ -571,7 +573,7 @@ struct i40e_pf {
int num_alloc_vfs; /* actual number of VFs allocated */
u32 vf_aq_requests;
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
-
+ unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */
/* DCBx/DCBNL capability for PF that indicates
* whether DCBx is managed by firmware or host
* based agent (LLDPAD). Also, indicates what
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index abf624d..6a697bf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -721,7 +721,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
- vf->num_mdd_events);
+ vf->mdd_tx_events.count + vf->mdd_rx_events.count);
} else {
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 1d0d2e5..d146575 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
I40E_PRIV_FLAG("vf-vlan-pruning",
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
+ I40E_PRIV_FLAG("mdd-auto-reset-vf",
+ I40E_FLAG_MDD_AUTO_RESET_VF, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index cbcfada..07f0a91 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11189,22 +11189,91 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
i40e_reset_and_rebuild(pf, false, lock_acquired);
}
+/**
+ * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
+ * @pf: board private structure
+ * @vf: pointer to the VF structure
+ * @is_tx: true - for Tx event, false - for Rx
+ */
+static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
+ bool is_tx)
+{
+ dev_err(&pf->pdev->dev, is_tx ?
+ "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
+ "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
+ vf->mdd_rx_events.count,
+ pf->hw.pf_id,
+ vf->vf_id,
+ vf->default_lan_addr.addr,
+ test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
+}
+
+/**
+ * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
+{
+ unsigned int i;
+
+ /* check that there are pending MDD events to print */
+ if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
+ return;
+
+ /* VF MDD event logs are rate limited to one second intervals */
+ if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1))
+ return;
+
+ pf->last_printed_mdd_jiffies = jiffies;
+
+ for (i = 0; i < pf->num_alloc_vfs; i++) {
+ struct i40e_vf *vf = &pf->vf[i];
+ bool is_printed = false;
+
+ /* only print Rx MDD event message if there are new events */
+ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+ vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, false);
+ is_printed = true;
+ }
+
+ /* only print Tx MDD event message if there are new events */
+ if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+ vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, true);
+ is_printed = true;
+ }
+
+ if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
+ dev_info(&pf->pdev->dev,
+ "Use PF Control I/F to re-enable the VF #%d\n",
+ i);
+ }
+}
+
/**
* i40e_handle_mdd_event
* @pf: pointer to the PF structure
*
* Called from the MDD irq handler to identify possibly malicious vfs
**/
static void i40e_handle_mdd_event(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
bool mdd_detected = false;
struct i40e_vf *vf;
u32 reg;
int i;
- if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
+ if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
+ /* Since the VF MDD event logging is rate limited, check if
+ * there are pending MDD events.
+ */
+ i40e_print_vfs_mdd_events(pf);
return;
+ }
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11248,36 +11317,50 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
/* see if one of the VFs needs its hand slapped */
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+ bool is_mdd_on_tx = false;
+ bool is_mdd_on_rx = false;
+
vf = &(pf->vf[i]);
reg = rd32(hw, I40E_VP_MDET_TX(i));
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_tx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_tx = true;
}
reg = rd32(hw, I40E_VP_MDET_RX(i));
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_rx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_rx = true;
+ }
+
+ if ((is_mdd_on_tx || is_mdd_on_rx) &&
+ test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+ /* VF MDD event counters will be cleared by
+ * reset, so print the event prior to reset.
+ */
+ if (is_mdd_on_rx)
+ i40e_print_vf_mdd_event(pf, vf, false);
+ if (is_mdd_on_tx)
+ i40e_print_vf_mdd_event(pf, vf, true);
+
+ i40e_vc_reset_vf(vf, true);
}
}
/* re-enable mdd interrupt cause */
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_flush(hw);
+
+ i40e_print_vfs_mdd_events(pf);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 662622f..5b4618e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
* @notify_vf: notify vf about reset or not
* Reset VF handler.
**/
-static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
{
struct i40e_pf *pf = vf->pf;
int i;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 66f95e2..5cf74f1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -64,6 +64,12 @@ struct i40evf_channel {
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
};
+struct i40e_mdd_vf_events {
+ u64 count; /* total count of Rx|Tx events */
+ /* count number of the last printed event */
+ u64 last_printed;
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
u8 num_req_queues; /* num of requested qps */
- u64 num_mdd_events; /* num of mdd events detected */
+ /* num of mdd tx and rx events detected */
+ struct i40e_mdd_vf_events mdd_rx_events;
+ struct i40e_mdd_vf_events mdd_tx_events;
unsigned long vf_caps; /* vf's adv. capabilities */
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
u32 v_retval, u8 *msg, u16 msglen);
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events
2024-10-29 12:36 [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events Aleksandr Loktionov
@ 2024-11-04 15:40 ` Michal Schmidt
2024-11-04 15:50 ` Michal Schmidt
2024-11-15 19:29 ` Loktionov, Aleksandr
2024-11-04 17:19 ` Tony Nguyen
1 sibling, 2 replies; 5+ messages in thread
From: Michal Schmidt @ 2024-11-04 15:40 UTC (permalink / raw)
To: Aleksandr Loktionov
Cc: intel-wired-lan, anthony.l.nguyen, netdev, Jan Sokolowski,
Padraig J Connolly, maciej.fijalkowski, Przemek Kitszel
On Tue, Oct 29, 2024 at 1:36 PM Aleksandr Loktionov
<aleksandr.loktionov@intel.com> wrote:
> Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs.
> This flag is also used in other network adapters like ICE.
>
> Usage:
> - "on" - The problematic VF will be automatically reset
> if a malformed descriptor is detected.
> - "off" - The problematic VF will be disabled.
>
> In cases where a VF sends malformed packets classified as malicious, it can
> cause the Tx queue to freeze, rendering it unusable for several minutes. When
> an MDD event occurs, this new implementation allows for a graceful VF reset to
> quickly restore operational state.
>
> Currently, VF iqueues are disabled if an MDD event occurs. This patch adds the
s/iqueues/queues/
> ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD
> event logging throttling to avoid dmesg pollution and unifies the format of
> Tx and Rx MDD messages.
>
> Note: Standard message rate limiting functions like dev_info_ratelimited()
> do not meet our requirements. Custom rate limiting is implemented,
> please see the code for details.
I am not opposed to the custom rate-limiting, but have you also
considered struct ratelimit_state, ratelimit_state_{init,exit}(),
__ratelimit()?
> Co-developed-by: Jan Sokolowski <jan.sokolowski@intel.com>
> Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> Co-developed-by: Padraig J Connolly <padraig.j.connolly@intel.com>
> Signed-off-by: Padraig J Connolly <padraig.j.connolly@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
> drivers/net/ethernet/intel/i40e/i40e.h | 4 +-
> .../net/ethernet/intel/i40e/i40e_debugfs.c | 2 +-
> .../net/ethernet/intel/i40e/i40e_ethtool.c | 2 +
> drivers/net/ethernet/intel/i40e/i40e_main.c | 105 ++++++++++++++++--
> .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
> .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 11 +-
> 6 files changed, 111 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index d546567..6d6683c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -87,6 +87,7 @@ enum i40e_state {
> __I40E_SERVICE_SCHED,
> __I40E_ADMINQ_EVENT_PENDING,
> __I40E_MDD_EVENT_PENDING,
> + __I40E_MDD_VF_PRINT_PENDING,
> __I40E_VFLR_EVENT_PENDING,
> __I40E_RESET_RECOVERY_PENDING,
> __I40E_TIMEOUT_RECOVERY_PENDING,
> @@ -190,6 +191,7 @@ enum i40e_pf_flags {
> */
> I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
> I40E_FLAG_VF_VLAN_PRUNING_ENA,
> + I40E_FLAG_MDD_AUTO_RESET_VF,
> I40E_PF_FLAGS_NBITS, /* must be last */
> };
>
> @@ -571,7 +573,7 @@ struct i40e_pf {
> int num_alloc_vfs; /* actual number of VFs allocated */
> u32 vf_aq_requests;
> u32 arq_overflows; /* Not fatal, possibly indicative of problems */
> -
> + unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */
> /* DCBx/DCBNL capability for PF that indicates
> * whether DCBx is managed by firmware or host
> * based agent (LLDPAD). Also, indicates what
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> index abf624d..6a697bf 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> @@ -721,7 +721,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
> dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
> vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
> dev_info(&pf->pdev->dev, " num MDD=%lld\n",
> - vf->num_mdd_events);
> + vf->mdd_tx_events.count + vf->mdd_rx_events.count);
> } else {
> dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
> }
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> index 1d0d2e5..d146575 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> @@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
> I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
> I40E_PRIV_FLAG("vf-vlan-pruning",
> I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
> + I40E_PRIV_FLAG("mdd-auto-reset-vf",
> + I40E_FLAG_MDD_AUTO_RESET_VF, 0),
> };
>
> #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index cbcfada..07f0a91 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -11189,22 +11189,91 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
> i40e_reset_and_rebuild(pf, false, lock_acquired);
> }
>
> +/**
> + * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
> + * @pf: board private structure
> + * @vf: pointer to the VF structure
> + * @is_tx: true - for Tx event, false - for Rx
> + */
> +static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
> + bool is_tx)
> +{
> + dev_err(&pf->pdev->dev, is_tx ?
> + "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
> + "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
> + vf->mdd_rx_events.count,
> + pf->hw.pf_id,
> + vf->vf_id,
> + vf->default_lan_addr.addr,
> + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
> +}
> +
> +/**
> + * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
> + * @pf: pointer to the PF structure
> + *
> + * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
> + */
> +static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
> +{
> + unsigned int i;
> +
> + /* check that there are pending MDD events to print */
> + if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
> + return;
> +
> + /* VF MDD event logs are rate limited to one second intervals */
> + if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1))
> + return;
> +
> + pf->last_printed_mdd_jiffies = jiffies;
> +
> + for (i = 0; i < pf->num_alloc_vfs; i++) {
> + struct i40e_vf *vf = &pf->vf[i];
> + bool is_printed = false;
> +
> + /* only print Rx MDD event message if there are new events */
> + if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
> + vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
> + i40e_print_vf_mdd_event(pf, vf, false);
> + is_printed = true;
> + }
> +
> + /* only print Tx MDD event message if there are new events */
> + if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
> + vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
> + i40e_print_vf_mdd_event(pf, vf, true);
> + is_printed = true;
> + }
> +
> + if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
> + dev_info(&pf->pdev->dev,
> + "Use PF Control I/F to re-enable the VF #%d\n",
> + i);
> + }
> +}
> +
> /**
> * i40e_handle_mdd_event
> * @pf: pointer to the PF structure
> *
> * Called from the MDD irq handler to identify possibly malicious vfs
> **/
> static void i40e_handle_mdd_event(struct i40e_pf *pf)
> {
> struct i40e_hw *hw = &pf->hw;
> bool mdd_detected = false;
> struct i40e_vf *vf;
> u32 reg;
> int i;
>
> - if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
> + if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
> + /* Since the VF MDD event logging is rate limited, check if
> + * there are pending MDD events.
> + */
> + i40e_print_vfs_mdd_events(pf);
Can there ever be anything to print here? i40e_print_vfs_mdd_events()
is also called at the end of i40e_handle_mdd_event(). It always clears
the __I40E_MDD_VF_PRINT_PENDING bit. So how can the bit ever remain
set between invocations? In fact, shouldn't the bit be a local
variable of this function instead of a pf->state bit?
> return;
> + }
>
> /* find what triggered the MDD event */
> reg = rd32(hw, I40E_GL_MDET_TX);
> @@ -11248,36 +11317,50 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
>
> /* see if one of the VFs needs its hand slapped */
> for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
> + bool is_mdd_on_tx = false;
> + bool is_mdd_on_rx = false;
> +
> vf = &(pf->vf[i]);
> reg = rd32(hw, I40E_VP_MDET_TX(i));
> if (reg & I40E_VP_MDET_TX_VALID_MASK) {
> + set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
> wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
> - vf->num_mdd_events++;
> - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
> - i);
> - dev_info(&pf->pdev->dev,
> - "Use PF Control I/F to re-enable the VF\n");
> + vf->mdd_tx_events.count++;
> set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
> + is_mdd_on_tx = true;
> }
>
> reg = rd32(hw, I40E_VP_MDET_RX(i));
> if (reg & I40E_VP_MDET_RX_VALID_MASK) {
> + set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
> wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
> - vf->num_mdd_events++;
> - dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
> - i);
> - dev_info(&pf->pdev->dev,
> - "Use PF Control I/F to re-enable the VF\n");
> + vf->mdd_rx_events.count++;
> set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
> + is_mdd_on_rx = true;
> + }
> +
> + if ((is_mdd_on_tx || is_mdd_on_rx) &&
> + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
> + /* VF MDD event counters will be cleared by
> + * reset, so print the event prior to reset.
> + */
> + if (is_mdd_on_rx)
> + i40e_print_vf_mdd_event(pf, vf, false);
> + if (is_mdd_on_tx)
> + i40e_print_vf_mdd_event(pf, vf, true);
I see there's no rate-limiting applied here. Is this intentional?
> +
> + i40e_vc_reset_vf(vf, true);
> }
> }
>
> /* re-enable mdd interrupt cause */
> clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
Can you remove this 2nd clearing of the __I40E_MDD_EVENT_PENDING bit?
If the interrupt handler detects a MDD event while we're still
printing the message about the previous one, we don't want to forget
it by clearing it here.
Michal
> reg = rd32(hw, I40E_PFINT_ICR0_ENA);
> reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
> wr32(hw, I40E_PFINT_ICR0_ENA, reg);
> i40e_flush(hw);
> +
> + i40e_print_vfs_mdd_events(pf);
> }
>
> /**
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> index 662622f..5b4618e 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> @@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
> * @notify_vf: notify vf about reset or not
> * Reset VF handler.
> **/
> -static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
> +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
> {
> struct i40e_pf *pf = vf->pf;
> int i;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> index 66f95e2..5cf74f1 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> @@ -64,6 +64,12 @@ struct i40evf_channel {
> u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
> };
>
> +struct i40e_mdd_vf_events {
> + u64 count; /* total count of Rx|Tx events */
> + /* count number of the last printed event */
> + u64 last_printed;
> +};
> +
> /* VF information structure */
> struct i40e_vf {
> struct i40e_pf *pf;
> @@ -92,7 +98,9 @@ struct i40e_vf {
>
> u8 num_queue_pairs; /* num of qps assigned to VF vsis */
> u8 num_req_queues; /* num of requested qps */
> - u64 num_mdd_events; /* num of mdd events detected */
> + /* num of mdd tx and rx events detected */
> + struct i40e_mdd_vf_events mdd_rx_events;
> + struct i40e_mdd_vf_events mdd_tx_events;
>
> unsigned long vf_caps; /* vf's adv. capabilities */
> unsigned long vf_states; /* vf's runtime states */
> @@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
> int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
> u32 v_retval, u8 *msg, u16 msglen);
> int i40e_vc_process_vflr_event(struct i40e_pf *pf);
> +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
> bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
> bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
> void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
> --
> 2.25.1
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events
2024-11-04 15:40 ` Michal Schmidt
@ 2024-11-04 15:50 ` Michal Schmidt
2024-11-15 19:29 ` Loktionov, Aleksandr
1 sibling, 0 replies; 5+ messages in thread
From: Michal Schmidt @ 2024-11-04 15:50 UTC (permalink / raw)
To: Aleksandr Loktionov
Cc: intel-wired-lan, anthony.l.nguyen, netdev, Jan Sokolowski,
Padraig J Connolly, maciej.fijalkowski, Przemek Kitszel
On Mon, Nov 4, 2024 at 4:40 PM Michal Schmidt <mschmidt@redhat.com> wrote:
>
> On Tue, Oct 29, 2024 at 1:36 PM Aleksandr Loktionov
> <aleksandr.loktionov@intel.com> wrote:
[...]
> > +
> > + i40e_vc_reset_vf(vf, true);
> > }
> > }
> >
> > /* re-enable mdd interrupt cause */
> > clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
>
> Can you remove this 2nd clearing of the __I40E_MDD_EVENT_PENDING bit?
> If the interrupt handler detects a MDD event while we're still
> printing the message about the previous one, we don't want to forget
> it by clearing it here.
>
> Michal
Well, I suppose the race I described cannot happen because the
unmasking of ..._MAL_DETECT_MASK happens after this.
But it's still redundant to clear the bit twice.
Michal
> > reg = rd32(hw, I40E_PFINT_ICR0_ENA);
> > reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
> > wr32(hw, I40E_PFINT_ICR0_ENA, reg);
> > i40e_flush(hw);
> > +
> > + i40e_print_vfs_mdd_events(pf);
> > }
> >
> > /**
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events
2024-10-29 12:36 [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events Aleksandr Loktionov
2024-11-04 15:40 ` Michal Schmidt
@ 2024-11-04 17:19 ` Tony Nguyen
1 sibling, 0 replies; 5+ messages in thread
From: Tony Nguyen @ 2024-11-04 17:19 UTC (permalink / raw)
To: Aleksandr Loktionov, intel-wired-lan
Cc: netdev, Jan Sokolowski, Padraig J Connolly
On 10/29/2024 5:36 AM, Aleksandr Loktionov wrote:
> Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs.
> This flag is also used in other network adapters like ICE.
>
> Usage:
> - "on" - The problematic VF will be automatically reset
> if a malformed descriptor is detected.
> - "off" - The problematic VF will be disabled.
>
> In cases where a VF sends malformed packets classified as malicious, it can
> cause the Tx queue to freeze, rendering it unusable for several minutes. When
> an MDD event occurs, this new implementation allows for a graceful VF reset to
> quickly restore operational state.
>
> Currently, VF iqueues are disabled if an MDD event occurs. This patch adds the
> ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD
> event logging throttling to avoid dmesg pollution and unifies the format of
> Tx and Rx MDD messages.
>
> Note: Standard message rate limiting functions like dev_info_ratelimited()
> do not meet our requirements. Custom rate limiting is implemented,
> please see the code for details.
>
> Co-developed-by: Jan Sokolowski <jan.sokolowski@intel.com>
> Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> Co-developed-by: Padraig J Connolly <padraig.j.connolly@intel.com>
> Signed-off-by: Padraig J Connolly <padraig.j.connolly@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
> drivers/net/ethernet/intel/i40e/i40e.h | 4 +-
> .../net/ethernet/intel/i40e/i40e_debugfs.c | 2 +-
> .../net/ethernet/intel/i40e/i40e_ethtool.c | 2 +
> drivers/net/ethernet/intel/i40e/i40e_main.c | 105 ++++++++++++++++--
> .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
> .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 11 +-
Could you add info on this to the i40e Documentation/
> 6 files changed, 111 insertions(+), 15 deletions(-)
...
> +/**
> + * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
> + * @pf: board private structure
> + * @vf: pointer to the VF structure
> + * @is_tx: true - for Tx event, false - for Rx
> + */
> +static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
> + bool is_tx)
> +{
> + dev_err(&pf->pdev->dev, is_tx ?
> + "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
> + "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
This string is largely duplicated. Seems is_tx could adjust Tx/Rx only?
> + vf->mdd_rx_events.count,
This needs to check for, and report, Tx counts?
> + pf->hw.pf_id,
> + vf->vf_id,
> + vf->default_lan_addr.addr,
> + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off");
This could use the new str_on_off() string helper.
> +}
Thanks,
Tony
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events
2024-11-04 15:40 ` Michal Schmidt
2024-11-04 15:50 ` Michal Schmidt
@ 2024-11-15 19:29 ` Loktionov, Aleksandr
1 sibling, 0 replies; 5+ messages in thread
From: Loktionov, Aleksandr @ 2024-11-15 19:29 UTC (permalink / raw)
To: mschmidt
Cc: intel-wired-lan@lists.osuosl.org, Nguyen, Anthony L,
netdev@vger.kernel.org, Sokolowski, Jan, Connolly, Padraig J,
Fijalkowski, Maciej, Kitszel, Przemyslaw
> -----Original Message-----
> From: Michal Schmidt <mschmidt@redhat.com>
> Sent: Monday, November 4, 2024 4:41 PM
> To: Loktionov, Aleksandr <aleksandr.loktionov@intel.com>
> Cc: intel-wired-lan@lists.osuosl.org; Nguyen, Anthony L
> <anthony.l.nguyen@intel.com>; netdev@vger.kernel.org; Sokolowski,
> Jan <jan.sokolowski@intel.com>; Connolly, Padraig J
> <padraig.j.connolly@intel.com>; Fijalkowski, Maciej
> <maciej.fijalkowski@intel.com>; Kitszel, Przemyslaw
> <przemyslaw.kitszel@intel.com>
> Subject: Re: [PATCH iwl-next v4444] i40e: add ability to reset VF
> for Tx and Rx MDD events
>
> On Tue, Oct 29, 2024 at 1:36 PM Aleksandr Loktionov
> <aleksandr.loktionov@intel.com> wrote:
> > Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD
> events for VFs.
> > This flag is also used in other network adapters like ICE.
> >
> > Usage:
> > - "on" - The problematic VF will be automatically reset
> > if a malformed descriptor is detected.
> > - "off" - The problematic VF will be disabled.
> >
> > In cases where a VF sends malformed packets classified as
> malicious,
> > it can cause the Tx queue to freeze, rendering it unusable for
> several
> > minutes. When an MDD event occurs, this new implementation allows
> for
> > a graceful VF reset to quickly restore operational state.
> >
> > Currently, VF iqueues are disabled if an MDD event occurs. This
> patch
> > adds the
>
> s/iqueues/queues/
Thanx
>
> > ability to reset the VF if a Tx or Rx MDD event occurs. It also
> > includes MDD event logging throttling to avoid dmesg pollution
> and
> > unifies the format of Tx and Rx MDD messages.
> >
> > Note: Standard message rate limiting functions like
> > dev_info_ratelimited() do not meet our requirements. Custom rate
> > limiting is implemented, please see the code for details.
>
> I am not opposed to the custom rate-limiting, but have you also
> considered struct ratelimit_state, ratelimit_state_{init,exit}(),
> __ratelimit()?
Ok I'll try, I hope it won't change behavior too much.
> > Co-developed-by: Jan Sokolowski <jan.sokolowski@intel.com>
> > Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> > Co-developed-by: Padraig J Connolly
> <padraig.j.connolly@intel.com>
> > Signed-off-by: Padraig J Connolly <padraig.j.connolly@intel.com>
> > Signed-off-by: Aleksandr Loktionov
> <aleksandr.loktionov@intel.com>
> > ---
> > drivers/net/ethernet/intel/i40e/i40e.h | 4 +-
> > .../net/ethernet/intel/i40e/i40e_debugfs.c | 2 +-
> > .../net/ethernet/intel/i40e/i40e_ethtool.c | 2 +
> > drivers/net/ethernet/intel/i40e/i40e_main.c | 105
> ++++++++++++++++--
> > .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
> > .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 11 +-
> > 6 files changed, 111 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e.h
> > b/drivers/net/ethernet/intel/i40e/i40e.h
> > index d546567..6d6683c 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> > @@ -87,6 +87,7 @@ enum i40e_state {
> > __I40E_SERVICE_SCHED,
> > __I40E_ADMINQ_EVENT_PENDING,
> > __I40E_MDD_EVENT_PENDING,
> > + __I40E_MDD_VF_PRINT_PENDING,
> > __I40E_VFLR_EVENT_PENDING,
> > __I40E_RESET_RECOVERY_PENDING,
> > __I40E_TIMEOUT_RECOVERY_PENDING, @@ -190,6 +191,7 @@ enum
> > i40e_pf_flags {
> > */
> > I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
> > I40E_FLAG_VF_VLAN_PRUNING_ENA,
> > + I40E_FLAG_MDD_AUTO_RESET_VF,
> > I40E_PF_FLAGS_NBITS, /* must be last */
> > };
> >
> > @@ -571,7 +573,7 @@ struct i40e_pf {
> > int num_alloc_vfs; /* actual number of VFs allocated
> */
> > u32 vf_aq_requests;
> > u32 arq_overflows; /* Not fatal, possibly indicative
> of problems */
> > -
> > + unsigned long last_printed_mdd_jiffies; /* MDD message
> rate
> > + limit */
> > /* DCBx/DCBNL capability for PF that indicates
> > * whether DCBx is managed by firmware or host
> > * based agent (LLDPAD). Also, indicates what diff --git
> > a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> > b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> > index abf624d..6a697bf 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> > @@ -721,7 +721,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf
> *pf, int vf_id)
> > dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d,
> seid=%d, qps=%d\n",
> > vf_id, vf->lan_vsi_id, vsi->seid, vf-
> >num_queue_pairs);
> > dev_info(&pf->pdev->dev, " num MDD=%lld\n",
> > - vf->num_mdd_events);
> > + vf->mdd_tx_events.count +
> > + vf->mdd_rx_events.count);
> > } else {
> > dev_info(&pf->pdev->dev, "invalid VF id %d\n",
> vf_id);
> > }
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > index 1d0d2e5..d146575 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > @@ -459,6 +459,8 @@ static const struct i40e_priv_flags
> i40e_gstrings_priv_flags[] = {
> > I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
> > I40E_PRIV_FLAG("vf-vlan-pruning",
> > I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
> > + I40E_PRIV_FLAG("mdd-auto-reset-vf",
> > + I40E_FLAG_MDD_AUTO_RESET_VF, 0),
> > };
> >
> > #define I40E_PRIV_FLAGS_STR_LEN
> ARRAY_SIZE(i40e_gstrings_priv_flags)
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > index cbcfada..07f0a91 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > @@ -11189,22 +11189,91 @@ static void
> i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
> > i40e_reset_and_rebuild(pf, false, lock_acquired); }
> >
> > +/**
> > + * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver
> detect
> > +event
> > + * @pf: board private structure
> > + * @vf: pointer to the VF structure
> > + * @is_tx: true - for Tx event, false - for Rx */ static void
> > +i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
> > + bool is_tx) {
> > + dev_err(&pf->pdev->dev, is_tx ?
> > + "%lld Tx Malicious Driver Detection events
> detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
> > + "%lld Rx Malicious Driver Detection events
> detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
> > + vf->mdd_rx_events.count,
> > + pf->hw.pf_id,
> > + vf->vf_id,
> > + vf->default_lan_addr.addr,
> > + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)
> ?
> > +"on" : "off"); }
> > +
> > +/**
> > + * i40e_print_vfs_mdd_events - print VFs malicious driver detect
> > +event
> > + * @pf: pointer to the PF structure
> > + *
> > + * Called from i40e_handle_mdd_event to rate limit and print VFs
> MDD events.
> > + */
> > +static void i40e_print_vfs_mdd_events(struct i40e_pf *pf) {
> > + unsigned int i;
> > +
> > + /* check that there are pending MDD events to print */
> > + if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf-
> >state))
> > + return;
> > +
> > + /* VF MDD event logs are rate limited to one second
> intervals */
> > + if (time_is_after_jiffies(pf->last_printed_mdd_jiffies +
> HZ * 1))
> > + return;
> > +
> > + pf->last_printed_mdd_jiffies = jiffies;
> > +
> > + for (i = 0; i < pf->num_alloc_vfs; i++) {
> > + struct i40e_vf *vf = &pf->vf[i];
> > + bool is_printed = false;
> > +
> > + /* only print Rx MDD event message if there are
> new events */
> > + if (vf->mdd_rx_events.count != vf-
> >mdd_rx_events.last_printed) {
> > + vf->mdd_rx_events.last_printed = vf-
> >mdd_rx_events.count;
> > + i40e_print_vf_mdd_event(pf, vf, false);
> > + is_printed = true;
> > + }
> > +
> > + /* only print Tx MDD event message if there are
> new events */
> > + if (vf->mdd_tx_events.count != vf-
> >mdd_tx_events.last_printed) {
> > + vf->mdd_tx_events.last_printed = vf-
> >mdd_tx_events.count;
> > + i40e_print_vf_mdd_event(pf, vf, true);
> > + is_printed = true;
> > + }
> > +
> > + if (is_printed &&
> !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
> > + dev_info(&pf->pdev->dev,
> > + "Use PF Control I/F to re-enable
> the VF #%d\n",
> > + i);
> > + }
> > +}
> > +
> > /**
> > * i40e_handle_mdd_event
> > * @pf: pointer to the PF structure
> > *
> > * Called from the MDD irq handler to identify possibly
> malicious vfs
> > **/
> > static void i40e_handle_mdd_event(struct i40e_pf *pf) {
> > struct i40e_hw *hw = &pf->hw;
> > bool mdd_detected = false;
> > struct i40e_vf *vf;
> > u32 reg;
> > int i;
> >
> > - if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
> > + if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf-
> >state)) {
> > + /* Since the VF MDD event logging is rate
> limited, check if
> > + * there are pending MDD events.
> > + */
> > + i40e_print_vfs_mdd_events(pf);
>
> Can there ever be anything to print here?
> i40e_print_vfs_mdd_events() is also called at the end of
> i40e_handle_mdd_event(). It always clears the
> __I40E_MDD_VF_PRINT_PENDING bit. So how can the bit ever remain set
> between invocations? In fact, shouldn't the bit be a local variable
> of this function instead of a pf->state bit?
Our concern was that a bit in pf->state can be traced by ePBF or linux trace.
Easier to troubleshoot and was validated to be working.
> > return;
> > + }
> >
> > /* find what triggered the MDD event */
> > reg = rd32(hw, I40E_GL_MDET_TX); @@ -11248,36 +11317,50
> @@
> > static void i40e_handle_mdd_event(struct i40e_pf *pf)
> >
> > /* see if one of the VFs needs its hand slapped */
> > for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
> > + bool is_mdd_on_tx = false;
> > + bool is_mdd_on_rx = false;
> > +
> > vf = &(pf->vf[i]);
> > reg = rd32(hw, I40E_VP_MDET_TX(i));
> > if (reg & I40E_VP_MDET_TX_VALID_MASK) {
> > + set_bit(__I40E_MDD_VF_PRINT_PENDING,
> > + pf->state);
> > wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
> > - vf->num_mdd_events++;
> > - dev_info(&pf->pdev->dev, "TX driver issue
> detected on VF %d\n",
> > - i);
> > - dev_info(&pf->pdev->dev,
> > - "Use PF Control I/F to re-enable
> the VF\n");
> > + vf->mdd_tx_events.count++;
> > set_bit(I40E_VF_STATE_DISABLED,
> > &vf->vf_states);
> > + is_mdd_on_tx = true;
> > }
> >
> > reg = rd32(hw, I40E_VP_MDET_RX(i));
> > if (reg & I40E_VP_MDET_RX_VALID_MASK) {
> > + set_bit(__I40E_MDD_VF_PRINT_PENDING,
> > + pf->state);
> > wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
> > - vf->num_mdd_events++;
> > - dev_info(&pf->pdev->dev, "RX driver issue
> detected on VF %d\n",
> > - i);
> > - dev_info(&pf->pdev->dev,
> > - "Use PF Control I/F to re-enable
> the VF\n");
> > + vf->mdd_rx_events.count++;
> > set_bit(I40E_VF_STATE_DISABLED,
> > &vf->vf_states);
> > + is_mdd_on_rx = true;
> > + }
> > +
> > + if ((is_mdd_on_tx || is_mdd_on_rx) &&
> > + test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf-
> >flags)) {
> > + /* VF MDD event counters will be cleared
> by
> > + * reset, so print the event prior to
> reset.
> > + */
> > + if (is_mdd_on_rx)
> > + i40e_print_vf_mdd_event(pf, vf,
> false);
> > + if (is_mdd_on_tx)
> > + i40e_print_vf_mdd_event(pf, vf,
> true);
>
> I see there's no rate-limiting applied here. Is this intentional?
Yes of course, see the comment above.
This is the exception of rate-limiting because we/clients need to display
exact statistics.
>
> > +
> > + i40e_vc_reset_vf(vf, true);
> > }
> > }
> >
> > /* re-enable mdd interrupt cause */
> > clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
>
> Can you remove this 2nd clearing of the __I40E_MDD_EVENT_PENDING
> bit?
> If the interrupt handler detects a MDD event while we're still
> printing the message about the previous one, we don't want to
> forget it by clearing it here.
>
> Michal
>
Ok, let's try, but can you verify it works in all cases then?
> > reg = rd32(hw, I40E_PFINT_ICR0_ENA);
> > reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
> > wr32(hw, I40E_PFINT_ICR0_ENA, reg);
> > i40e_flush(hw);
> > +
> > + i40e_print_vfs_mdd_events(pf);
> > }
> >
> > /**
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> > b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> > index 662622f..5b4618e 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> > @@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf
> *vf)
> > * @notify_vf: notify vf about reset or not
> > * Reset VF handler.
> > **/
> > -static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
> > +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
> > {
> > struct i40e_pf *pf = vf->pf;
> > int i;
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> > b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> > index 66f95e2..5cf74f1 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> > @@ -64,6 +64,12 @@ struct i40evf_channel {
> > u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
> };
> >
> > +struct i40e_mdd_vf_events {
> > + u64 count; /* total count of Rx|Tx events */
> > + /* count number of the last printed event */
> > + u64 last_printed;
> > +};
> > +
> > /* VF information structure */
> > struct i40e_vf {
> > struct i40e_pf *pf;
> > @@ -92,7 +98,9 @@ struct i40e_vf {
> >
> > u8 num_queue_pairs; /* num of qps assigned to VF vsis
> */
> > u8 num_req_queues; /* num of requested qps */
> > - u64 num_mdd_events; /* num of mdd events detected */
> > + /* num of mdd tx and rx events detected */
> > + struct i40e_mdd_vf_events mdd_rx_events;
> > + struct i40e_mdd_vf_events mdd_tx_events;
> >
> > unsigned long vf_caps; /* vf's adv. capabilities */
> > unsigned long vf_states; /* vf's runtime states */
> > @@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16
> > num_alloc_vfs); int i40e_vc_process_vf_msg(struct i40e_pf *pf,
> s16 vf_id, u32 v_opcode,
> > u32 v_retval, u8 *msg, u16 msglen);
> int
> > i40e_vc_process_vflr_event(struct i40e_pf *pf);
> > +void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
> > bool i40e_reset_vf(struct i40e_vf *vf, bool flr); bool
> > i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); void
> > i40e_vc_notify_vf_reset(struct i40e_vf *vf);
> > --
> > 2.25.1
> >
> >
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-11-15 19:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-29 12:36 [Intel-wired-lan] [PATCH iwl-next v4444] i40e: add ability to reset VF for Tx and Rx MDD events Aleksandr Loktionov
2024-11-04 15:40 ` Michal Schmidt
2024-11-04 15:50 ` Michal Schmidt
2024-11-15 19:29 ` Loktionov, Aleksandr
2024-11-04 17:19 ` Tony Nguyen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox