From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
To: Vadim Fedorenko <vfedorenko@novek.ru>
Cc: Vadim Fedorenko <vadfed@fb.com>, Aya Levin <ayal@nvidia.com>,
Saeed Mahameed <saeedm@nvidia.com>,
Jakub Kicinski <kuba@kernel.org>, Gal Pressman <gal@nvidia.com>,
Vadim Fedorenko <vadfed@meta.com>,
netdev@vger.kernel.org
Subject: Re: [PATCH net v2 1/2] mlx5: fix possible ptp queue fifo overflow
Date: Mon, 23 Jan 2023 18:05:55 -0800 [thread overview]
Message-ID: <87zga8sn3w.fsf@nvidia.com> (raw)
In-Reply-To: <20230124000836.20523-2-vfedorenko@novek.ru> (Vadim Fedorenko's message of "Tue, 24 Jan 2023 03:08:35 +0300")
On Tue, 24 Jan, 2023 03:08:35 +0300 Vadim Fedorenko <vfedorenko@novek.ru> wrote:
> From: Vadim Fedorenko <vadfed@meta.com>
>
> Fifo pointers are not checked for overflow and this could potentially
> lead to overflow and double free under heavy PTP traffic.
>
> Also there were accidental OOO cqe which lead to absolutely broken fifo.
> Add checks to workaround OOO cqe and add counters to show the amount of
> such events.
>
> Fixes: 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port timestamp")
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
> .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 28 ++++++++++++++-----
> .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 6 +++-
> .../ethernet/mellanox/mlx5/core/en_stats.c | 2 ++
> .../ethernet/mellanox/mlx5/core/en_stats.h | 2 ++
> 4 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index 8469e9c38670..32d6b387af61 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
> return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
> }
>
> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
> {
> struct skb_shared_hwtstamps hwts = {};
> struct sk_buff *skb;
>
> ptpsq->cq_stats->resync_event++;
>
> - while (skb_cc != skb_id) {
> - skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> + if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
> + ptpsq->cq_stats->ooo_cqe++;
> + return false;
> + }
> +
> + while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
> hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
> skb_tstamp_tx(skb, &hwts);
> ptpsq->cq_stats->resync_cqe++;
> skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
> }
> +
> + if (!skb) {
> + ptpsq->cq_stats->fifo_empty++;
> + return false;
> + }
> +
> + return true;
> }
>
> static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
> u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
> struct mlx5e_txqsq *sq = &ptpsq->txqsq;
> - struct sk_buff *skb;
> + struct sk_buff *skb = NULL;
> ktime_t hwtstamp;
>
> if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> goto out;
> }
>
> - if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
> - mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
> + if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
> + !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
> + goto out;
> + }
>
> skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> ptpsq->cq_stats->cqe++;
>
> out:
> - napi_consume_skb(skb, budget);
> + if (skb)
> + napi_consume_skb(skb, budget);
> }
>
> static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> index 853f312cd757..5fb58764c923 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
> static inline bool
> mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
> {
> - return (*fifo->pc - *fifo->cc) < fifo->mask;
> + return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
> }
>
> static inline bool
> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
> {
> struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>
> + WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "ptp fifo overflow");
I found this pretty tough to read/understand since it needed to account
for the fact that the overflow already occurred. Instead I would
refactor into the following.
static inline
void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
{
struct sk_buff **skb_item;
WARN_ONCE(!mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
*skb_item = skb;
}
> *skb_item = skb;
> }
>
> static inline
> struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
> {
> + if (*fifo->pc == *fifo->cc)
> + return NULL;
> +
> return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
> }
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> index 6687b8136e44..6fbd58d1722a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> @@ -2138,6 +2138,8 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
> { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
> { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
> { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
> + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe) },
> + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, fifo_empty) },
> };
>
> static const struct counter_desc ptp_rq_stats_desc[] = {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> index 375752d6546d..51da492169c2 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> @@ -461,6 +461,8 @@ struct mlx5e_ptp_cq_stats {
> u64 abort_abs_diff_ns;
> u64 resync_cqe;
> u64 resync_event;
> + u64 ooo_cqe;
> + u64 fifo_empty;
> };
>
> struct mlx5e_rep_stats {
next prev parent reply other threads:[~2023-01-24 2:06 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-24 0:08 [PATCH net v2 0/2] mlx5: bugfixes for ptp fifo queue Vadim Fedorenko
2023-01-24 0:08 ` [PATCH net v2 1/2] mlx5: fix possible ptp queue fifo overflow Vadim Fedorenko
2023-01-24 2:05 ` Rahul Rameshbabu [this message]
2023-01-24 4:19 ` Jakub Kicinski
2023-01-24 16:03 ` Vadim Fedorenko
2023-01-25 22:02 ` Jakub Kicinski
2023-01-24 0:08 ` [PATCH net v2 2/2] mlx5: fix skb leak while fifo resync Vadim Fedorenko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87zga8sn3w.fsf@nvidia.com \
--to=rrameshbabu@nvidia.com \
--cc=ayal@nvidia.com \
--cc=gal@nvidia.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
--cc=vadfed@fb.com \
--cc=vadfed@meta.com \
--cc=vfedorenko@novek.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).