All of lore.kernel.org
 help / color / mirror / Atom feed
From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: Matan Azrad <matan@mellanox.com>
Cc: dev@dpdk.org, Thomas Monjalon <thomas@monjalon.net>,
	Olga Shern <olgas@mellanox.com>,
	stable@dpdk.org
Subject: Re: [PATCH] net/mlx4: workaround to verbs wrong error return
Date: Mon, 31 Jul 2017 16:17:28 +0200	[thread overview]
Message-ID: <20170731141728.GO19852@6wind.com> (raw)
In-Reply-To: <1501499709-19873-1-git-send-email-matan@mellanox.com>

Hi Matan,

On Mon, Jul 31, 2017 at 02:15:09PM +0300, Matan Azrad wrote:
> Current mlx4 OFED version has bug which returns error to
> ibv destroy functions when the device was plugged out, in
> spite of the resources were destroyed correctly.
> 
> Hence, failsafe PMD was aborted, only in debug mode, when
> it tries to remove the device in plug-out process.
> 
> The workaround removed the ibv destroy assertions.
> 
> DPDK 18.02 release should work with OFED-4.2 which will
> include the verbs fix to this bug, then, this patch will
> be removed.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Cc: stable@dpdk.org

Since this workaround is needed in order to validate hot-plug with mlx4
compiled in debug mode due to a problem in Verbs, I don't think
stable@dpdk.org should be involved.

What will be back-ported, once fixed, is the minimum OFED version to install
to properly benefit from hot-plug functionality.

More comments about the patch below.

> ---
>  drivers/net/mlx4/mlx4.c      | 70 +++++++++++++++++++++++++++++++++++---------
>  drivers/net/mlx4/mlx4_flow.c | 22 ++++++++++----
>  2 files changed, 73 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 8451f5b..94782c2 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -955,7 +955,10 @@ struct rxq *
>  	return 0;
>  error:
>  	if (mr_linear != NULL)
> -		claim_zero(ibv_dereg_mr(mr_linear));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dereg_mr(mr_linear);
>  
>  	rte_free(elts_linear);
>  	rte_free(elts);
> @@ -992,7 +995,10 @@ struct rxq *
>  	txq->elts_linear = NULL;
>  	txq->mr_linear = NULL;
>  	if (mr_linear != NULL)
> -		claim_zero(ibv_dereg_mr(mr_linear));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dereg_mr(mr_linear);
>  
>  	rte_free(elts_linear);
>  	if (elts == NULL)
> @@ -1052,9 +1058,15 @@ struct rxq *
>  						&params));
>  	}
>  	if (txq->qp != NULL)
> -		claim_zero(ibv_destroy_qp(txq->qp));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_qp(txq->qp);
>  	if (txq->cq != NULL)
> -		claim_zero(ibv_destroy_cq(txq->cq));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_cq(txq->cq);
>  	if (txq->rd != NULL) {
>  		struct ibv_exp_destroy_res_domain_attr attr = {
>  			.comp_mask = 0,
> @@ -1070,7 +1082,10 @@ struct rxq *
>  		if (txq->mp2mr[i].mp == NULL)
>  			break;
>  		assert(txq->mp2mr[i].mr != NULL);
> -		claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dereg_mr(txq->mp2mr[i].mr);
>  	}
>  	memset(txq, 0, sizeof(*txq));
>  }
> @@ -1302,7 +1317,10 @@ static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, struct rte_mempool *)
>  		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
>  		      (void *)txq);
>  		--i;
> -		claim_zero(ibv_dereg_mr(txq->mp2mr[0].mr));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dereg_mr(txq->mp2mr[0].mr);
>  		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
>  			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
>  	}
> @@ -2355,7 +2373,10 @@ struct txq_mp2mr_mbuf_check_data {
>  	      (void *)rxq,
>  	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
>  	      mac_index, priv->vlan_filter[vlan_index].id);
> -	claim_zero(ibv_destroy_flow(rxq->mac_flow[mac_index][vlan_index]));
> +	/* Current verbs does not allow to check real
> +	 * errors when the device was plugged out.
> +	 */
> +	ibv_destroy_flow(rxq->mac_flow[mac_index][vlan_index]);
>  	rxq->mac_flow[mac_index][vlan_index] = NULL;
>  }
>  
> @@ -2736,7 +2757,10 @@ struct txq_mp2mr_mbuf_check_data {
>  	DEBUG("%p: disabling allmulticast mode", (void *)rxq);
>  	if (rxq->allmulti_flow == NULL)
>  		return;
> -	claim_zero(ibv_destroy_flow(rxq->allmulti_flow));
> +	/* Current verbs does not allow to check real
> +	 * errors when the device was plugged out.
> +	 */
> +	ibv_destroy_flow(rxq->allmulti_flow);
>  	rxq->allmulti_flow = NULL;
>  	DEBUG("%p: allmulticast mode disabled", (void *)rxq);
>  }
> @@ -2796,7 +2820,10 @@ struct txq_mp2mr_mbuf_check_data {
>  	DEBUG("%p: disabling promiscuous mode", (void *)rxq);
>  	if (rxq->promisc_flow == NULL)
>  		return;
> -	claim_zero(ibv_destroy_flow(rxq->promisc_flow));
> +	/* Current verbs does not allow to check real
> +	 * errors when the device was plugged out.
> +	 */
> +	ibv_destroy_flow(rxq->promisc_flow);
>  	rxq->promisc_flow = NULL;
>  	DEBUG("%p: promiscuous mode disabled", (void *)rxq);
>  }
> @@ -2847,9 +2874,15 @@ struct txq_mp2mr_mbuf_check_data {
>  		rxq_mac_addrs_del(rxq);
>  	}
>  	if (rxq->qp != NULL)
> -		claim_zero(ibv_destroy_qp(rxq->qp));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_qp(rxq->qp);
>  	if (rxq->cq != NULL)
> -		claim_zero(ibv_destroy_cq(rxq->cq));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_cq(rxq->cq);
>  	if (rxq->channel != NULL)
>  		claim_zero(ibv_destroy_comp_channel(rxq->channel));
>  	if (rxq->rd != NULL) {
> @@ -2864,7 +2897,10 @@ struct txq_mp2mr_mbuf_check_data {
>  						      &attr));
>  	}
>  	if (rxq->mr != NULL)
> -		claim_zero(ibv_dereg_mr(rxq->mr));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dereg_mr(rxq->mr);
>  	memset(rxq, 0, sizeof(*rxq));
>  }
>  
> @@ -4374,7 +4410,10 @@ struct txq_mp2mr_mbuf_check_data {
>  		priv_parent_list_cleanup(priv);
>  	if (priv->pd != NULL) {
>  		assert(priv->ctx != NULL);
> -		claim_zero(ibv_dealloc_pd(priv->pd));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_dealloc_pd(priv->pd);
>  		claim_zero(ibv_close_device(priv->ctx));
>  	} else
>  		assert(priv->ctx == NULL);
> @@ -6389,7 +6428,10 @@ struct txq_mp2mr_mbuf_check_data {
>  port_error:
>  		rte_free(priv);
>  		if (pd)
> -			claim_zero(ibv_dealloc_pd(pd));
> +			/* Current verbs does not allow to check real
> +			 * errors when the device was plugged out.
> +			 */
> +			ibv_dealloc_pd(pd);
>  		if (ctx)
>  			claim_zero(ibv_close_device(ctx));
>  		if (eth_dev)
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> index 925c89c..daa62e3 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -799,8 +799,11 @@ struct rte_flow_drop {
>  		struct rte_flow_drop *fdq = priv->flow_drop_queue;
>  
>  		priv->flow_drop_queue = NULL;
> -		claim_zero(ibv_destroy_qp(fdq->qp));
> -		claim_zero(ibv_destroy_cq(fdq->cq));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_qp(fdq->qp);
> +		ibv_destroy_cq(fdq->cq);
>  		rte_free(fdq);
>  	}
>  }
> @@ -860,7 +863,10 @@ struct rte_flow_drop {
>  	priv->flow_drop_queue = fdq;
>  	return 0;
>  err_create_qp:
> -	claim_zero(ibv_destroy_cq(cq));
> +	/* Current verbs does not allow to check real
> +	 * errors when the device was plugged out.
> +	 */
> +	ibv_destroy_cq(cq);
>  err_create_cq:
>  	rte_free(fdq);
>  err:
> @@ -1200,7 +1206,10 @@ struct rte_flow *
>  	(void)priv;
>  	LIST_REMOVE(flow, next);
>  	if (flow->ibv_flow)
> -		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_flow(flow->ibv_flow);
>  	rte_free(flow->ibv_attr);
>  	DEBUG("Flow destroyed %p", (void *)flow);
>  	rte_free(flow);
> @@ -1278,7 +1287,10 @@ struct rte_flow *
>  	for (flow = LIST_FIRST(&priv->flows);
>  	     flow;
>  	     flow = LIST_NEXT(flow, next)) {
> -		claim_zero(ibv_destroy_flow(flow->ibv_flow));
> +		/* Current verbs does not allow to check real
> +		 * errors when the device was plugged out.
> +		 */
> +		ibv_destroy_flow(flow->ibv_flow);
>  		flow->ibv_flow = NULL;
>  		DEBUG("Flow %p removed", (void *)flow);
>  	}
> -- 
> 1.8.3.1
> 

This approach looks way too intrusive. How about making the claim_zero()
definition not fail but still complain when compiled against a broken Verbs
version instead?

 #include "mlx4_autoconf.h"

 [...]

 #ifndef HAVE_BROKEN_VERBS
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
 #else /* HAVE_BROKEN_VERBS */
 #define claim_zero(...) \
     (void)(((__VA_ARGS__) == 0) || \
            DEBUG("Assertion `" # __VA_ARGS__ "' failed (IGNORED)"))
 #endif /* HAVE_BROKEN_VERBS */

You could use auto-config-h.sh to generate the HAVE_BROKEN_VERBS definition
in mlx4_autoconf.h (see mlx4 Makefile) based on some symbol, macro or type
that only exists or doesn't exist yet in problematic releases for instance.

-- 
Adrien Mazarguil
6WIND

  reply	other threads:[~2017-07-31 14:17 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-31 11:15 [PATCH] net/mlx4: workaround to verbs wrong error return Matan Azrad
2017-07-31 14:17 ` Adrien Mazarguil [this message]
2017-07-31 16:56   ` Matan Azrad
2017-08-01  9:42     ` Adrien Mazarguil
2017-08-01 10:12       ` Matan Azrad
2017-08-01 10:50         ` Adrien Mazarguil
2017-08-01 12:18           ` Matan Azrad
2017-08-02 19:00           ` [PATCH v2] " Matan Azrad
2017-08-03 21:04             ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170731141728.GO19852@6wind.com \
    --to=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=matan@mellanox.com \
    --cc=olgas@mellanox.com \
    --cc=stable@dpdk.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.