All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Weinberger <richard@sigma-star.at>
To: Andri Yngvason <andri.yngvason@marel.com>
Cc: linux-can@vger.kernel.org, mkl@pengutronix.de, wg@grandegger.com,
	sigurbjorn.narfason@marel.com, hrafnkell.eiriksson@marel.com,
	patric.thysell@br-automation.com
Subject: Re: [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply
Date: Mon, 12 Feb 2018 20:45:27 +0100	[thread overview]
Message-ID: <1975796.InEJQ7qnl6@blindfold> (raw)
In-Reply-To: <20180130165649.22732-3-andri.yngvason@marel.com>

Andri,

Am Dienstag, 30. Januar 2018, 17:56:49 CET schrieb Andri Yngvason:
> While waiting for the TX object to send an RTR, an external message with a
> matching id can overwrite the TX data. In this case we must call the rx
> routine and then try transmitting the message that was overwritten again.
> 
> The queue was being stalled because the RX event did not generate an
> interrupt to wake up the queue again and the TX event did not happen
> because the TXRQST flag is reset by the chip when new data is received.
> 
> According to the CC770 datasheet the id of a message object should not be
> changed while the MSGVAL bit is set. This has been fixed by resetting the
> MSGVAL bit before modifying the object in the transmit function and setting
> it after. It is not enough to set & reset CPUUPD.
> 
> It is important to keep the MSGVAL bit reset while the message object is
> being modified. Otherwise, during RTR transmission, a frame with matching
> id could trigger an rx-interrupt, which would cause a race condition
> between the interrupt routine and the transmit function.
> 
> Signed-off-by: Andri Yngvason <andri.yngvason@marel.com>
> ---
> Changes from v1:
>  - Squashed 4 into 3
>  - Modified comment
>  - Now using NEWDAT flag instead of MSGLST to check if a message was
>    received into TX object. This allows us to check if the message object
>    was overwritten more than once, and report it.
> 
>  drivers/net/can/cc770/cc770.c | 96
> ++++++++++++++++++++++++++++++------------- drivers/net/can/cc770/cc770.h |
>  2 +
>  2 files changed, 69 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
> index 12d3b89..e6458cb 100644
> --- a/drivers/net/can/cc770/cc770.c
> +++ b/drivers/net/can/cc770/cc770.c
> @@ -390,38 +390,23 @@ static int cc770_get_berr_counter(const struct
> net_device *dev, return 0;
>  }
> 
> -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device
> *dev) +static void cc770_tx(struct net_device *dev, int mo)
>  {
>  	struct cc770_priv *priv = netdev_priv(dev);
> -	struct net_device_stats *stats = &dev->stats;
> -	struct can_frame *cf = (struct can_frame *)skb->data;
> -	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> +	struct can_frame *cf = (struct can_frame *)priv->tx_skb->data;
>  	u8 dlc, rtr;
>  	u32 id;
>  	int i;
> 
> -	if (can_dropped_invalid_skb(dev, skb))
> -		return NETDEV_TX_OK;
> -
> -	if ((cc770_read_reg(priv,
> -			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> -		netif_stop_queue(dev);
> -		netdev_err(dev, "TX register is still occupied!\n");
> -		return NETDEV_TX_BUSY;
> -	}
> -
> -	netif_stop_queue(dev);
> -
>  	dlc = cf->can_dlc;
>  	id = cf->can_id;
> -	if (cf->can_id & CAN_RTR_FLAG)
> -		rtr = 0;
> -	else
> -		rtr = MSGCFG_DIR;
> +	rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR;
> +
> +	cc770_write_reg(priv, msgobj[mo].ctrl0,
> +			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
>  	cc770_write_reg(priv, msgobj[mo].ctrl1,
>  			RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
> -	cc770_write_reg(priv, msgobj[mo].ctrl0,
> -			MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
> +
>  	if (id & CAN_EFF_FLAG) {
>  		id &= CAN_EFF_MASK;
>  		cc770_write_reg(priv, msgobj[mo].config,
> @@ -440,13 +425,31 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff
> *skb, struct net_device *dev) for (i = 0; i < dlc; i++)
>  		cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
> 
> -	/* Store echo skb before starting the transfer */
> -	can_put_echo_skb(skb, dev, 0);
> -
>  	cc770_write_reg(priv, msgobj[mo].ctrl1,
> -			RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> +			RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> +	cc770_write_reg(priv, msgobj[mo].ctrl0,
> +			MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC);
> +}
> +
> +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device
> *dev) +{
> +	struct cc770_priv *priv = netdev_priv(dev);
> +	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> +
> +	if (can_dropped_invalid_skb(dev, skb))
> +		return NETDEV_TX_OK;
> 
> -	stats->tx_bytes += dlc;
> +	if ((cc770_read_reg(priv,
> +			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> +		netif_stop_queue(dev);
> +		netdev_err(dev, "TX register is still occupied!\n");
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	netif_stop_queue(dev);
> +
> +	priv->tx_skb = skb;
> +	cc770_tx(dev, mo);
> 
>  	return NETDEV_TX_OK;
>  }
> @@ -672,13 +675,47 @@ static void cc770_tx_interrupt(struct net_device *dev,
> unsigned int o) struct cc770_priv *priv = netdev_priv(dev);
>  	struct net_device_stats *stats = &dev->stats;
>  	unsigned int mo = obj2msgobj(o);
> +	struct can_frame *cf;
> +	u8 ctrl1;
> +
> +	ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
> 
> -	/* Nothing more to send, switch off interrupts */
>  	cc770_write_reg(priv, msgobj[mo].ctrl0,
>  			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
> +	cc770_write_reg(priv, msgobj[mo].ctrl1,
> +			RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES);
> 
> -	stats->tx_packets++;
> +	if (unlikely(!priv->tx_skb)) {
> +		netdev_err(dev, "missing tx skb in tx interrupt\n");
> +		return;
> +	}
> +
> +	if (unlikely(ctrl1 & MSGLST_SET)) {
> +		stats->rx_over_errors++;
> +		stats->rx_errors++;
> +	}
> +
> +	/* When the CC770 is sending an RTR message and it receives a regular
> +	 * message that matches the id of the RTR message, it will overwrite the
> +	 * outgoing message in the TX register. When this happens we must
> +	 * process the received message and try to transmit the outgoing skb
> +	 * again.
> +	 */
> +	if (unlikely(ctrl1 & NEWDAT_SET)) {
> +		cc770_rx(dev, mo, ctrl1);
> +		cc770_tx(dev, mo);
> +		return;
> +	}
> +
> +	can_put_echo_skb(priv->tx_skb, dev, 0);
>  	can_get_echo_skb(dev, 0);
> +
> +	cf = (struct can_frame *)priv->tx_skb->data;
> +	stats->tx_bytes += cf->can_dlc;
> +	stats->tx_packets++;
> +
> +	priv->tx_skb = NULL;
> +
>  	netif_wake_queue(dev);
>  }
> 
> @@ -790,6 +827,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv)
>  	priv->can.do_set_bittiming = cc770_set_bittiming;
>  	priv->can.do_set_mode = cc770_set_mode;
>  	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
> +	priv->tx_skb = NULL;
> 
>  	memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
> 
> diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
> index a1739db..95752e1 100644
> --- a/drivers/net/can/cc770/cc770.h
> +++ b/drivers/net/can/cc770/cc770.h
> @@ -193,6 +193,8 @@ struct cc770_priv {
>  	u8 cpu_interface;	/* CPU interface register */
>  	u8 clkout;		/* Clock out register */
>  	u8 bus_config;		/* Bus conffiguration register */
> +
> +	struct sk_buff *tx_skb;
>  };
> 
>  struct net_device *alloc_cc770dev(int sizeof_priv);

Looks good to me.
If this patch survives 1-2 days on my test bed, I'll replay with a Reviewed/Tested-by. 

Thanks,
//richard

-- 
sigma star gmbh - Eduard-Bodem-Gasse 6 - 6020 Innsbruck - Austria
ATU66964118 - FN 374287y

  reply	other threads:[~2018-02-12 19:44 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-30 16:56 [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Andri Yngvason
2018-01-30 16:56 ` [PATCH v2 2/3] can: cc770: Stop queue on NETDEV_TX_BUSY Andri Yngvason
2018-02-12 19:40   ` Richard Weinberger
2018-02-12 20:37     ` Wolfgang Grandegger
2018-02-12 20:44       ` Wolfgang Grandegger
2018-02-12 20:55         ` Richard Weinberger
2018-02-12 21:28           ` Wolfgang Grandegger
2018-02-12 21:36             ` Wolfgang Grandegger
2018-02-13 10:32               ` Andri Yngvason
2018-02-13 16:30                 ` Wolfgang Grandegger
2018-02-16 14:22               ` Marc Kleine-Budde
2018-02-16 15:20                 ` Wolfgang Grandegger
2018-01-30 16:56 ` [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply Andri Yngvason
2018-02-12 19:45   ` Richard Weinberger [this message]
2018-02-12 19:41 ` [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Richard Weinberger
     [not found]   ` <151851733257.10946.11726494714017260046@maxwell>
2018-02-13 10:35     ` Richard Weinberger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1975796.InEJQ7qnl6@blindfold \
    --to=richard@sigma-star.at \
    --cc=andri.yngvason@marel.com \
    --cc=hrafnkell.eiriksson@marel.com \
    --cc=linux-can@vger.kernel.org \
    --cc=mkl@pengutronix.de \
    --cc=patric.thysell@br-automation.com \
    --cc=sigurbjorn.narfason@marel.com \
    --cc=wg@grandegger.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.