Linux CAN drivers development
 help / color / mirror / Atom feed
From: Richard Weinberger <richard@sigma-star.at>
To: Andri Yngvason <andri.yngvason@marel.com>
Cc: linux-can@vger.kernel.org, mkl@pengutronix.de, wg@grandegger.com,
	sigurbjorn.narfason@marel.com, hrafnkell.eiriksson@marel.com,
	patric.thysell@br-automation.com
Subject: Re: [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply
Date: Mon, 12 Feb 2018 20:45:27 +0100	[thread overview]
Message-ID: <1975796.InEJQ7qnl6@blindfold> (raw)
In-Reply-To: <20180130165649.22732-3-andri.yngvason@marel.com>

Andri,

Am Dienstag, 30. Januar 2018, 17:56:49 CET schrieb Andri Yngvason:
> While waiting for the TX object to send an RTR, an external message with a
> matching id can overwrite the TX data. In this case we must call the rx
> routine and then try transmitting the message that was overwritten again.
> 
> The queue was being stalled because the RX event did not generate an
> interrupt to wake up the queue again and the TX event did not happen
> because the TXRQST flag is reset by the chip when new data is received.
> 
> According to the CC770 datasheet the id of a message object should not be
> changed while the MSGVAL bit is set. This has been fixed by resetting the
> MSGVAL bit before modifying the object in the transmit function and setting
> it after. It is not enough to set & reset CPUUPD.
> 
> It is important to keep the MSGVAL bit reset while the message object is
> being modified. Otherwise, during RTR transmission, a frame with matching
> id could trigger an rx-interrupt, which would cause a race condition
> between the interrupt routine and the transmit function.
> 
> Signed-off-by: Andri Yngvason <andri.yngvason@marel.com>
> ---
> Changes from v1:
>  - Squashed 4 into 3
>  - Modified comment
>  - Now using NEWDAT flag instead of MSGLST to check if a message was
>    received into TX object. This allows us to check if the message object
>    was overwritten more than once, and report it.
> 
>  drivers/net/can/cc770/cc770.c | 96
> ++++++++++++++++++++++++++++++------------- drivers/net/can/cc770/cc770.h |
>  2 +
>  2 files changed, 69 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
> index 12d3b89..e6458cb 100644
> --- a/drivers/net/can/cc770/cc770.c
> +++ b/drivers/net/can/cc770/cc770.c
> @@ -390,38 +390,23 @@ static int cc770_get_berr_counter(const struct
> net_device *dev, return 0;
>  }
> 
> -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device
> *dev) +static void cc770_tx(struct net_device *dev, int mo)
>  {
>  	struct cc770_priv *priv = netdev_priv(dev);
> -	struct net_device_stats *stats = &dev->stats;
> -	struct can_frame *cf = (struct can_frame *)skb->data;
> -	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> +	struct can_frame *cf = (struct can_frame *)priv->tx_skb->data;
>  	u8 dlc, rtr;
>  	u32 id;
>  	int i;
> 
> -	if (can_dropped_invalid_skb(dev, skb))
> -		return NETDEV_TX_OK;
> -
> -	if ((cc770_read_reg(priv,
> -			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> -		netif_stop_queue(dev);
> -		netdev_err(dev, "TX register is still occupied!\n");
> -		return NETDEV_TX_BUSY;
> -	}
> -
> -	netif_stop_queue(dev);
> -
>  	dlc = cf->can_dlc;
>  	id = cf->can_id;
> -	if (cf->can_id & CAN_RTR_FLAG)
> -		rtr = 0;
> -	else
> -		rtr = MSGCFG_DIR;
> +	rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR;
> +
> +	cc770_write_reg(priv, msgobj[mo].ctrl0,
> +			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
>  	cc770_write_reg(priv, msgobj[mo].ctrl1,
>  			RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
> -	cc770_write_reg(priv, msgobj[mo].ctrl0,
> -			MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
> +
>  	if (id & CAN_EFF_FLAG) {
>  		id &= CAN_EFF_MASK;
>  		cc770_write_reg(priv, msgobj[mo].config,
> @@ -440,13 +425,31 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff
> *skb, struct net_device *dev) for (i = 0; i < dlc; i++)
>  		cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
> 
> -	/* Store echo skb before starting the transfer */
> -	can_put_echo_skb(skb, dev, 0);
> -
>  	cc770_write_reg(priv, msgobj[mo].ctrl1,
> -			RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> +			RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> +	cc770_write_reg(priv, msgobj[mo].ctrl0,
> +			MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC);
> +}
> +
> +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device
> *dev) +{
> +	struct cc770_priv *priv = netdev_priv(dev);
> +	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> +
> +	if (can_dropped_invalid_skb(dev, skb))
> +		return NETDEV_TX_OK;
> 
> -	stats->tx_bytes += dlc;
> +	if ((cc770_read_reg(priv,
> +			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> +		netif_stop_queue(dev);
> +		netdev_err(dev, "TX register is still occupied!\n");
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	netif_stop_queue(dev);
> +
> +	priv->tx_skb = skb;
> +	cc770_tx(dev, mo);
> 
>  	return NETDEV_TX_OK;
>  }
> @@ -672,13 +675,47 @@ static void cc770_tx_interrupt(struct net_device *dev,
> unsigned int o) struct cc770_priv *priv = netdev_priv(dev);
>  	struct net_device_stats *stats = &dev->stats;
>  	unsigned int mo = obj2msgobj(o);
> +	struct can_frame *cf;
> +	u8 ctrl1;
> +
> +	ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
> 
> -	/* Nothing more to send, switch off interrupts */
>  	cc770_write_reg(priv, msgobj[mo].ctrl0,
>  			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
> +	cc770_write_reg(priv, msgobj[mo].ctrl1,
> +			RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES);
> 
> -	stats->tx_packets++;
> +	if (unlikely(!priv->tx_skb)) {
> +		netdev_err(dev, "missing tx skb in tx interrupt\n");
> +		return;
> +	}
> +
> +	if (unlikely(ctrl1 & MSGLST_SET)) {
> +		stats->rx_over_errors++;
> +		stats->rx_errors++;
> +	}
> +
> +	/* When the CC770 is sending an RTR message and it receives a regular
> +	 * message that matches the id of the RTR message, it will overwrite the
> +	 * outgoing message in the TX register. When this happens we must
> +	 * process the received message and try to transmit the outgoing skb
> +	 * again.
> +	 */
> +	if (unlikely(ctrl1 & NEWDAT_SET)) {
> +		cc770_rx(dev, mo, ctrl1);
> +		cc770_tx(dev, mo);
> +		return;
> +	}
> +
> +	can_put_echo_skb(priv->tx_skb, dev, 0);
>  	can_get_echo_skb(dev, 0);
> +
> +	cf = (struct can_frame *)priv->tx_skb->data;
> +	stats->tx_bytes += cf->can_dlc;
> +	stats->tx_packets++;
> +
> +	priv->tx_skb = NULL;
> +
>  	netif_wake_queue(dev);
>  }
> 
> @@ -790,6 +827,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv)
>  	priv->can.do_set_bittiming = cc770_set_bittiming;
>  	priv->can.do_set_mode = cc770_set_mode;
>  	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
> +	priv->tx_skb = NULL;
> 
>  	memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
> 
> diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
> index a1739db..95752e1 100644
> --- a/drivers/net/can/cc770/cc770.h
> +++ b/drivers/net/can/cc770/cc770.h
> @@ -193,6 +193,8 @@ struct cc770_priv {
>  	u8 cpu_interface;	/* CPU interface register */
>  	u8 clkout;		/* Clock out register */
>  	u8 bus_config;		/* Bus conffiguration register */
> +
> +	struct sk_buff *tx_skb;
>  };
> 
>  struct net_device *alloc_cc770dev(int sizeof_priv);

Looks good to me.
If this patch survives 1-2 days on my test bed, I'll replay with a Reviewed/Tested-by. 

Thanks,
//richard

-- 
sigma star gmbh - Eduard-Bodem-Gasse 6 - 6020 Innsbruck - Austria
ATU66964118 - FN 374287y

  reply	other threads:[~2018-02-12 19:44 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-30 16:56 [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Andri Yngvason
2018-01-30 16:56 ` [PATCH v2 2/3] can: cc770: Stop queue on NETDEV_TX_BUSY Andri Yngvason
2018-02-12 19:40   ` Richard Weinberger
2018-02-12 20:37     ` Wolfgang Grandegger
2018-02-12 20:44       ` Wolfgang Grandegger
2018-02-12 20:55         ` Richard Weinberger
2018-02-12 21:28           ` Wolfgang Grandegger
2018-02-12 21:36             ` Wolfgang Grandegger
2018-02-13 10:32               ` Andri Yngvason
2018-02-13 16:30                 ` Wolfgang Grandegger
2018-02-16 14:22               ` Marc Kleine-Budde
2018-02-16 15:20                 ` Wolfgang Grandegger
2018-01-30 16:56 ` [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply Andri Yngvason
2018-02-12 19:45   ` Richard Weinberger [this message]
2018-02-12 19:41 ` [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Richard Weinberger
     [not found]   ` <151851733257.10946.11726494714017260046@maxwell>
2018-02-13 10:35     ` Richard Weinberger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1975796.InEJQ7qnl6@blindfold \
    --to=richard@sigma-star.at \
    --cc=andri.yngvason@marel.com \
    --cc=hrafnkell.eiriksson@marel.com \
    --cc=linux-can@vger.kernel.org \
    --cc=mkl@pengutronix.de \
    --cc=patric.thysell@br-automation.com \
    --cc=sigurbjorn.narfason@marel.com \
    --cc=wg@grandegger.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox