From: Richard Weinberger <richard@sigma-star.at>
To: Andri Yngvason <andri.yngvason@marel.com>
Cc: linux-can@vger.kernel.org, mkl@pengutronix.de, wg@grandegger.com,
sigurbjorn.narfason@marel.com, hrafnkell.eiriksson@marel.com,
patric.thysell@br-automation.com
Subject: Re: [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply
Date: Mon, 12 Feb 2018 20:45:27 +0100 [thread overview]
Message-ID: <1975796.InEJQ7qnl6@blindfold> (raw)
In-Reply-To: <20180130165649.22732-3-andri.yngvason@marel.com>
Andri,
Am Dienstag, 30. Januar 2018, 17:56:49 CET schrieb Andri Yngvason:
> While waiting for the TX object to send an RTR, an external message with a
> matching id can overwrite the TX data. In this case we must call the rx
> routine and then try transmitting the message that was overwritten again.
>
> The queue was being stalled because the RX event did not generate an
> interrupt to wake up the queue again and the TX event did not happen
> because the TXRQST flag is reset by the chip when new data is received.
>
> According to the CC770 datasheet the id of a message object should not be
> changed while the MSGVAL bit is set. This has been fixed by resetting the
> MSGVAL bit before modifying the object in the transmit function and setting
> it after. It is not enough to set & reset CPUUPD.
>
> It is important to keep the MSGVAL bit reset while the message object is
> being modified. Otherwise, during RTR transmission, a frame with matching
> id could trigger an rx-interrupt, which would cause a race condition
> between the interrupt routine and the transmit function.
>
> Signed-off-by: Andri Yngvason <andri.yngvason@marel.com>
> ---
> Changes from v1:
> - Squashed 4 into 3
> - Modified comment
> - Now using NEWDAT flag instead of MSGLST to check if a message was
> received into TX object. This allows us to check if the message object
> was overwritten more than once, and report it.
>
> drivers/net/can/cc770/cc770.c | 96 ++++++++++++++++++++++++++++++-------------
> drivers/net/can/cc770/cc770.h | 2 +
> 2 files changed, 69 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
> index 12d3b89..e6458cb 100644
> --- a/drivers/net/can/cc770/cc770.c
> +++ b/drivers/net/can/cc770/cc770.c
> @@ -390,38 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev,
> return 0;
> }
>
> -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
> +static void cc770_tx(struct net_device *dev, int mo)
> {
> struct cc770_priv *priv = netdev_priv(dev);
> - struct net_device_stats *stats = &dev->stats;
> - struct can_frame *cf = (struct can_frame *)skb->data;
> - unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> + struct can_frame *cf = (struct can_frame *)priv->tx_skb->data;
> u8 dlc, rtr;
> u32 id;
> int i;
>
> - if (can_dropped_invalid_skb(dev, skb))
> - return NETDEV_TX_OK;
> -
> - if ((cc770_read_reg(priv,
> - msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> - netif_stop_queue(dev);
> - netdev_err(dev, "TX register is still occupied!\n");
> - return NETDEV_TX_BUSY;
> - }
> -
> - netif_stop_queue(dev);
> -
> dlc = cf->can_dlc;
> id = cf->can_id;
> - if (cf->can_id & CAN_RTR_FLAG)
> - rtr = 0;
> - else
> - rtr = MSGCFG_DIR;
> + rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR;
> +
> + cc770_write_reg(priv, msgobj[mo].ctrl0,
> + MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
> cc770_write_reg(priv, msgobj[mo].ctrl1,
> RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
> - cc770_write_reg(priv, msgobj[mo].ctrl0,
> - MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
> +
> if (id & CAN_EFF_FLAG) {
> id &= CAN_EFF_MASK;
> cc770_write_reg(priv, msgobj[mo].config,
> @@ -440,13 +425,31 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
> for (i = 0; i < dlc; i++)
> cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
>
> - /* Store echo skb before starting the transfer */
> - can_put_echo_skb(skb, dev, 0);
> -
> cc770_write_reg(priv, msgobj[mo].ctrl1,
> - RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> + RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
> + cc770_write_reg(priv, msgobj[mo].ctrl0,
> + MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC);
> +}
> +
> +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> + struct cc770_priv *priv = netdev_priv(dev);
> + unsigned int mo = obj2msgobj(CC770_OBJ_TX);
> +
> + if (can_dropped_invalid_skb(dev, skb))
> + return NETDEV_TX_OK;
>
> - stats->tx_bytes += dlc;
> + if ((cc770_read_reg(priv,
> + msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
> + netif_stop_queue(dev);
> + netdev_err(dev, "TX register is still occupied!\n");
> + return NETDEV_TX_BUSY;
> + }
> +
> + netif_stop_queue(dev);
> +
> + priv->tx_skb = skb;
> + cc770_tx(dev, mo);
>
> return NETDEV_TX_OK;
> }
> @@ -672,13 +675,47 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o)
> struct cc770_priv *priv = netdev_priv(dev);
> struct net_device_stats *stats = &dev->stats;
> unsigned int mo = obj2msgobj(o);
> + struct can_frame *cf;
> + u8 ctrl1;
> +
> + ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
>
> - /* Nothing more to send, switch off interrupts */
> cc770_write_reg(priv, msgobj[mo].ctrl0,
> MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
> + cc770_write_reg(priv, msgobj[mo].ctrl1,
> + RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES);
>
> - stats->tx_packets++;
> + if (unlikely(!priv->tx_skb)) {
> + netdev_err(dev, "missing tx skb in tx interrupt\n");
> + return;
> + }
> +
> + if (unlikely(ctrl1 & MSGLST_SET)) {
> + stats->rx_over_errors++;
> + stats->rx_errors++;
> + }
> +
> + /* When the CC770 is sending an RTR message and it receives a regular
> + * message that matches the id of the RTR message, it will overwrite the
> + * outgoing message in the TX register. When this happens we must
> + * process the received message and try to transmit the outgoing skb
> + * again.
> + */
> + if (unlikely(ctrl1 & NEWDAT_SET)) {
> + cc770_rx(dev, mo, ctrl1);
> + cc770_tx(dev, mo);
> + return;
> + }
> +
> + can_put_echo_skb(priv->tx_skb, dev, 0);
> can_get_echo_skb(dev, 0);
> +
> + cf = (struct can_frame *)priv->tx_skb->data;
> + stats->tx_bytes += cf->can_dlc;
> + stats->tx_packets++;
> +
> + priv->tx_skb = NULL;
> +
> netif_wake_queue(dev);
> }
>
> @@ -790,6 +827,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv)
> priv->can.do_set_bittiming = cc770_set_bittiming;
> priv->can.do_set_mode = cc770_set_mode;
> priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
> + priv->tx_skb = NULL;
>
> memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
>
> diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
> index a1739db..95752e1 100644
> --- a/drivers/net/can/cc770/cc770.h
> +++ b/drivers/net/can/cc770/cc770.h
> @@ -193,6 +193,8 @@ struct cc770_priv {
> u8 cpu_interface; /* CPU interface register */
> u8 clkout; /* Clock out register */
> u8 bus_config; /* Bus conffiguration register */
> +
> + struct sk_buff *tx_skb;
> };
>
> struct net_device *alloc_cc770dev(int sizeof_priv);
Looks good to me.
If this patch survives 1-2 days on my test bed, I'll reply with a Reviewed/Tested-by.
Thanks,
//richard
--
sigma star gmbh - Eduard-Bodem-Gasse 6 - 6020 Innsbruck - Austria
ATU66964118 - FN 374287y
next prev parent reply other threads:[~2018-02-12 19:44 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-30 16:56 [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Andri Yngvason
2018-01-30 16:56 ` [PATCH v2 2/3] can: cc770: Stop queue on NETDEV_TX_BUSY Andri Yngvason
2018-02-12 19:40 ` Richard Weinberger
2018-02-12 20:37 ` Wolfgang Grandegger
2018-02-12 20:44 ` Wolfgang Grandegger
2018-02-12 20:55 ` Richard Weinberger
2018-02-12 21:28 ` Wolfgang Grandegger
2018-02-12 21:36 ` Wolfgang Grandegger
2018-02-13 10:32 ` Andri Yngvason
2018-02-13 16:30 ` Wolfgang Grandegger
2018-02-16 14:22 ` Marc Kleine-Budde
2018-02-16 15:20 ` Wolfgang Grandegger
2018-01-30 16:56 ` [PATCH v2 3/3] can: cc770: Fix queue stall & dropped RTR reply Andri Yngvason
2018-02-12 19:45 ` Richard Weinberger [this message]
2018-02-12 19:41 ` [PATCH v2 1/3] can: cc770: Remove redundant IRQ ack Richard Weinberger
[not found] ` <151851733257.10946.11726494714017260046@maxwell>
2018-02-13 10:35 ` Richard Weinberger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1975796.InEJQ7qnl6@blindfold \
--to=richard@sigma-star.at \
--cc=andri.yngvason@marel.com \
--cc=hrafnkell.eiriksson@marel.com \
--cc=linux-can@vger.kernel.org \
--cc=mkl@pengutronix.de \
--cc=patric.thysell@br-automation.com \
--cc=sigurbjorn.narfason@marel.com \
--cc=wg@grandegger.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox