netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Veerasenareddy Burru <vburru@marvell.com>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	aayarekar@marvell.com, sedara@marvell.com, sburla@marvell.com,
	linux-doc@vger.kernel.org,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Subject: Re: [PATCH net-next v4 8/8] octeon_ep: add heartbeat monitor
Date: Thu, 23 Mar 2023 12:47:03 +0200	[thread overview]
Message-ID: <20230323104703.GD36557@unreal> (raw)
In-Reply-To: <20230322091958.13103-9-vburru@marvell.com>

On Wed, Mar 22, 2023 at 02:19:57AM -0700, Veerasenareddy Burru wrote:
> Monitor periodic heartbeat messages from device firmware.
> Presence of heartbeat indicates the device is active and running.
> If the heartbeat is missed for the configured interval, it indicates
> that the firmware has crashed and the device is unusable; in this case,
> the PF driver stops and uninitializes the device.
> 
> Signed-off-by: Veerasenareddy Burru <vburru@marvell.com>
> Signed-off-by: Abhijit Ayarekar <aayarekar@marvell.com>
> ---
> v3 -> v4:
>  * 0007-xxx.patch in v3 is 0008-xxx.patch in v4.
> 
> v2 -> v3:
>  * 0009-xxx.patch in v2 is now 0007-xxx.patch in v3 because
>    0007 and 0008.patch from v2 were removed in v3.
> 
> v1 -> v2:
>  * no change
> 
>  .../marvell/octeon_ep/octep_cn9k_pf.c         |  9 ++++
>  .../ethernet/marvell/octeon_ep/octep_config.h |  6 +++
>  .../ethernet/marvell/octeon_ep/octep_main.c   | 45 ++++++++++++++++++-
>  .../ethernet/marvell/octeon_ep/octep_main.h   |  7 +++
>  .../marvell/octeon_ep/octep_regs_cn9k_pf.h    |  2 +
>  5 files changed, 67 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
> index e2503c9bc8a1..90c3a419932d 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
> @@ -16,6 +16,9 @@
>  #define CTRL_MBOX_MAX_PF	128
>  #define CTRL_MBOX_SZ		((size_t)(0x400000 / CTRL_MBOX_MAX_PF))
>  
> +#define FW_HB_INTERVAL_IN_SECS		1
> +#define FW_HB_MISS_COUNT		10
> +
>  /* Names of Hardware non-queue generic interrupts */
>  static char *cn93_non_ioq_msix_names[] = {
>  	"epf_ire_rint",
> @@ -249,6 +252,10 @@ static void octep_init_config_cn93_pf(struct octep_device *oct)
>  	conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr +
>  					   (0x400000ull * 7) +
>  					   (link * CTRL_MBOX_SZ);
> +
> +	conf->hb_interval = FW_HB_INTERVAL_IN_SECS;
> +	conf->max_hb_miss_cnt = FW_HB_MISS_COUNT;
> +
>  }
>  
>  /* Setup registers for a hardware Tx Queue  */
> @@ -383,6 +390,8 @@ static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
>  		octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0);
>  		if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
>  			queue_work(octep_wq, &oct->ctrl_mbox_task);
> +		else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
> +			atomic_set(&oct->hb_miss_cnt, 0);
>  
>  		handled = true;
>  	}
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> index f208f3f9a447..df7cd39d9fce 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> @@ -200,5 +200,11 @@ struct octep_config {
>  
>  	/* ctrl mbox config */
>  	struct octep_ctrl_mbox_config ctrl_mbox_cfg;
> +
> +	/* Configured maximum heartbeat miss count */
> +	u32 max_hb_miss_cnt;
> +
> +	/* Configured firmware heartbeat interval in secs */
> +	u32 hb_interval;
>  };
>  #endif /* _OCTEP_CONFIG_H_ */
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> index ba0d5fe3081d..415dd06ff344 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> @@ -901,6 +901,38 @@ static void octep_intr_poll_task(struct work_struct *work)
>  			   msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS));
>  }
>  
> +/**
> + * octep_hb_timeout_task - work queue task to check firmware heartbeat.
> + *
> + * @work: pointer to hb work_struct
> + *
> + * Check for heartbeat miss count. Uninitialize oct device if miss count
> + * exceeds configured max heartbeat miss count.
> + *
> + **/
> +static void octep_hb_timeout_task(struct work_struct *work)
> +{
> +	struct octep_device *oct = container_of(work, struct octep_device,
> +						hb_task.work);
> +
> +	int miss_cnt;
> +
> +	atomic_inc(&oct->hb_miss_cnt);
> +	miss_cnt = atomic_read(&oct->hb_miss_cnt);

miss_cnt = atomic_inc_return(&oct->hb_miss_cnt);

> +	if (miss_cnt < oct->conf->max_hb_miss_cnt) {

How is this heartbeat working? You increment the counter on every entry to
octep_hb_timeout_task(), so after max_hb_miss_cnt invocations you will stop
your device.

Thanks

> +		queue_delayed_work(octep_wq, &oct->hb_task,
> +				   msecs_to_jiffies(oct->conf->hb_interval * 1000));
> +		return;
> +	}
> +
> +	dev_err(&oct->pdev->dev, "Missed %u heartbeats. Uninitializing\n",
> +		miss_cnt);
> +	rtnl_lock();
> +	if (netif_running(oct->netdev))
> +		octep_stop(oct->netdev);
> +	rtnl_unlock();
> +}
> +
>  /**
>   * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages.
>   *
> @@ -938,7 +970,7 @@ static const char *octep_devid_to_str(struct octep_device *oct)
>  int octep_device_setup(struct octep_device *oct)
>  {
>  	struct pci_dev *pdev = oct->pdev;
> -	int i;
> +	int i, ret;
>  
>  	/* allocate memory for oct->conf */
>  	oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL);
> @@ -973,7 +1005,15 @@ int octep_device_setup(struct octep_device *oct)
>  
>  	oct->pkind = CFG_GET_IQ_PKIND(oct->conf);
>  
> -	return octep_ctrl_net_init(oct);
> +	ret = octep_ctrl_net_init(oct);
> +	if (ret)
> +		return ret;
> +
> +	atomic_set(&oct->hb_miss_cnt, 0);
> +	INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task);
> +	queue_delayed_work(octep_wq, &oct->hb_task,
> +			   msecs_to_jiffies(oct->conf->hb_interval * 1000));
> +	return 0;
>  
>  unsupported_dev:
>  	for (i = 0; i < OCTEP_MMIO_REGIONS; i++)
> @@ -1002,6 +1042,7 @@ static void octep_device_cleanup(struct octep_device *oct)
>  	}
>  
>  	octep_ctrl_net_uninit(oct);
> +	cancel_delayed_work_sync(&oct->hb_task);
>  
>  	oct->hw_ops.soft_reset(oct);
>  	for (i = 0; i < OCTEP_MMIO_REGIONS; i++) {
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
> index 836d990ba3fa..e0907a719133 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
> @@ -280,6 +280,13 @@ struct octep_device {
>  	bool poll_non_ioq_intr;
>  	/* Work entry to poll non-ioq interrupts */
>  	struct delayed_work intr_poll_task;
> +
> +	/* Firmware heartbeat timer */
> +	struct timer_list hb_timer;
> +	/* Firmware heartbeat miss count tracked by timer */
> +	atomic_t hb_miss_cnt;
> +	/* Task to reset device on heartbeat miss */
> +	struct delayed_work hb_task;
>  };
>  
>  static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct)
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
> index 0466fd9a002d..b25c3093dc7b 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
> @@ -367,5 +367,7 @@
>  
>  /* bit 0 for control mbox interrupt */
>  #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX	BIT_ULL(0)
> +/* bit 1 for firmware heartbeat interrupt */
> +#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT	BIT_ULL(1)
>  
>  #endif /* _OCTEP_REGS_CN9K_PF_H_ */
> -- 
> 2.36.0
> 

  reply	other threads:[~2023-03-23 10:49 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-22  9:19 [PATCH net-next v4 0/8] octeon_ep: deferred probe and mailbox Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 1/8] octeon_ep: defer probe if firmware not ready Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 2/8] octeon_ep: poll for control messages Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 3/8] octeon_ep: control mailbox for multiple PFs Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 4/8] octeon_ep: add separate mailbox command and response queues Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 5/8] octeon_ep: include function id in mailbox commands Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 6/8] octeon_ep: support asynchronous notifications Veerasenareddy Burru
2023-03-23 10:39   ` Leon Romanovsky
2023-03-23 17:24     ` [EXT] " Veerasenareddy Burru
2023-03-29  7:29       ` Leon Romanovsky
2023-03-22  9:19 ` [PATCH net-next v4 7/8] octeon_ep: function id in link info and stats mailbox commands Veerasenareddy Burru
2023-03-22  9:19 ` [PATCH net-next v4 8/8] octeon_ep: add heartbeat monitor Veerasenareddy Burru
2023-03-23 10:47   ` Leon Romanovsky [this message]
2023-03-23 18:14     ` [EXT] " Veerasenareddy Burru
2023-03-29  7:33       ` Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230323104703.GD36557@unreal \
    --to=leon@kernel.org \
    --cc=aayarekar@marvell.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sburla@marvell.com \
    --cc=sedara@marvell.com \
    --cc=vburru@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).