Linux CXL
 help / color / mirror / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
To: Davidlohr Bueso <dave@stgolabs.net>
Cc: <dan.j.williams@intel.com>, <dave.jiang@intel.com>,
	<alison.schofield@intel.com>, <ira.weiny@intel.com>,
	<vishal.l.verma@intel.com>, <fan.ni@samsung.com>,
	<a.manzanares@samsung.com>, <linux-cxl@vger.kernel.org>
Subject: Re: [PATCH 3/7] cxl/mbox: Add sanitation handling machinery
Date: Thu, 11 May 2023 15:45:30 +0100	[thread overview]
Message-ID: <20230511154530.00000801@Huawei.com> (raw)
In-Reply-To: <20230421092321.12741-4-dave@stgolabs.net>

On Fri, 21 Apr 2023 02:23:17 -0700
Davidlohr Bueso <dave@stgolabs.net> wrote:

> Sanitation is by definition a device-monopolizing operation, and thus
> the timeslicing rules for other background commands do not apply.
> As such handle this special case asynchronously and return immediately.
> Subsequent changes will allow completion to be pollable from userspace
> via a sysfs file interface.
> 
> For devices that don't support interrupts for notifying background
> command completion, self-poll with the caveat that the poller can
> be out of sync with the ready hardware, and therefore care must be
> taken to not allow any new commands to go through until the poller
> sees the hw completion. The poller takes the mbox_mutex to stabilize
> the flagging, minimizing any runtime overhead in the send path to
> check for 'sanitize_tmo' for uncommon poll scenarios. This flag
> also serves for sanitation (the only user of async polling) to know
> when to queue work or simply rely on irqs.
> 
> The irq case is much simpler as hardware will serialize/error
> appropriately.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  drivers/cxl/cxlmem.h | 16 +++++++++
>  drivers/cxl/pci.c    | 79 ++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 93 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 8c3302fc7738..17e3ab3c641a 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -220,6 +220,18 @@ struct cxl_event_state {
>  	struct mutex log_lock;
>  };
>  
> +/**
> + * struct cxl_security_state - Device security state
> + *
> + * @sanitize_dwork: self-polling work item for sanitation
> + * @sanitize_tmo: self-polling timeout
> + */
> +struct cxl_security_state {
> +	/* below only used if device mbox irqs are not supported */
Call it out by name.  We are almost sure to make a 'below' bit rot
at somepoint :)

> +	struct delayed_work sanitize_dwork;
> +	int sanitize_tmo;
> +};
> +
>  /**
>   * struct cxl_dev_state - The driver device state
>   *
> @@ -256,6 +268,7 @@ struct cxl_event_state {
>   * @serial: PCIe Device Serial Number
>   * @doe_mbs: PCI DOE mailbox array
>   * @event: event log driver state
> + * @sec: device security state
>   * @mbox_send: @dev specific transport for transmitting mailbox commands
>   *
>   * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
> @@ -296,6 +309,8 @@ struct cxl_dev_state {
>  
>  	struct cxl_event_state event;
>  
> +	struct cxl_security_state sec;
> +
>  	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
>  };

...

> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index aa1bb74a52a1..bdee5273af5a 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -97,6 +97,8 @@ static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
>  static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>  {
>  	struct cxl_dev_state *cxlds = id;
> +	u64 reg;
> +	u16 opcode;
>  
>  	/* spurious or raced with hw? */
>  	if (!cxl_mbox_background_complete(cxlds)) {
> @@ -107,12 +109,47 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>  		goto done;
>  	}
>  
> -	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> -	wake_up(&mbox_wait);
> +	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
> +
> +	if (opcode == CXL_MBOX_OP_SANITIZE) {
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> +		wake_up(&mbox_wait);
> +	}
>  done:
>  	return IRQ_HANDLED;
>  }
>  
> +/*
> + * Sanitation operation polling mode.
> + */
> +static void cxl_mbox_sanitize_work(struct work_struct *work)
> +{
> +	struct cxl_dev_state *cxlds;
> +
> +	cxlds = container_of(work, struct cxl_dev_state,
> +			     sec.sanitize_dwork.work);
> +
> +	WARN_ON(cxlds->sec.sanitize_tmo == -1);

Overly paranoid?

> +
> +	mutex_lock(&cxlds->mbox_mutex);
> +	if (cxl_mbox_background_complete(cxlds)) {
> +		cxlds->sec.sanitize_tmo = 0;
> +		put_device(cxlds->dev);
> +
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		int tmo = cxlds->sec.sanitize_tmo + 10;

Add some units to the naming of variables.

> +
> +		cxlds->sec.sanitize_tmo = min(15 * 60, tmo);

Why? That feels like it needs a comment to me.  Not that expensive
to check this so I'm not sure the ramp up is that logical.

> +		queue_delayed_work(system_wq,
> +				   &cxlds->sec.sanitize_dwork, tmo * HZ);
> +	}
> +	mutex_unlock(&cxlds->mbox_mutex);
> +}
> +
>  /**
>   * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
>   * @cxlds: The device state to communicate with.
> @@ -173,6 +210,16 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  		return -EBUSY;
>  	}
>  
> +	/*
> +	 * With sanitize polling, hardware might be done and the poller still
> +	 * not be in sync. Ensure no new command comes in until so. Keep the
> +	 * hardware semantics and only allow device health status.
> +	 */
> +	if (unlikely(cxlds->sec.sanitize_tmo > 0)) {
> +		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)

Doesn't this let the value of mbox_cmd->opcode change to HEALTH_INFO so that
when we get here again we could carry on without other commands though still not in
sync (if things are very weird).

> +			return -EBUSY;
> +	}
> +
>  	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
>  			     mbox_cmd->opcode);
>  	if (mbox_cmd->size_in) {
> @@ -223,6 +270,27 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  		u64 bg_status_reg;
>  		int i;
>  
> +		/*
> +		 * Sanitation is a special case which monopolizes the device
> +		 * in an uninterruptible state and thus cannot be timesliced.
> +		 * Return immediately instead and allow userspace to poll(2)
> +		 * for completion.
> +		 */
> +		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
> +			if (cxlds->sec.sanitize_tmo != -1) {

As below. Have a self explanatory variable called sec.polling or sec.interrupt

> +				/* give first timeout a second */
> +				cxlds->sec.sanitize_tmo = 1;

If this was named santize_tmo_secs then comment not needed.

> +				/* hold the device throughout */
> +				get_device(cxlds->dev);
> +				queue_delayed_work(system_wq,
> +						   &cxlds->sec.sanitize_dwork,
> +						   cxlds->sec.sanitize_tmo * HZ);
> +			}
> +
> +			dev_dbg(dev, "Sanitation operation started\n");
> +			return 0;
> +		}
> +
>  		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
>  			mbox_cmd->opcode);
>  
> @@ -366,6 +434,9 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  		if (rc)
>  			goto mbox_poll;
>  
> +		/* flag that irqs are enabled */
> +		cxlds->sec.sanitize_tmo = -1;

That's confusing.  I'd add a separate structure element for it instead with
appropriate naming.

> +
>  		writel(CXLDEV_MBOX_CTRL_BG_CMD_IRQ,
>  		       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
>  
> @@ -373,7 +444,11 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  	}
>  
>  mbox_poll:
> +	INIT_DELAYED_WORK(&cxlds->sec.sanitize_dwork,
> +			  cxl_mbox_sanitize_work);
> +	cxlds->sec.sanitize_tmo = 0;
>  	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
> +
My favorite moan. Unrelated whitespace change! Push it to patch 2 that introduced
that dev_dbg() I think.

>  	return 0;
>  }
>  


  parent reply	other threads:[~2023-05-11 15:02 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-21  9:23 [PATCH v4 0/7] cxl: Background cmds and device sanitation Davidlohr Bueso
2023-04-21  9:23 ` [PATCH 1/7] cxl/pci: Allocate irq vectors earlier in pci probe Davidlohr Bueso
2023-04-28 16:09   ` Dave Jiang
2023-05-11 13:55   ` Jonathan Cameron
2023-04-21  9:23 ` [PATCH 2/7] cxl/mbox: Add background cmd handling machinery Davidlohr Bueso
2023-04-23  7:54   ` Li, Ming
2023-04-23 20:51     ` Davidlohr Bueso
2023-04-28 16:21   ` Dave Jiang
2023-04-28 17:18     ` Davidlohr Bueso
2023-04-28 21:04       ` Dave Jiang
2023-04-28 22:03         ` Davidlohr Bueso
2023-05-01 15:56           ` Davidlohr Bueso
2023-05-11 14:23   ` Jonathan Cameron
2023-05-11 16:04     ` Davidlohr Bueso
2023-05-12 17:05       ` Jonathan Cameron
2023-04-21  9:23 ` [PATCH 3/7] cxl/mbox: Add sanitation " Davidlohr Bueso
2023-04-28 16:43   ` Dave Jiang
2023-04-28 16:46     ` Davidlohr Bueso
2023-04-28 17:37       ` Dave Jiang
2023-05-11 14:45   ` Jonathan Cameron [this message]
2023-05-11 16:48     ` Davidlohr Bueso
2023-05-12 17:02       ` Jonathan Cameron
2023-04-21  9:23 ` [PATCH 4/7] cxl/mem: Wire up Sanitation support Davidlohr Bueso
2023-04-21 20:04   ` kernel test robot
2023-04-21 20:24   ` kernel test robot
2023-05-11 15:07   ` Jonathan Cameron
2023-05-11 17:23     ` Davidlohr Bueso
2023-05-12 17:00       ` Jonathan Cameron
2023-04-21  9:23 ` [PATCH 5/7] cxl/test: Add Sanitize opcode support Davidlohr Bueso
2023-05-11 15:09   ` Jonathan Cameron
2023-05-11 15:13     ` Davidlohr Bueso
2023-04-21  9:23 ` [PATCH 6/7] cxl/mem: Support Secure Erase Davidlohr Bueso
2023-05-11 15:10   ` Jonathan Cameron
2023-04-21  9:23 ` [PATCH 7/7] cxl/test: Add Secure Erase opcode support Davidlohr Bueso
2023-05-11 15:10   ` Jonathan Cameron
2023-04-23  2:05 ` [PATCH v4 0/7] cxl: Background cmds and device sanitation Davidlohr Bueso
  -- strict thread matches above, loose matches on Subject: below --
2023-06-12 18:10 [PATCH v6 0/7] cxl: Support " Davidlohr Bueso
2023-06-12 18:10 ` [PATCH 3/7] cxl/mbox: Add sanitation handling machinery Davidlohr Bueso
2023-06-13 16:07   ` Jonathan Cameron
2023-06-13 16:28     ` Davidlohr Bueso
2023-06-14  8:36       ` Jonathan Cameron
2023-06-25 22:13   ` Dan Williams
2023-06-26 18:17     ` Davidlohr Bueso
2023-06-25 22:18   ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230511154530.00000801@Huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=a.manzanares@samsung.com \
    --cc=alison.schofield@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=fan.ni@samsung.com \
    --cc=ira.weiny@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=vishal.l.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox