public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Pierre Morel <pmorel@linux.vnet.ibm.com>
Cc: pasic@linux.vnet.ibm.com, bjsdjshi@linux.vnet.ibm.com,
	linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org
Subject: Re: [PATCH v2 08/10] vfio: ccw: Handling reset and shutdown with states
Date: Tue, 5 Jun 2018 14:18:27 +0200	[thread overview]
Message-ID: <20180605141827.6911fc74.cohuck@redhat.com> (raw)
In-Reply-To: <1527243678-3140-9-git-send-email-pmorel@linux.vnet.ibm.com>

On Fri, 25 May 2018 12:21:16 +0200
Pierre Morel <pmorel@linux.vnet.ibm.com> wrote:

> Two new events, VFIO_CCW_EVENT_ONLINE and VFIO_CCW_EVENT_OFFLINE
> allow to handle the enabling and disabling of a Sub Channel and
> the init, shutdown, quiesce and reset operations are changed
> accordingly.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
> ---
>  drivers/s390/cio/vfio_ccw_drv.c     | 44 ++++------------------
>  drivers/s390/cio/vfio_ccw_fsm.c     | 75 +++++++++++++++++++++++++++++++++----
>  drivers/s390/cio/vfio_ccw_ops.c     | 15 ++------
>  drivers/s390/cio/vfio_ccw_private.h |  3 ++
>  4 files changed, 82 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index 6fc7668..3e7b514 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -30,41 +30,13 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch)
>  {
>  	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>  	DECLARE_COMPLETION_ONSTACK(completion);
> -	int iretry, ret = 0;
> -
> -	spin_lock_irq(sch->lock);
> -	if (!sch->schib.pmcw.ena)
> -		goto out_unlock;
> -	ret = cio_disable_subchannel(sch);
> -	if (ret != -EBUSY)
> -		goto out_unlock;
> -
> -	do {
> -		iretry = 255;
> -
> -		ret = cio_cancel_halt_clear(sch, &iretry);
> -		while (ret == -EBUSY) {
> -			/*
> -			 * Flush all I/O and wait for
> -			 * cancel/halt/clear completion.
> -			 */
> -			private->completion = &completion;
> -			spin_unlock_irq(sch->lock);
> -
> -			wait_for_completion_timeout(&completion, 3*HZ);
> -
> -			spin_lock_irq(sch->lock);
> -			private->completion = NULL;
> -			flush_workqueue(vfio_ccw_work_q);
> -			ret = cio_cancel_halt_clear(sch, &iretry);
> -		};
> -
> -		ret = cio_disable_subchannel(sch);
> -	} while (ret == -EBUSY);
> -out_unlock:
> -	private->state = VFIO_CCW_STATE_NOT_OPER;
> -	spin_unlock_irq(sch->lock);
> -	return ret;
> +
> +	private->completion = &completion;
> +	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OFFLINE);
> +	wait_for_completion_interruptible_timeout(&completion, jiffies + 3*HZ);
> +	if (private->state != VFIO_CCW_STATE_STANDBY)
> +		return -EFAULT;

-EFAULT really looks like the wrong error here. -EIO?

(I'm not sold on the whole concept here, though. See below.)

> +	return 0;
>  }
>  
>  static void vfio_ccw_sch_io_todo(struct work_struct *work)
> @@ -95,8 +67,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
>  	memcpy(&private->irb, irb, sizeof(*irb));
>  
>  	queue_work(vfio_ccw_work_q, &private->io_work);
> -	if (private->completion)
> -		complete(private->completion);
>  }
>  
>  static int vfio_ccw_sch_probe(struct subchannel *sch)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index 20b909c..0acab2f 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -73,6 +73,53 @@ static int fsm_notoper(struct vfio_ccw_private *private)
>  	return VFIO_CCW_STATE_NOT_OPER;
>  }
>  
> +static int fsm_online(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch = private->sch;
> +	int ret = VFIO_CCW_STATE_IDLE;
> +
> +	spin_lock_irq(sch->lock);
> +	if (cio_enable_subchannel(sch, (u32)(unsigned long)sch))
> +		ret = VFIO_CCW_STATE_NOT_OPER;
> +	spin_unlock_irq(sch->lock);
> +
> +	return ret;
> +}
> +static int fsm_offline(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch = private->sch;
> +	int ret = VFIO_CCW_STATE_STANDBY;
> +
> +	spin_lock_irq(sch->lock);
> +	if (cio_disable_subchannel(sch))
> +		ret = VFIO_CCW_STATE_NOT_OPER;

So, what about a subchannel that is busy? Why should it go to the not
oper state?

(And you should try to flush pending I/O and then try again in that
case. Otherwise, you may have a still-enabled subchannel which may
throw an interrupt.)

> +	spin_unlock_irq(sch->lock);
> +	if (private->completion)
> +		complete(private->completion);
> +
> +	return ret;
> +}
> +static int fsm_quiescing(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch = private->sch;
> +	int ret = VFIO_CCW_STATE_STANDBY;
> +	int iretry = 255;
> +
> +	spin_lock_irq(sch->lock);
> +	ret = cio_cancel_halt_clear(sch, &iretry);
> +	if (ret == -EBUSY)
> +		ret = VFIO_CCW_STATE_QUIESCING;
> +	else if (private->completion)
> +		complete(private->completion);
> +	spin_unlock_irq(sch->lock);
> +	return ret;

If I read this correctly, you're calling cio_cancel_halt_clear() only
once. What happened to the retry loop?

> +}
> +static int fsm_quiescing_done(struct vfio_ccw_private *private)
> +{
> +	if (private->completion)
> +		complete(private->completion);
> +	return VFIO_CCW_STATE_STANDBY;
> +}
>  /*
>   * No operation action.
>   */
> @@ -178,15 +225,10 @@ static int fsm_sch_event(struct vfio_ccw_private *private)
>  static int fsm_init(struct vfio_ccw_private *private)
>  {
>  	struct subchannel *sch = private->sch;
> -	int ret = VFIO_CCW_STATE_STANDBY;
>  
> -	spin_lock_irq(sch->lock);
>  	sch->isc = VFIO_CCW_ISC;
> -	if (cio_enable_subchannel(sch, (u32)(unsigned long)sch))
> -		ret = VFIO_CCW_STATE_NOT_OPER;
> -	spin_unlock_irq(sch->lock);
>  
> -	return ret;
> +	return VFIO_CCW_STATE_STANDBY;

Doesn't that change the semantic of the standby state?

>  }
>  
>  
> @@ -196,6 +238,8 @@ static int fsm_init(struct vfio_ccw_private *private)
>  fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>  	[VFIO_CCW_STATE_NOT_OPER] = {
>  		[VFIO_CCW_EVENT_INIT]		= fsm_init,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_nop,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_nop,
>  		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
>  		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_nop,
>  		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_nop,
> @@ -203,13 +247,17 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>  	},
>  	[VFIO_CCW_STATE_STANDBY] = {
>  		[VFIO_CCW_EVENT_INIT]		= fsm_nop,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_online,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_offline,
>  		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>  		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_io_error,
> -		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
> +		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
>  		[VFIO_CCW_EVENT_SCHIB_CHANGED]	= fsm_sch_event,
>  	},
>  	[VFIO_CCW_STATE_IDLE] = {
>  		[VFIO_CCW_EVENT_INIT]		= fsm_nop,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_nop,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_offline,
>  		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>  		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_io_request,
>  		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
> @@ -217,6 +265,8 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>  	},
>  	[VFIO_CCW_STATE_BOXED] = {
>  		[VFIO_CCW_EVENT_INIT]		= fsm_nop,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_nop,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_quiescing,
>  		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>  		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_io_busy,
>  		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
> @@ -224,9 +274,20 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>  	},
>  	[VFIO_CCW_STATE_BUSY] = {
>  		[VFIO_CCW_EVENT_INIT]		= fsm_nop,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_nop,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_quiescing,
>  		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>  		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_io_busy,
>  		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>  		[VFIO_CCW_EVENT_SCHIB_CHANGED]	= fsm_sch_event,
>  	},
> +	[VFIO_CCW_STATE_QUIESCING] = {
> +		[VFIO_CCW_EVENT_INIT]		= fsm_nop,
> +		[VFIO_CCW_EVENT_ONLINE]		= fsm_nop,
> +		[VFIO_CCW_EVENT_OFFLINE]	= fsm_nop,
> +		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
> +		[VFIO_CCW_EVENT_SSCH_REQ]	= fsm_io_busy,
> +		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_quiescing_done,
> +		[VFIO_CCW_EVENT_SCHIB_CHANGED]	= fsm_sch_event,
> +	},

Your idea here seems to be to go to either disabling the subchannel
directly or flushing out I/O first, depending on the state you're in.
The problem is that you may need retries in any case (the subchannel
may be status pending if it is enabled; not necessarily by any I/O that
had been started, but also from an unsolicited notification.)

>  };
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index ea8fd64..b202e73 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -21,21 +21,14 @@ static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
>  
>  	private = dev_get_drvdata(mdev_parent_dev(mdev));
>  	sch = private->sch;
> -	/*
> -	 * TODO:
> -	 * In the cureent stage, some things like "no I/O running" and "no
> -	 * interrupt pending" are clear, but we are not sure what other state
> -	 * we need to care about.
> -	 * There are still a lot more instructions need to be handled. We
> -	 * should come back here later.
> -	 */

This is still true, no? I'm thinking about things like channel monitors
and the like (even if we don't support them yet).

> +
>  	ret = vfio_ccw_sch_quiesce(sch);
>  	if (ret)
>  		return ret;
> +	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_ONLINE);
>  
> -	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
> -	if (!ret)
> -		private->state = VFIO_CCW_STATE_IDLE;
> +	if (!(private->state == VFIO_CCW_STATE_IDLE))
> +		ret = -EFAULT;

The -EFAULT looks wrong here as well.

I'm also not sure whether we should conflate enabling/disabling a
device and doing a reset.

>  
>  	return ret;
>  }
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index c5455a9..ad59091 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -68,6 +68,7 @@ enum vfio_ccw_state {
>  	VFIO_CCW_STATE_IDLE,
>  	VFIO_CCW_STATE_BOXED,
>  	VFIO_CCW_STATE_BUSY,
> +	VFIO_CCW_STATE_QUIESCING,
>  	/* last element! */
>  	NR_VFIO_CCW_STATES
>  };
> @@ -81,6 +82,8 @@ enum vfio_ccw_event {
>  	VFIO_CCW_EVENT_SSCH_REQ,
>  	VFIO_CCW_EVENT_INTERRUPT,
>  	VFIO_CCW_EVENT_SCHIB_CHANGED,
> +	VFIO_CCW_EVENT_ONLINE,
> +	VFIO_CCW_EVENT_OFFLINE,
>  	/* last element! */
>  	NR_VFIO_CCW_EVENTS
>  };

  reply	other threads:[~2018-06-05 12:18 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-25 10:21 [PATCH v2 00/10] vfio: ccw: Refactoring the VFIO CCW state machine Pierre Morel
2018-05-25 10:21 ` [PATCH v2 01/10] vfio: ccw: Moving state change out of IRQ context Pierre Morel
2018-06-04 13:52   ` Cornelia Huck
2018-06-05 13:34     ` Pierre Morel
2018-06-05 13:52       ` Cornelia Huck
2018-06-05 14:22         ` Pierre Morel
2018-05-25 10:21 ` [PATCH v2 02/10] vfio: ccw: Transform FSM functions to return state Pierre Morel
2018-05-25 10:21 ` [PATCH v2 03/10] vfio: ccw: new SCH_EVENT event Pierre Morel
2018-05-25 10:21 ` [PATCH v2 04/10] vfio: ccw: replace IO_REQ event with SSCH_REQ event Pierre Morel
2018-05-25 10:21 ` [PATCH v2 05/10] vfio: ccw: Suppress unused event parameter Pierre Morel
2018-05-25 10:21 ` [PATCH v2 06/10] vfio: ccw: Make FSM functions atomic Pierre Morel
2018-06-05 11:38   ` Cornelia Huck
2018-06-05 13:10     ` Pierre Morel
2018-06-05 13:35       ` Cornelia Huck
2018-06-05 14:21         ` Pierre Morel
2018-06-05 15:15           ` Cornelia Huck
2018-05-25 10:21 ` [PATCH v2 07/10] vfio: ccw: FSM and mediated device initialization Pierre Morel
2018-05-25 10:21 ` [PATCH v2 08/10] vfio: ccw: Handling reset and shutdown with states Pierre Morel
2018-06-05 12:18   ` Cornelia Huck [this message]
2018-06-05 14:10     ` Pierre Morel
2018-06-05 15:27       ` Cornelia Huck
2018-06-05 16:40         ` Pierre Morel
2018-05-25 10:21 ` [PATCH v2 09/10] vfio: ccw: Suppressing the BOXED state Pierre Morel
2018-05-25 10:21 ` [PATCH v2 10/10] vfio: ccw: Let user wait when busy on IO Pierre Morel
2018-05-25 14:04   ` Heiko Carstens
2018-06-05 13:02     ` Pierre Morel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180605141827.6911fc74.cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=bjsdjshi@linux.vnet.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=pasic@linux.vnet.ibm.com \
    --cc=pmorel@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox