qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: Zhou Jie <zhoujie2011@cn.fujitsu.com>
Cc: fan.chen@easystack.cn, linux-kernel@vger.kernel.org,
	qemu-devel@nongnu.org, Chen Fan <chen.fan.fnst@cn.fujitsu.com>,
	izumi.taku@jp.fujitsu.com
Subject: Re: [Qemu-devel] [PATCH v2 2/2] vfio : add aer process
Date: Mon, 1 Aug 2016 09:36:49 -0600	[thread overview]
Message-ID: <20160801093649.69c44e89@t450s.home> (raw)
In-Reply-To: <d9a83de6-811d-d492-f7f8-99c01f40b2d8@cn.fujitsu.com>

On Mon, 1 Aug 2016 10:14:06 +0800
Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:

> Hi, Alex
> 
> On 2016/7/30 1:12, Alex Williamson wrote:
> > On Tue, 19 Jul 2016 15:32:43 +0800
> > Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> >  
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> During aer err occurs and resume do following to
> >> protect device from being accessed.
> >> 1. Make config space read only.
> >> 2. Disable INTx/MSI Interrupt.
> >> 3. Do nothing for bar regions.
> >>
> >> Signed-off-by: Zhou Jie <zhoujie2011@cn.fujitsu.com>
> >> ---
> >>  drivers/vfio/pci/vfio_pci.c         | 30 ++++++++++++++++++++++++++++++
> >>  drivers/vfio/pci/vfio_pci_private.h |  2 ++
> >>  include/uapi/linux/vfio.h           |  2 ++
> >>  3 files changed, 34 insertions(+)
> >>
> >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> >> index 2d12b03..dd96b60 100644
> >> --- a/drivers/vfio/pci/vfio_pci.c
> >> +++ b/drivers/vfio/pci/vfio_pci.c
> >> @@ -318,6 +318,7 @@ static int vfio_pci_open(void *device_data)
> >>  		return -ENODEV;
> >>
> >>  	mutex_lock(&driver_lock);
> >> +	init_completion(&vdev->aer_error_completion);
> >>
> >>  	if (!vdev->refcnt) {
> >>  		ret = vfio_pci_enable(vdev);
> >> @@ -571,6 +572,16 @@ static long vfio_pci_ioctl(void *device_data,
> >>  	struct vfio_pci_device *vdev = device_data;
> >>  	unsigned long minsz;
> >>
> >> +	if (vdev->aer_error_in_progress && (cmd == VFIO_DEVICE_SET_IRQS ||
> >> +	    cmd == VFIO_DEVICE_RESET || cmd == VFIO_DEVICE_PCI_HOT_RESET)) {
> >> +		int ret;
> >> +		ret = wait_for_completion_interruptible(
> >> +			&vdev->aer_error_completion);
> >> +		if (ret) {
> >> +			return ret;
> >> +		}  
> >
> > No brackets necessary.
> >  
> >> +	}
> >> +
> >>  	if (cmd == VFIO_DEVICE_GET_INFO) {
> >>  		struct vfio_device_info info;
> >>
> >> @@ -587,6 +598,10 @@ static long vfio_pci_ioctl(void *device_data,
> >>  		if (vdev->reset_works)
> >>  			info.flags |= VFIO_DEVICE_FLAGS_RESET;
> >>
> >> +		info.flags |= VFIO_DEVICE_FLAGS_AERPROCESS;
> >> +		if (vdev->aer_error_in_progress)
> >> +			info.flags |= VFIO_DEVICE_FLAGS_INAERPROCESS;
> >> +
> >>  		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
> >>  		info.num_irqs = VFIO_PCI_NUM_IRQS;
> >>
> >> @@ -996,6 +1011,14 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
> >>
> >>  	switch (index) {
> >>  	case VFIO_PCI_CONFIG_REGION_INDEX:
> >> +		if (vdev->aer_error_in_progress && iswrite) {
> >> +			int ret;
> >> +			ret = wait_for_completion_interruptible(
> >> +				&vdev->aer_error_completion);
> >> +			if (ret) {
> >> +				return ret;
> >> +			}
> >> +		}
> >>  		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
> >>
> >>  	case VFIO_PCI_ROM_REGION_INDEX:
> >> @@ -1226,6 +1249,10 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
> >>
> >>  	mutex_lock(&vdev->igate);
> >>
> >> +	vdev->aer_error_in_progress = true;
> >> +	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
> >> +				VFIO_IRQ_SET_ACTION_TRIGGER,
> >> +				vdev->irq_type, 0, 0, NULL);
> >>  	if (vdev->err_trigger)
> >>  		eventfd_signal(vdev->err_trigger, 1);
> >>
> >> @@ -1252,6 +1279,9 @@ static void vfio_pci_aer_resume(struct pci_dev *pdev)
> >>  	}
> >>
> >>  	mutex_lock(&vdev->igate);
> >> +
> >> +	vdev->aer_error_in_progress = false;
> >> +	complete_all(&vdev->aer_error_completion);
> >>  	if (vdev->resume_trigger)
> >>  		eventfd_signal(vdev->resume_trigger, 1);
> >>
> >> diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
> >> index 80d4ddd..2f151f5 100644
> >> --- a/drivers/vfio/pci/vfio_pci_private.h
> >> +++ b/drivers/vfio/pci/vfio_pci_private.h
> >> @@ -84,6 +84,8 @@ struct vfio_pci_device {
> >>  	bool			has_vga;
> >>  	bool			needs_reset;
> >>  	bool			nointx;
> >> +	bool			aer_error_in_progress;
> >> +	struct completion	aer_error_completion;
> >>  	struct pci_saved_state	*pci_saved_state;
> >>  	int			refcnt;
> >>  	struct eventfd_ctx	*err_trigger;
> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >> index 34ab138..276ce50 100644
> >> --- a/include/uapi/linux/vfio.h
> >> +++ b/include/uapi/linux/vfio.h
> >> @@ -198,6 +198,8 @@ struct vfio_device_info {
> >>  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
> >>  #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
> >>  #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
> >> +#define VFIO_DEVICE_FLAGS_AERPROCESS  (1 << 4)	/* support aer error progress */
> >> +#define VFIO_DEVICE_FLAGS_INAERPROCESS  (1 << 5)/* status in aer error progress */
> >>  	__u32	num_regions;	/* Max region index + 1 */
> >>  	__u32	num_irqs;	/* Max IRQ index + 1 */
> >>  };  
> >
> > Clearly this has only been tested for a single instance of an AER error
> > event and resume per device.  Are the things you're intending to block
> > actually blocked for subsequent events?  Note how complete_all() fills
> > the done field to let all current and future waiters go through and
> > nowhere is there a call to reinit_completion() to drain that path.
> > Thanks,
> >
> > Alex  
> 
> Do you mean this condition?
> 
> For device 1:
> error1 occurs ---- error1 resumes
>      error2 occurs ---- error2 resumes
>          error3 occurs ---- error3 resumes
> 
> In current code, I do complete_all() when error1 resumes.
> And this will unblock the device
> when error2 and error3 are still be processed.

So walk me through how this works.  On vfio_pci_open() we call
init_completion(), which sets aer_error_completion.done equal to zero
(BTW, a user can open the device file descriptor multiple times, so
there's already a bug here).  Let's assume that an error occurs and the
user stalls a single access on wait_for_completion_interruptible().
The bulk of this function happens here:

static inline long __sched
do_wait_for_common(struct completion *x,
                   long (*action)(long), long timeout, int state)
{
        if (!x->done) {
                DECLARE_WAITQUEUE(wait, current);

                __add_wait_queue_tail_exclusive(&x->wait, &wait);
                do {
                        if (signal_pending_state(state, current)) {
                                timeout = -ERESTARTSYS;
                                break;
                        }
                        __set_current_state(state);
                        spin_unlock_irq(&x->wait.lock);
                        timeout = action(timeout);
                        spin_lock_irq(&x->wait.lock);
                } while (!x->done && timeout);
                __remove_wait_queue(&x->wait, &wait);
                if (!x->done)
                        return timeout;
        }
        x->done--;
        return timeout ?: 1;
}

So it waits within that do{}while loop for a completion, interruption,
or timeout.  Then we have:

void complete_all(struct completion *x)
{
        unsigned long flags;

        spin_lock_irqsave(&x->wait.lock, flags);
        x->done += UINT_MAX/2;
        __wake_up_locked(&x->wait, TASK_NORMAL, 0);
        spin_unlock_irqrestore(&x->wait.lock, flags);
}

So aer_error_completion.done gets incremented to let a couple billion
completion waiters through...  Show me how another call to
wait_for_completion_interruptible() will ever block again within our
lifetime when the actual wait of do_wait_for_common() is only entered
when 'done' count is equal to zero.  This seems to be why
reinit_completion() exists, but it's not used here.  Thanks,

Alex

  reply	other threads:[~2016-08-01 15:36 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-19  7:32 [Qemu-devel] [PATCH v2 0/2] vfio: add aer process Zhou Jie
2016-07-19  7:32 ` [Qemu-devel] [PATCH v2 1/2] vfio : resume notifier Zhou Jie
2016-07-19  7:39   ` Chen Fan
2016-07-19  7:32 ` [Qemu-devel] [PATCH v2 2/2] vfio : add aer process Zhou Jie
2016-07-29 17:12   ` Alex Williamson
2016-08-01  2:14     ` Zhou Jie
2016-08-01 15:36       ` Alex Williamson [this message]
2016-08-02  1:22         ` Zhou Jie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160801093649.69c44e89@t450s.home \
    --to=alex.williamson@redhat.com \
    --cc=chen.fan.fnst@cn.fujitsu.com \
    --cc=fan.chen@easystack.cn \
    --cc=izumi.taku@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    --cc=zhoujie2011@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).