All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: "Amit Shah" <amit@kernel.org>,
	qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>,
	"Martin Wilck" <mwilck@suse.com>
Subject: Re: [PATCH v3] virtio-rng: return available data with O_NONBLOCK
Date: Tue, 8 Sep 2020 10:14:56 -0400	[thread overview]
Message-ID: <20200908101413-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <a5d4bcd3-e468-e818-3bd2-3a1b0fa172d8@redhat.com>

On Mon, Aug 31, 2020 at 02:37:26PM +0200, Laurent Vivier wrote:
> On 28/08/2020 23:34, Martin Wilck wrote:
> > On Wed, 2020-08-26 at 08:26 -0400, Michael S. Tsirkin wrote:
> >> On Tue, Aug 11, 2020 at 04:42:32PM +0200, Laurent Vivier wrote:
> >>> On 11/08/2020 16:28, mwilck@suse.com wrote:
> >>>> From: Martin Wilck <mwilck@suse.com>
> >>>>
> >>>> If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and
> >>>> non-blocking read() to retrieve random data, it ends up in a
> >>>> tight
> >>>> loop with poll() always returning POLLIN and read() returning
> >>>> EAGAIN.
> >>>> This repeats forever until some process makes a blocking read()
> >>>> call.
> >>>> The reason is that virtio_read() always returns 0 in non-blocking 
> >>>> mode,
> >>>> even if data is available. Worse, it fetches random data from the
> >>>> hypervisor after every non-blocking call, without ever using this
> >>>> data.
> >>>>
> >>>> The following test program illustrates the behavior and can be
> >>>> used
> >>>> for testing and experiments. The problem will only be seen if all
> >>>> tasks use non-blocking access; otherwise the blocking reads will
> >>>> "recharge" the random pool and cause other, non-blocking reads to
> >>>> succeed at least sometimes.
> >>>>
> >>>> /* Whether to use non-blocking mode in a task, problem occurs if
> >>>> CONDITION is 1 */
> >>>> //#define CONDITION (getpid() % 2 != 0)
> >>>>
> >>>> static volatile sig_atomic_t stop;
> >>>> static void handler(int sig __attribute__((unused))) { stop = 1;
> >>>> }
> >>>>
> >>>> static void loop(int fd, int sec)
> >>>> {
> >>>> 	struct pollfd pfd = { .fd = fd, .events  = POLLIN, };
> >>>> 	unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0;
> >>>> 	int size, rc, rd;
> >>>>
> >>>> 	srandom(getpid());
> >>>> 	if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) |
> >>>> O_NONBLOCK) == -1)
> >>>> 		perror("fcntl");
> >>>> 	size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1);
> >>>>
> >>>> 	for(;;) {
> >>>> 		char buf[size];
> >>>>
> >>>> 		if (stop)
> >>>> 			break;
> >>>> 		rc = poll(&pfd, 1, sec);
> >>>> 		if (rc > 0) {
> >>>> 			rd = read(fd, buf, sizeof(buf));
> >>>> 			if (rd == -1 && errno == EAGAIN)
> >>>> 				eagains++;
> >>>> 			else if (rd == -1)
> >>>> 				errors++;
> >>>> 			else {
> >>>> 				succ++;
> >>>> 				bytes += rd;
> >>>> 				write(1, buf, sizeof(buf));
> >>>> 			}
> >>>> 		} else if (rc == -1) {
> >>>> 			if (errno != EINTR)
> >>>> 				perror("poll");
> >>>> 			break;
> >>>> 		} else
> >>>> 			fprintf(stderr, "poll: timeout\n");
> >>>> 	}
> >>>> 	fprintf(stderr,
> >>>> 		"pid %d %sblocking, bufsize %d, %d seconds, %lu bytes
> >>>> read, %lu success, %lu eagain, %lu errors\n",
> >>>> 		getpid(), CONDITION ? "non-" : "", size, sec, bytes,
> >>>> succ, eagains, errors);
> >>>> }
> >>>>
> >>>> int main(void)
> >>>> {
> >>>> 	int fd;
> >>>>
> >>>> 	fork(); fork();
> >>>> 	fd = open("/dev/hwrng", O_RDONLY);
> >>>> 	if (fd == -1) {
> >>>> 		perror("open");
> >>>> 		return 1;
> >>>> 	};
> >>>> 	signal(SIGALRM, handler);
> >>>> 	alarm(SECONDS);
> >>>> 	loop(fd, SECONDS);
> >>>> 	close(fd);
> >>>> 	wait(NULL);
> >>>> 	return 0;
> >>>> }
> >>>>
> >>>> void loop(int fd)
> >>>> {
> >>>>         struct pollfd pfd0 = { .fd = fd, .events  = POLLIN, };
> >>>>         int rc;
> >>>>         unsigned int n;
> >>>>
> >>>>         for (n = LOOPS; n > 0; n--) {
> >>>>                 struct pollfd pfd = pfd0;
> >>>>                 char buf[SIZE];
> >>>>
> >>>>                 rc = poll(&pfd, 1, 1);
> >>>>                 if (rc > 0) {
> >>>>                         int rd = read(fd, buf, sizeof(buf));
> >>>>
> >>>>                         if (rd == -1)
> >>>>                                 perror("read");
> >>>>                         else
> >>>>                                 printf("read %d bytes\n", rd);
> >>>>                 } else if (rc == -1)
> >>>>                         perror("poll");
> >>>>                 else
> >>>>                         fprintf(stderr, "timeout\n");
> >>>>
> >>>>         }
> >>>> }
> >>>>
> >>>> int main(void)
> >>>> {
> >>>>         int fd;
> >>>>
> >>>>         fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK);
> >>>>         if (fd == -1) {
> >>>>                 perror("open");
> >>>>                 return 1;
> >>>>         };
> >>>>         loop(fd);
> >>>>         close(fd);
> >>>>         return 0;
> >>>> }
> >>>>
> >>>> This can be observed in the real word e.g. with nested qemu/KVM
> >>>> virtual
> >>>> machines, if both the "outer" and "inner" VMs have a virtio-rng
> >>>> device.
> >>>> If the "inner" VM requests random data, qemu running in the
> >>>> "outer" VM
> >>>> uses this device in a non-blocking manner like the test program
> >>>> above.
> >>>>
> >>>> Fix it by returning available data if a previous hypervisor call
> >>>> has
> >>>> completed. I tested this patch with the program above, and with
> >>>> rng-tools.
> >>>>
> >>>> v2 -> v3: Simplified the implementation as suggested by Laurent
> >>>> Vivier
> >>>>
> >>>> Signed-off-by: Martin Wilck <mwilck@suse.com>
> >>>> ---
> >>>>  drivers/char/hw_random/virtio-rng.c | 4 ++--
> >>>>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/drivers/char/hw_random/virtio-rng.c
> >>>> b/drivers/char/hw_random/virtio-rng.c
> >>>> index a90001e02bf7..8eaeceecb41e 100644
> >>>> --- a/drivers/char/hw_random/virtio-rng.c
> >>>> +++ b/drivers/char/hw_random/virtio-rng.c
> >>>> @@ -65,7 +65,7 @@ static int virtio_read(struct hwrng *rng, void
> >>>> *buf, size_t size, bool wait)
> >>>>  		register_buffer(vi, buf, size);
> >>>>  	}
> >>>>  
> >>>> -	if (!wait)
> >>>> +	if (!wait && !completion_done(&vi->have_data))
> >>>>  		return 0;
> >>>>  
> >>>>  	ret = wait_for_completion_killable(&vi->have_data);
> >>>> @@ -74,7 +74,7 @@ static int virtio_read(struct hwrng *rng, void
> >>>> *buf, size_t size, bool wait)
> >>>>  
> >>>>  	vi->busy = false;
> >>>>  
> >>>> -	return vi->data_avail;
> >>>> +	return min_t(size_t, size, vi->data_avail);
> >>>>  }
> >>>>  
> >>>>  static void virtio_cleanup(struct hwrng *rng)
> >>>>
> >>>
> >>> Reviewed-by: Laurent Vivier <lvivier@redhat.com>
> >>
> >> Laurent didn't we agree the real fix is private buffers in the
> >> driver,
> >> and copying out from there?
> >>
> > 
> > Can we perhaps proceed with this for now? AFAICS the private buffer
> > implementation would be a larger effort, while we have the issues with
> > nested VMs getting no entropy today.
> > 
> 
> I agree. I think it's important to have a simple and quick fix for the
> problem reported by Martin.
> 
> We need the private buffers but not sure how long it will take to have
> them included in the kernel and how many new bugs will be introduced
> doing that as the code is hard to understand and the core is shared with
> several other hardware backends that can be impacted by the changes needed.
> 
> Thanks,
> Laurent

However I am not sure with the patch applies we never return
the same buffer to userspace twice, e.g. if one is
non blocking another blocking. Doing that would be a bug.

-- 
MST

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

WARNING: multiple messages have this Message-ID (diff)
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: "Amit Shah" <amit@kernel.org>, "Jason Wang" <jasowang@redhat.com>,
	qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>,
	"Martin Wilck" <mwilck@suse.com>
Subject: Re: [PATCH v3] virtio-rng: return available data with O_NONBLOCK
Date: Tue, 8 Sep 2020 10:14:56 -0400	[thread overview]
Message-ID: <20200908101413-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <a5d4bcd3-e468-e818-3bd2-3a1b0fa172d8@redhat.com>

On Mon, Aug 31, 2020 at 02:37:26PM +0200, Laurent Vivier wrote:
> On 28/08/2020 23:34, Martin Wilck wrote:
> > On Wed, 2020-08-26 at 08:26 -0400, Michael S. Tsirkin wrote:
> >> On Tue, Aug 11, 2020 at 04:42:32PM +0200, Laurent Vivier wrote:
> >>> On 11/08/2020 16:28, mwilck@suse.com wrote:
> >>>> From: Martin Wilck <mwilck@suse.com>
> >>>>
> >>>> If a program opens /dev/hwrng with O_NONBLOCK and uses poll() and
> >>>> non-blocking read() to retrieve random data, it ends up in a
> >>>> tight
> >>>> loop with poll() always returning POLLIN and read() returning
> >>>> EAGAIN.
> >>>> This repeats forever until some process makes a blocking read()
> >>>> call.
> >>>> The reason is that virtio_read() always returns 0 in non-blocking 
> >>>> mode,
> >>>> even if data is available. Worse, it fetches random data from the
> >>>> hypervisor after every non-blocking call, without ever using this
> >>>> data.
> >>>>
> >>>> The following test program illustrates the behavior and can be
> >>>> used
> >>>> for testing and experiments. The problem will only be seen if all
> >>>> tasks use non-blocking access; otherwise the blocking reads will
> >>>> "recharge" the random pool and cause other, non-blocking reads to
> >>>> succeed at least sometimes.
> >>>>
> >>>> /* Whether to use non-blocking mode in a task, problem occurs if
> >>>> CONDITION is 1 */
> >>>> //#define CONDITION (getpid() % 2 != 0)
> >>>>
> >>>> static volatile sig_atomic_t stop;
> >>>> static void handler(int sig __attribute__((unused))) { stop = 1;
> >>>> }
> >>>>
> >>>> static void loop(int fd, int sec)
> >>>> {
> >>>> 	struct pollfd pfd = { .fd = fd, .events  = POLLIN, };
> >>>> 	unsigned long errors = 0, eagains = 0, bytes = 0, succ = 0;
> >>>> 	int size, rc, rd;
> >>>>
> >>>> 	srandom(getpid());
> >>>> 	if (CONDITION && fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) |
> >>>> O_NONBLOCK) == -1)
> >>>> 		perror("fcntl");
> >>>> 	size = MINBUFSIZ + random() % (MAXBUFSIZ - MINBUFSIZ + 1);
> >>>>
> >>>> 	for(;;) {
> >>>> 		char buf[size];
> >>>>
> >>>> 		if (stop)
> >>>> 			break;
> >>>> 		rc = poll(&pfd, 1, sec);
> >>>> 		if (rc > 0) {
> >>>> 			rd = read(fd, buf, sizeof(buf));
> >>>> 			if (rd == -1 && errno == EAGAIN)
> >>>> 				eagains++;
> >>>> 			else if (rd == -1)
> >>>> 				errors++;
> >>>> 			else {
> >>>> 				succ++;
> >>>> 				bytes += rd;
> >>>> 				write(1, buf, sizeof(buf));
> >>>> 			}
> >>>> 		} else if (rc == -1) {
> >>>> 			if (errno != EINTR)
> >>>> 				perror("poll");
> >>>> 			break;
> >>>> 		} else
> >>>> 			fprintf(stderr, "poll: timeout\n");
> >>>> 	}
> >>>> 	fprintf(stderr,
> >>>> 		"pid %d %sblocking, bufsize %d, %d seconds, %lu bytes
> >>>> read, %lu success, %lu eagain, %lu errors\n",
> >>>> 		getpid(), CONDITION ? "non-" : "", size, sec, bytes,
> >>>> succ, eagains, errors);
> >>>> }
> >>>>
> >>>> int main(void)
> >>>> {
> >>>> 	int fd;
> >>>>
> >>>> 	fork(); fork();
> >>>> 	fd = open("/dev/hwrng", O_RDONLY);
> >>>> 	if (fd == -1) {
> >>>> 		perror("open");
> >>>> 		return 1;
> >>>> 	};
> >>>> 	signal(SIGALRM, handler);
> >>>> 	alarm(SECONDS);
> >>>> 	loop(fd, SECONDS);
> >>>> 	close(fd);
> >>>> 	wait(NULL);
> >>>> 	return 0;
> >>>> }
> >>>>
> >>>> void loop(int fd)
> >>>> {
> >>>>         struct pollfd pfd0 = { .fd = fd, .events  = POLLIN, };
> >>>>         int rc;
> >>>>         unsigned int n;
> >>>>
> >>>>         for (n = LOOPS; n > 0; n--) {
> >>>>                 struct pollfd pfd = pfd0;
> >>>>                 char buf[SIZE];
> >>>>
> >>>>                 rc = poll(&pfd, 1, 1);
> >>>>                 if (rc > 0) {
> >>>>                         int rd = read(fd, buf, sizeof(buf));
> >>>>
> >>>>                         if (rd == -1)
> >>>>                                 perror("read");
> >>>>                         else
> >>>>                                 printf("read %d bytes\n", rd);
> >>>>                 } else if (rc == -1)
> >>>>                         perror("poll");
> >>>>                 else
> >>>>                         fprintf(stderr, "timeout\n");
> >>>>
> >>>>         }
> >>>> }
> >>>>
> >>>> int main(void)
> >>>> {
> >>>>         int fd;
> >>>>
> >>>>         fd = open("/dev/hwrng", O_RDONLY|O_NONBLOCK);
> >>>>         if (fd == -1) {
> >>>>                 perror("open");
> >>>>                 return 1;
> >>>>         };
> >>>>         loop(fd);
> >>>>         close(fd);
> >>>>         return 0;
> >>>> }
> >>>>
> >>>> This can be observed in the real word e.g. with nested qemu/KVM
> >>>> virtual
> >>>> machines, if both the "outer" and "inner" VMs have a virtio-rng
> >>>> device.
> >>>> If the "inner" VM requests random data, qemu running in the
> >>>> "outer" VM
> >>>> uses this device in a non-blocking manner like the test program
> >>>> above.
> >>>>
> >>>> Fix it by returning available data if a previous hypervisor call
> >>>> has
> >>>> completed. I tested this patch with the program above, and with
> >>>> rng-tools.
> >>>>
> >>>> v2 -> v3: Simplified the implementation as suggested by Laurent
> >>>> Vivier
> >>>>
> >>>> Signed-off-by: Martin Wilck <mwilck@suse.com>
> >>>> ---
> >>>>  drivers/char/hw_random/virtio-rng.c | 4 ++--
> >>>>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/drivers/char/hw_random/virtio-rng.c
> >>>> b/drivers/char/hw_random/virtio-rng.c
> >>>> index a90001e02bf7..8eaeceecb41e 100644
> >>>> --- a/drivers/char/hw_random/virtio-rng.c
> >>>> +++ b/drivers/char/hw_random/virtio-rng.c
> >>>> @@ -65,7 +65,7 @@ static int virtio_read(struct hwrng *rng, void
> >>>> *buf, size_t size, bool wait)
> >>>>  		register_buffer(vi, buf, size);
> >>>>  	}
> >>>>  
> >>>> -	if (!wait)
> >>>> +	if (!wait && !completion_done(&vi->have_data))
> >>>>  		return 0;
> >>>>  
> >>>>  	ret = wait_for_completion_killable(&vi->have_data);
> >>>> @@ -74,7 +74,7 @@ static int virtio_read(struct hwrng *rng, void
> >>>> *buf, size_t size, bool wait)
> >>>>  
> >>>>  	vi->busy = false;
> >>>>  
> >>>> -	return vi->data_avail;
> >>>> +	return min_t(size_t, size, vi->data_avail);
> >>>>  }
> >>>>  
> >>>>  static void virtio_cleanup(struct hwrng *rng)
> >>>>
> >>>
> >>> Reviewed-by: Laurent Vivier <lvivier@redhat.com>
> >>
> >> Laurent didn't we agree the real fix is private buffers in the
> >> driver,
> >> and copying out from there?
> >>
> > 
> > Can we perhaps proceed with this for now? AFAICS the private buffer
> > implementation would be a larger effort, while we have the issues with
> > nested VMs getting no entropy today.
> > 
> 
> I agree. I think it's important to have a simple and quick fix for the
> problem reported by Martin.
> 
> We need the private buffers but not sure how long it will take to have
> them included in the kernel and how many new bugs will be introduced
> doing that as the code is hard to understand and the core is shared with
> several other hardware backends that can be impacted by the changes needed.
> 
> Thanks,
> Laurent

However I am not sure with the patch applies we never return
the same buffer to userspace twice, e.g. if one is
non blocking another blocking. Doing that would be a bug.

-- 
MST



  reply	other threads:[~2020-09-08 14:15 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-11 14:28 [PATCH v3] virtio-rng: return available data with O_NONBLOCK mwilck
2020-08-11 14:42 ` Laurent Vivier
2020-08-11 14:42   ` Laurent Vivier
2020-08-26 12:26   ` Michael S. Tsirkin
2020-08-26 12:26     ` Michael S. Tsirkin
2020-08-28 21:34     ` Martin Wilck
2020-08-31 12:37       ` Laurent Vivier
2020-08-31 12:37         ` Laurent Vivier
2020-09-08 14:14         ` Michael S. Tsirkin [this message]
2020-09-08 14:14           ` Michael S. Tsirkin
2020-09-08 15:33           ` Martin Wilck
2020-10-21 14:43             ` Michael S. Tsirkin
2020-10-21 14:43               ` Michael S. Tsirkin
2020-11-25  9:39             ` Michael S. Tsirkin
2020-11-25  9:39               ` Michael S. Tsirkin
2020-11-26 10:49               ` Laurent Vivier
2020-11-26 10:49                 ` Laurent Vivier
2022-12-14 13:41                 ` Claudio Fontana
2022-12-14 15:36                   ` Martin Wilck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200908101413-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=amit@kernel.org \
    --cc=lvivier@redhat.com \
    --cc=mwilck@suse.com \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.