All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>, Jens Axboe <axboe@kernel.dk>,
	Minchan Kim <minchan@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 4/4] virtio_ring: unify direct/indirect code paths.
Date: Fri, 30 May 2014 15:51:35 +0930	[thread overview]
Message-ID: <874n077pn4.fsf@rustcorp.com.au> (raw)
In-Reply-To: <87d2ew6lfr.fsf@rustcorp.com.au>

Rusty Russell <rusty@rustcorp.com.au> writes:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> On Thu, May 29, 2014 at 04:56:45PM +0930, Rusty Russell wrote:
>>> virtqueue_add() populates the virtqueue descriptor table from the sgs
>>> given.  If it uses an indirect descriptor table, then it puts a single
>>> descriptor in the descriptor table pointing to the kmalloc'ed indirect
>>> table where the sg is populated.
>>> +	for (i = 0; i < total_sg; i++)
>>> +		desc[i].next = i+1;
>>> +	return desc;
>>
>> Hmm we are doing an extra walk over descriptors here.
>> This might hurt performance esp for big descriptors.
>
> Yes, this needs to be benchmarked; since it's cache hot my gut feel is
> that it's a NOOP, but on modern machines my gut feel is always wrong.

CC's trimmed.

Well, I was almost right about being wrong.

I wrote a userspace virtio_ring microbench which does 10000000
virtqueue_add_outbuf() calls (which go indirect) and not much else.

Read as <MIN>-<MAX>(<MEAN>+/-<STDDEV>):
Current kernel:           936153354- 967745359(9.44739e+08+/-6.1e+06)ns
Using sg_next:           1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
Unifying indirect path:  1214289435-1272686712(1.22564e+09+/-8e+06)ns
Using indirect flag:     1125610268-1183528965(1.14172e+09+/-8e+06)ns

Of course this might be lost in the noise on real networking, so that's
my job on Monday.

Subject: vring_bench: simple benchmark for adding descriptors to a virtqueue.

This userspace benchmark uses the kernel code to add 8 16-element
scatterlists to a virtqueue, then consume them and start again.

For example:
	$ for i in `seq 10`; do ./vring_bench; done | stats --trim-outliers
	936153354-967745359(9.44739e+08+/-6.1e+06)ns
	9999872 returned

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 1cfbb0157a46..ff32cca971d8 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,3 +1,4 @@
 *.d
 virtio_test
 vringh_test
+vring_bench
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 3187c62d9814..103101273049 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -1,6 +1,7 @@
 all: test mod
-test: virtio_test vringh_test
+test: virtio_test vringh_test vring_bench
 virtio_test: virtio_ring.o virtio_test.o
+vring_bench: virtio_ring.o vring_bench.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
 
 CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
@@ -9,6 +10,6 @@ mod:
 	${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
 .PHONY: all test mod clean
 clean:
-	${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
+	${RM} *.o vringh_test virtio_test vring_bench vhost_test/*.o vhost_test/.*.cmd \
               vhost_test/Module.symvers vhost_test/modules.order *.d
 -include *.d
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index fba705963968..8dcff8e3374c 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -109,4 +109,7 @@ static inline void free_page(unsigned long addr)
 	(void) (&_min1 == &_min2);		\
 	_min1 < _min2 ? _min1 : _min2; })
 
+/* Just make it compile */
+#define list_for_each_entry(iter, list, member)
+
 #endif /* KERNEL_H */
diff --git a/tools/virtio/vring_bench.c b/tools/virtio/vring_bench.c
new file mode 100644
index 000000000000..0d7544fd26ad
--- /dev/null
+++ b/tools/virtio/vring_bench.c
@@ -0,0 +1,125 @@
+#define _GNU_SOURCE
+#include <time.h>
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+
+/* Unused */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+static struct vring vring;
+static uint16_t last_avail_idx;
+static unsigned int returned;
+
+static bool vq_notify(struct virtqueue *vq)
+{
+	/* "Use" them all. */
+	while (vring.avail->idx != last_avail_idx) {
+		unsigned int i, head;
+
+		i = last_avail_idx++ & (vring.num - 1);
+		head = vring.avail->ring[i];
+		assert(head < vring.num);
+
+		i = vring.used->idx & (vring.num - 1);
+		vring.used->ring[i].id = head;
+		vring.used->ring[i].len = 0;
+		vring.used->idx++;
+	}
+	return true;
+}
+
+static void vq_callback(struct virtqueue *vq)
+{
+	unsigned int len;
+	void *p;
+
+	while ((p = virtqueue_get_buf(vq, &len)) != NULL)
+		returned++;
+}
+
+/* Ring size 128, just like qemu uses */
+#define VRING_NUM 128
+#define SG_SIZE 16
+
+static inline struct timespec time_sub(struct timespec recent,
+				       struct timespec old)
+{
+	struct timespec diff;
+
+	diff.tv_sec = recent.tv_sec - old.tv_sec;
+	if (old.tv_nsec > recent.tv_nsec) {
+		diff.tv_sec--;
+		diff.tv_nsec = 1000000000 + recent.tv_nsec - old.tv_nsec;
+	} else
+		diff.tv_nsec = recent.tv_nsec - old.tv_nsec;
+
+	return diff;
+}
+
+static struct timespec time_now(void)
+{
+	struct timespec ret;
+	clock_gettime(CLOCK_REALTIME, &ret);
+	return ret;
+}
+
+static inline uint64_t time_to_nsec(struct timespec t)
+{
+	uint64_t nsec;
+
+	nsec = t.tv_nsec + (uint64_t)t.tv_sec * 1000000000;
+	return nsec;
+}
+
+int main(int argc, char *argv[])
+{
+	struct virtqueue *vq;
+	struct virtio_device vdev;
+	void *ring;
+	unsigned int i, num;
+	int e;
+	struct scatterlist sg[SG_SIZE];
+	struct timespec start;
+
+	sg_init_table(sg, SG_SIZE);
+
+	e = posix_memalign(&ring, 4096, vring_size(VRING_NUM, 4096));
+	assert(e >= 0);
+
+	vdev.features[0] = (1UL << VIRTIO_RING_F_INDIRECT_DESC) |
+		(1UL << VIRTIO_RING_F_EVENT_IDX);
+
+	vq = vring_new_virtqueue(0, VRING_NUM, 4096, &vdev, true, ring,
+				 vq_notify, vq_callback, "benchmark");
+	assert(vq);
+	vring_init(&vring, VRING_NUM, ring, 4096);
+
+	num = atoi(argv[1] ?: "10000000");
+
+	start = time_now();
+	for (i = 0; i < num; i++) {
+	again:
+		e = virtqueue_add_outbuf(vq, sg, SG_SIZE, sg, GFP_ATOMIC);
+		if (e < 0) {
+			virtqueue_kick(vq);
+			vring_interrupt(0, vq);
+			goto again;
+		}
+	}
+	printf("%lluns\n",
+	       (long long)time_to_nsec(time_sub(time_now(), start)));
+	printf("%u returned\n", returned);
+	return 0;
+}



  reply	other threads:[~2014-05-30  7:00 UTC|newest]

Thread overview: 205+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-28  6:53 [PATCH 1/2] ftrace: print stack usage right before Oops Minchan Kim
2014-05-28  6:53 ` Minchan Kim
2014-05-28  6:53 ` [RFC 2/2] x86_64: expand kernel stack to 16K Minchan Kim
2014-05-28  6:53   ` Minchan Kim
2014-05-28  8:37   ` Dave Chinner
2014-05-28  8:37     ` Dave Chinner
2014-05-28  8:37     ` Dave Chinner
2014-05-28  9:13     ` Dave Chinner
2014-05-28  9:13       ` Dave Chinner
2014-05-28  9:13       ` Dave Chinner
2014-05-28 16:06       ` Johannes Weiner
2014-05-28 16:06         ` Johannes Weiner
2014-05-28 16:06         ` Johannes Weiner
2014-05-28 21:55         ` Dave Chinner
2014-05-28 21:55           ` Dave Chinner
2014-05-28 21:55           ` Dave Chinner
2014-05-29  6:06         ` Minchan Kim
2014-05-29  6:06           ` Minchan Kim
2014-05-29  6:06           ` Minchan Kim
2014-05-28  9:04   ` Michael S. Tsirkin
2014-05-28  9:04     ` Michael S. Tsirkin
2014-05-29  1:09     ` Minchan Kim
2014-05-29  2:44       ` Steven Rostedt
2014-05-29  2:44         ` Steven Rostedt
2014-05-29  4:11         ` Minchan Kim
2014-05-29  4:11           ` Minchan Kim
2014-05-29  2:47       ` Rusty Russell
2014-05-29  2:47         ` Rusty Russell
2014-05-29  4:10     ` virtio_ring stack usage Rusty Russell
2014-05-28  9:27   ` [RFC 2/2] x86_64: expand kernel stack to 16K Borislav Petkov
2014-05-29 13:23     ` One Thousand Gnomes
2014-05-29 13:23       ` One Thousand Gnomes
2014-05-28 14:14   ` Steven Rostedt
2014-05-28 14:14     ` Steven Rostedt
2014-05-28 14:23     ` H. Peter Anvin
2014-05-28 14:23       ` H. Peter Anvin
2014-05-28 22:11       ` Dave Chinner
2014-05-28 22:11         ` Dave Chinner
2014-05-28 22:42         ` H. Peter Anvin
2014-05-28 22:42           ` H. Peter Anvin
2014-05-28 23:17           ` Dave Chinner
2014-05-28 23:17             ` Dave Chinner
2014-05-28 23:21             ` H. Peter Anvin
2014-05-28 23:21               ` H. Peter Anvin
2014-05-28 15:43   ` Richard Weinberger
2014-05-28 15:43     ` Richard Weinberger
2014-05-28 16:08     ` Steven Rostedt
2014-05-28 16:08       ` Steven Rostedt
2014-05-28 16:11       ` Richard Weinberger
2014-05-28 16:11         ` Richard Weinberger
2014-05-28 16:13       ` Linus Torvalds
2014-05-28 16:13         ` Linus Torvalds
2014-05-28 16:09   ` Linus Torvalds
2014-05-28 16:09     ` Linus Torvalds
2014-05-28 22:31     ` Dave Chinner
2014-05-28 22:31       ` Dave Chinner
2014-05-28 22:41       ` Linus Torvalds
2014-05-28 22:41         ` Linus Torvalds
2014-05-29  1:30         ` Dave Chinner
2014-05-29  1:30           ` Dave Chinner
2014-05-29  1:58           ` Dave Chinner
2014-05-29  1:58             ` Dave Chinner
2014-05-29  2:51             ` Linus Torvalds
2014-05-29  2:51               ` Linus Torvalds
2014-05-29 23:36             ` Minchan Kim
2014-05-29 23:36               ` Minchan Kim
2014-05-30  0:05               ` Linus Torvalds
2014-05-30  0:20                 ` Minchan Kim
2014-05-30  0:20                   ` Minchan Kim
2014-05-30  0:31                   ` Linus Torvalds
2014-05-30  0:31                     ` Linus Torvalds
2014-05-30  0:50                     ` Minchan Kim
2014-05-30  0:50                       ` Minchan Kim
2014-05-30  1:24                       ` Linus Torvalds
2014-05-30  1:24                         ` Linus Torvalds
2014-05-30  1:58                         ` Dave Chinner
2014-05-30  1:58                           ` Dave Chinner
2014-05-30  2:13                           ` Linus Torvalds
2014-05-30  2:13                             ` Linus Torvalds
2014-05-30  6:21                         ` Minchan Kim
2014-05-30  6:21                           ` Minchan Kim
2014-05-30  1:30                 ` Linus Torvalds
2014-05-30  1:30                   ` Linus Torvalds
2014-05-30  0:15               ` Dave Chinner
2014-05-30  0:15                 ` Dave Chinner
2014-05-30  2:12                 ` Minchan Kim
2014-05-30  2:12                   ` Minchan Kim
2014-05-30  4:37                   ` Linus Torvalds
2014-05-30  4:37                     ` Linus Torvalds
2014-05-31  1:45                     ` Linus Torvalds
2014-05-31  1:45                       ` Linus Torvalds
2014-05-30  6:12                   ` Minchan Kim
2014-05-30  6:12                     ` Minchan Kim
2014-06-03 13:28                   ` Rasmus Villemoes
2014-06-03 13:28                     ` Rasmus Villemoes
2014-06-03 19:04                     ` Linus Torvalds
2014-06-03 19:04                       ` Linus Torvalds
2014-06-10 12:29                       ` [PATCH 0/2] Per-task wait_queue_t Rasmus Villemoes
2014-06-10 12:29                         ` [PATCH 1/2] wait: Introduce per-task wait_queue_t Rasmus Villemoes
2014-06-11 15:16                           ` Oleg Nesterov
2014-06-10 12:29                         ` [PATCH 2/2] wait: Use the per-task wait_queue_t in ___wait_event macro Rasmus Villemoes
2014-06-10 15:50                         ` [PATCH 0/2] Per-task wait_queue_t Peter Zijlstra
2014-06-12 21:46                           ` Rasmus Villemoes
2014-05-29  2:42           ` [RFC 2/2] x86_64: expand kernel stack to 16K Linus Torvalds
2014-05-29  2:42             ` Linus Torvalds
2014-05-29  5:14             ` H. Peter Anvin
2014-05-29  5:14               ` H. Peter Anvin
2014-05-29  6:01             ` Rusty Russell
2014-05-29  6:01               ` Rusty Russell
2014-05-29  7:26               ` virtio ring cleanups, which save stack on older gcc Rusty Russell
2014-05-29  7:26                 ` Rusty Russell
2014-05-29  7:26                 ` [PATCH 1/4] Hack: measure stack taken by vring from virtio_blk Rusty Russell
2014-05-29  7:26                   ` Rusty Russell
2014-05-29 15:39                   ` Linus Torvalds
2014-05-29 15:39                     ` Linus Torvalds
2014-05-29  7:26                 ` [PATCH 2/4] virtio_net: pass well-formed sg to virtqueue_add_inbuf() Rusty Russell
2014-05-29  7:26                   ` Rusty Russell
2014-05-29 10:07                   ` Michael S. Tsirkin
2014-05-29 10:07                     ` Michael S. Tsirkin
2014-05-29  7:26                 ` [PATCH 3/4] virtio_ring: assume sgs are always well-formed Rusty Russell
2014-05-29  7:26                   ` Rusty Russell
2014-05-29 11:18                   ` Michael S. Tsirkin
2014-05-29 11:18                     ` Michael S. Tsirkin
2014-05-29  7:26                 ` [PATCH 4/4] virtio_ring: unify direct/indirect code paths Rusty Russell
2014-05-29  7:26                   ` Rusty Russell
2014-05-29  7:52                   ` Peter Zijlstra
2014-05-29 11:05                     ` Rusty Russell
2014-05-29 11:05                       ` Rusty Russell
2014-05-29 11:33                       ` Michael S. Tsirkin
2014-05-29 11:33                         ` Michael S. Tsirkin
2014-05-29 11:29                   ` Michael S. Tsirkin
2014-05-29 11:29                     ` Michael S. Tsirkin
2014-05-30  2:37                     ` Rusty Russell
2014-05-30  2:37                       ` Rusty Russell
2014-05-30  6:21                       ` Rusty Russell [this message]
2014-05-29  7:41                 ` virtio ring cleanups, which save stack on older gcc Minchan Kim
2014-05-29  7:41                   ` Minchan Kim
2014-05-29 10:39                   ` Dave Chinner
2014-05-29 10:39                     ` Dave Chinner
2014-05-29 11:08                   ` Rusty Russell
2014-05-29 11:08                     ` Rusty Russell
2014-05-29 23:45                     ` Minchan Kim
2014-05-29 23:45                       ` Minchan Kim
2014-05-30  1:06                       ` Minchan Kim
2014-05-30  1:06                         ` Minchan Kim
2014-05-30  6:56                       ` Rusty Russell
2014-05-30  6:56                         ` Rusty Russell
2014-05-29  7:26             ` [RFC 2/2] x86_64: expand kernel stack to 16K Dave Chinner
2014-05-29  7:26               ` Dave Chinner
2014-05-29 15:24               ` Linus Torvalds
2014-05-29 15:24                 ` Linus Torvalds
2014-05-29 23:40                 ` Minchan Kim
2014-05-29 23:40                   ` Minchan Kim
2014-05-29 23:53                 ` Dave Chinner
2014-05-29 23:53                   ` Dave Chinner
2014-05-30  0:06                   ` Dave Jones
2014-05-30  0:06                     ` Dave Jones
2014-05-30  0:21                     ` Dave Chinner
2014-05-30  0:21                       ` Dave Chinner
2014-05-30  0:29                       ` Dave Jones
2014-05-30  0:29                         ` Dave Jones
2014-05-30  0:32                       ` Minchan Kim
2014-05-30  0:32                         ` Minchan Kim
2014-05-30  1:34                         ` Dave Chinner
2014-05-30  1:34                           ` Dave Chinner
2014-05-30 15:25                           ` H. Peter Anvin
2014-05-30 15:25                             ` H. Peter Anvin
2014-05-30 15:41                             ` Linus Torvalds
2014-05-30 15:41                               ` Linus Torvalds
2014-05-30 15:52                               ` H. Peter Anvin
2014-05-30 15:52                                 ` H. Peter Anvin
2014-05-30 16:06                                 ` Linus Torvalds
2014-05-30 16:06                                   ` Linus Torvalds
2014-05-30 17:24                                   ` Dave Hansen
2014-05-30 17:24                                     ` Dave Hansen
2014-05-30 18:12                                     ` H. Peter Anvin
2014-05-30 18:12                                       ` H. Peter Anvin
2014-10-21  2:00                               ` Dave Jones
2014-10-21  4:59                                 ` Andy Lutomirski
2014-05-30  9:48                 ` Richard Weinberger
2014-05-30  9:48                   ` Richard Weinberger
2014-05-30 15:36                   ` Linus Torvalds
2014-05-30 15:36                     ` Linus Torvalds
2014-05-31  2:06             ` Jens Axboe
2014-05-31  2:06               ` Jens Axboe
2014-06-02 22:59               ` Dave Chinner
2014-06-02 22:59                 ` Dave Chinner
2014-06-03 13:02               ` Konstantin Khlebnikov
2014-06-03 13:02                 ` Konstantin Khlebnikov
2014-05-29  3:46     ` Minchan Kim
2014-05-29  3:46       ` Minchan Kim
2014-05-29  4:13       ` Linus Torvalds
2014-05-29  4:13         ` Linus Torvalds
2014-05-29  5:10         ` Minchan Kim
2014-05-29  5:10           ` Minchan Kim
2014-05-30 21:23     ` Andi Kleen
2014-05-30 21:23       ` Andi Kleen
2014-05-28 16:18 ` [PATCH 1/2] ftrace: print stack usage right before Oops Steven Rostedt
2014-05-28 16:18   ` Steven Rostedt
2014-05-29  3:52   ` Minchan Kim
2014-05-29  3:52     ` Minchan Kim
2014-05-29  3:01 ` Steven Rostedt
2014-05-29  3:01   ` Steven Rostedt
2014-05-29  3:49   ` Minchan Kim
2014-05-29  3:49     ` Minchan Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874n077pn4.fsf@rustcorp.com.au \
    --to=rusty@rustcorp.com.au \
    --cc=axboe@kernel.dk \
    --cc=david@fromorbit.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=minchan@kernel.org \
    --cc=mst@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.