From: "Michael S. Tsirkin" <mst@redhat.com>
To: Marc Hartmayer <mhartmay@linux.ibm.com>
Cc: "Daniel P. Berrangé" <berrange@redhat.com>,
"Cornelia Huck" <cohuck@redhat.com>,
qemu-devel@nongnu.org,
"Dr. David Alan Gilbert" <dgilbert@redhat.com>,
"Halil Pasic" <pasic@linux.ibm.com>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Marc-André Lureau" <marcandre.lureau@redhat.com>
Subject: Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec
Date: Tue, 21 Jul 2020 09:40:10 -0400 [thread overview]
Message-ID: <20200721093942-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20200717092929.19453-3-mhartmay@linux.ibm.com>
On Fri, Jul 17, 2020 at 11:29:28AM +0200, Marc Hartmayer wrote:
> Since virtio existed even before it got standardized, the virtio
> standard defines the following types of virtio devices:
>
> + legacy device (pre-virtio 1.0)
> + non-legacy or VIRTIO 1.0 device
> + transitional device (which can act both as legacy and non-legacy)
>
> Virtio 1.0 defines the fields of the virtqueues as little endian,
> while legacy uses guest's native endian [1]. Currently libvhost-user
> does not handle virtio endianness at all, i.e. it works only if the
> native endianness matches with whatever is actually needed. That means
> things break spectacularly on big-endian targets. Let us handle virtio
> endianness for non-legacy as required by the virtio specification
> [1]. We will fence non-legacy virtio devices with the upcoming patch.
>
> [1] https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-210003
>
> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>
> ---
> Note: As we don't support legacy virtio devices
Who's "we" in this sentence? vhost user supports legacy generally ...
> there is dead code in
> libvhost-access.h that could be removed. But for the sake of
> completeness, I left it in the code.
> ---
> contrib/libvhost-user/libvhost-access.h | 61 ++++++++++++
> contrib/libvhost-user/libvhost-user.c | 119 ++++++++++++------------
> 2 files changed, 121 insertions(+), 59 deletions(-)
> create mode 100644 contrib/libvhost-user/libvhost-access.h
>
> diff --git a/contrib/libvhost-user/libvhost-access.h b/contrib/libvhost-user/libvhost-access.h
> new file mode 100644
> index 000000000000..868ba3e7bfb8
> --- /dev/null
> +++ b/contrib/libvhost-user/libvhost-access.h
> @@ -0,0 +1,61 @@
> +#ifndef LIBVHOST_ACCESS_H
> +
> +#include "qemu/bswap.h"
> +
> +#include "libvhost-user.h"
> +
> +static inline bool vu_access_is_big_endian(VuDev *dev)
> +{
> + /* Devices conforming to VIRTIO 1.0 or later are always LE. */
> + return false;
> +}
> +
> +static inline void vu_stw_p(VuDev *vdev, void *ptr, uint16_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stw_be_p(ptr, v);
> + } else {
> + stw_le_p(ptr, v);
> + }
> +}
> +
> +static inline void vu_stl_p(VuDev *vdev, void *ptr, uint32_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stl_be_p(ptr, v);
> + } else {
> + stl_le_p(ptr, v);
> + }
> +}
> +
> +static inline void vu_stq_p(VuDev *vdev, void *ptr, uint64_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stq_be_p(ptr, v);
> + } else {
> + stq_le_p(ptr, v);
> + }
> +}
> +
> +static inline int vu_lduw_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return lduw_be_p(ptr);
> + return lduw_le_p(ptr);
> +}
> +
> +static inline int vu_ldl_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return ldl_be_p(ptr);
> + return ldl_le_p(ptr);
> +}
> +
> +static inline uint64_t vu_ldq_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return ldq_be_p(ptr);
> + return ldq_le_p(ptr);
> +}
> +
> +#endif /* LIBVHOST_ACCESS_H */
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index d315db139606..0214b04c5291 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -45,6 +45,7 @@
> #include "qemu/memfd.h"
>
> #include "libvhost-user.h"
> +#include "libvhost-access.h"
>
> /* usually provided by GLib */
> #ifndef MIN
> @@ -1074,7 +1075,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
> return false;
> }
>
> - vq->used_idx = vq->vring.used->idx;
> + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
>
> if (vq->last_avail_idx != vq->used_idx) {
> bool resume = dev->iface->queue_is_processed_in_order &&
> @@ -1191,7 +1192,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
> return 0;
> }
>
> - vq->used_idx = vq->vring.used->idx;
> + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
> vq->resubmit_num = 0;
> vq->resubmit_list = NULL;
> vq->counter = 0;
> @@ -2019,35 +2020,35 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
> }
>
> static inline uint16_t
> -vring_avail_flags(VuVirtq *vq)
> +vring_avail_flags(VuDev *dev, VuVirtq *vq)
> {
> - return vq->vring.avail->flags;
> + return vu_lduw_p(dev, &vq->vring.avail->flags);
> }
>
> static inline uint16_t
> -vring_avail_idx(VuVirtq *vq)
> +vring_avail_idx(VuDev *dev, VuVirtq *vq)
> {
> - vq->shadow_avail_idx = vq->vring.avail->idx;
> + vq->shadow_avail_idx = vu_lduw_p(dev, &vq->vring.avail->idx);
>
> return vq->shadow_avail_idx;
> }
>
> static inline uint16_t
> -vring_avail_ring(VuVirtq *vq, int i)
> +vring_avail_ring(VuDev *dev, VuVirtq *vq, int i)
> {
> - return vq->vring.avail->ring[i];
> + return vu_lduw_p(dev, &vq->vring.avail->ring[i]);
> }
>
> static inline uint16_t
> -vring_get_used_event(VuVirtq *vq)
> +vring_get_used_event(VuDev *dev, VuVirtq *vq)
> {
> - return vring_avail_ring(vq, vq->vring.num);
> + return vring_avail_ring(dev, vq, vq->vring.num);
> }
>
> static int
> virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
> {
> - uint16_t num_heads = vring_avail_idx(vq) - idx;
> + uint16_t num_heads = vring_avail_idx(dev, vq) - idx;
>
> /* Check it isn't doing very strange things with descriptor numbers. */
> if (num_heads > vq->vring.num) {
> @@ -2070,7 +2071,7 @@ virtqueue_get_head(VuDev *dev, VuVirtq *vq,
> {
> /* Grab the next descriptor number they're advertising, and increment
> * the index we've seen. */
> - *head = vring_avail_ring(vq, idx % vq->vring.num);
> + *head = vring_avail_ring(dev, vq, idx % vq->vring.num);
>
> /* If their number is silly, that's a fatal mistake. */
> if (*head >= vq->vring.num) {
> @@ -2123,12 +2124,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
> int i, unsigned int max, unsigned int *next)
> {
> /* If this descriptor says it doesn't chain, we're done. */
> - if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
> + if (!(vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_NEXT)) {
> return VIRTQUEUE_READ_DESC_DONE;
> }
>
> /* Check they're not leading us off end of descriptors. */
> - *next = desc[i].next;
> + *next = vu_lduw_p(dev, &desc[i].next);
> /* Make sure compiler knows to grab that: we don't want it changing! */
> smp_wmb();
>
> @@ -2171,8 +2172,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
> }
> desc = vq->vring.desc;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> goto err;
> }
> @@ -2185,8 +2186,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>
> /* loop over the indirect descriptor table */
> indirect = 1;
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2213,10 +2214,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
> goto err;
> }
>
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> - in_total += desc[i].len;
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> + in_total += vu_ldl_p(dev, &desc[i].len);
> } else {
> - out_total += desc[i].len;
> + out_total += vu_ldl_p(dev, &desc[i].len);
> }
> if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
> goto done;
> @@ -2277,7 +2278,7 @@ vu_queue_empty(VuDev *dev, VuVirtq *vq)
> return false;
> }
>
> - return vring_avail_idx(vq) == vq->last_avail_idx;
> + return vring_avail_idx(dev, vq) == vq->last_avail_idx;
> }
>
> static bool
> @@ -2296,14 +2297,14 @@ vring_notify(VuDev *dev, VuVirtq *vq)
> }
>
> if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
> + return !(vring_avail_flags(dev, vq) & VRING_AVAIL_F_NO_INTERRUPT);
> }
>
> v = vq->signalled_used_valid;
> vq->signalled_used_valid = true;
> old = vq->signalled_used;
> new = vq->signalled_used = vq->used_idx;
> - return !v || vring_need_event(vring_get_used_event(vq), new, old);
> + return !v || vring_need_event(vring_get_used_event(dev, vq), new, old);
> }
>
> static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
> @@ -2361,33 +2362,33 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
> }
>
> static inline void
> -vring_used_flags_set_bit(VuVirtq *vq, int mask)
> +vring_used_flags_set_bit(VuDev *dev, VuVirtq *vq, int mask)
> +{
> + uint16_t *flags;
> +
> + flags = (uint16_t *)(char*)vq->vring.used +
> + offsetof(struct vring_used, flags);
> + vu_stw_p(dev, flags, vu_lduw_p(dev, flags) | mask);
> +}
> +
> +static inline void
> +vring_used_flags_unset_bit(VuDev *dev, VuVirtq *vq, int mask)
> {
> uint16_t *flags;
>
> flags = (uint16_t *)((char*)vq->vring.used +
> offsetof(struct vring_used, flags));
> - *flags |= mask;
> + vu_stw_p(dev, flags, vu_lduw_p(dev, flags) & ~mask);
> }
>
> static inline void
> -vring_used_flags_unset_bit(VuVirtq *vq, int mask)
> -{
> - uint16_t *flags;
> -
> - flags = (uint16_t *)((char*)vq->vring.used +
> - offsetof(struct vring_used, flags));
> - *flags &= ~mask;
> -}
> -
> -static inline void
> -vring_set_avail_event(VuVirtq *vq, uint16_t val)
> +vring_set_avail_event(VuDev *dev, VuVirtq *vq, uint16_t val)
> {
> if (!vq->notification) {
> return;
> }
>
> - *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
> + vu_stw_p(dev, &vq->vring.used->ring[vq->vring.num], val);
> }
>
> void
> @@ -2395,11 +2396,11 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
> {
> vq->notification = enable;
> if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - vring_set_avail_event(vq, vring_avail_idx(vq));
> + vring_set_avail_event(dev, vq, vring_avail_idx(dev, vq));
> } else if (enable) {
> - vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
> + vring_used_flags_unset_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
> } else {
> - vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
> + vring_used_flags_set_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
> }
> if (enable) {
> /* Expose avail event/used flags before caller checks the avail idx. */
> @@ -2476,14 +2477,14 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
> struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
> int rc;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> }
>
> /* loop over the indirect descriptor table */
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2505,10 +2506,10 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>
> /* Collect all the descriptors */
> do {
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> virtqueue_map_desc(dev, &in_num, iov + out_num,
> VIRTQUEUE_MAX_SIZE - out_num, true,
> - desc[i].addr, desc[i].len);
> + vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev, &desc[i].len));
> } else {
> if (in_num) {
> vu_panic(dev, "Incorrect order for descriptors");
> @@ -2516,7 +2517,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
> }
> virtqueue_map_desc(dev, &out_num, iov,
> VIRTQUEUE_MAX_SIZE, false,
> - desc[i].addr, desc[i].len);
> + vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev, &desc[i].len));
> }
>
> /* If we've got too many, that implies a descriptor loop. */
> @@ -2642,7 +2643,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
> }
>
> if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - vring_set_avail_event(vq, vq->last_avail_idx);
> + vring_set_avail_event(dev, vq, vq->last_avail_idx);
> }
>
> elem = vu_queue_map_desc(dev, vq, head, sz);
> @@ -2712,14 +2713,14 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
> max = vq->vring.num;
> i = elem->index;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> }
>
> /* loop over the indirect descriptor table */
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2745,9 +2746,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
> return;
> }
>
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> - min = MIN(desc[i].len, len);
> - vu_log_write(dev, desc[i].addr, min);
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> + min = MIN(vu_ldl_p(dev, &desc[i].len), len);
> + vu_log_write(dev, vu_ldq_p(dev, &desc[i].addr), min);
> len -= min;
> }
>
> @@ -2772,15 +2773,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
>
> idx = (idx + vq->used_idx) % vq->vring.num;
>
> - uelem.id = elem->index;
> - uelem.len = len;
> + vu_stl_p(dev, &uelem.id, elem->index);
> + vu_stl_p(dev, &uelem.len, len);
> vring_used_write(dev, vq, &uelem, idx);
> }
>
> static inline
> void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
> {
> - vq->vring.used->idx = val;
> + vu_stw_p(dev, &vq->vring.used->idx, val);
> vu_log_write(dev,
> vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
> sizeof(vq->vring.used->idx));
> --
> 2.25.4
next prev parent reply other threads:[~2020-07-21 13:41 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-17 9:29 [RFC v2 0/3] Enable virtio-fs on s390x Marc Hartmayer
2020-07-17 9:29 ` [RFC v2 1/3] virtio: add vhost-user-fs-ccw device Marc Hartmayer
2020-07-21 13:47 ` Stefan Hajnoczi
2020-07-28 10:31 ` Cornelia Huck
2020-07-28 11:25 ` Halil Pasic
2020-07-28 11:33 ` Cornelia Huck
2020-07-17 9:29 ` [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec Marc Hartmayer
2020-07-21 12:48 ` Stefan Hajnoczi
2020-07-30 13:15 ` Marc Hartmayer
2020-07-21 13:40 ` Michael S. Tsirkin [this message]
2020-07-21 16:44 ` Halil Pasic
2020-07-28 10:48 ` Cornelia Huck
2020-07-28 11:31 ` Halil Pasic
2020-07-28 10:52 ` Marc Hartmayer
2020-07-29 14:13 ` Michael S. Tsirkin
2020-07-29 16:11 ` Marc Hartmayer
2020-07-17 9:29 ` [RFC v2 3/3] libvhost-user: fence legacy virtio devices Marc Hartmayer
2020-07-21 13:47 ` Stefan Hajnoczi
2020-07-17 10:26 ` [RFC v2 0/3] Enable virtio-fs on s390x no-reply
2020-07-17 10:31 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200721093942-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=berrange@redhat.com \
--cc=cohuck@redhat.com \
--cc=dgilbert@redhat.com \
--cc=marcandre.lureau@redhat.com \
--cc=mhartmay@linux.ibm.com \
--cc=pasic@linux.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.