Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH net-next V2 7/8] vhost: event suppression for packed ring
From: Jason Wang @ 2018-07-16  3:28 UTC (permalink / raw)
  To: mst, jasowang
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <1531711691-6769-1-git-send-email-jasowang@redhat.com>

This patch introduces support for event suppression. This is done by
having two areas: a device area and a driver area. One side can then
try to disable or enable (delayed) notifications from the other side by
using a boolean hint or the event index interface in these areas.

For more information, please refer to the Virtio spec.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++----
 drivers/vhost/vhost.h |  10 ++-
 2 files changed, 185 insertions(+), 16 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 63b79e8..0459f8b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1115,10 +1115,15 @@ static int vq_access_ok_packed(struct vhost_virtqueue *vq, unsigned int num,
 			       struct vring_used __user *used)
 {
 	struct vring_packed_desc *packed = (struct vring_packed_desc *)desc;
+	struct vring_packed_desc_event *driver_event =
+		(struct vring_packed_desc_event *)avail;
+	struct vring_packed_desc_event *device_event =
+		(struct vring_packed_desc_event *)used;
 
-	/* TODO: check device area and driver area */
 	return access_ok(VERIFY_READ, packed, num * sizeof(*packed)) &&
-	       access_ok(VERIFY_WRITE, packed, num * sizeof(*packed));
+	       access_ok(VERIFY_WRITE, packed, num * sizeof(*packed)) &&
+	       access_ok(VERIFY_READ, driver_event, sizeof(*driver_event)) &&
+	       access_ok(VERIFY_WRITE, device_event, sizeof(*device_event));
 }
 
 static int vq_access_ok_split(struct vhost_virtqueue *vq, unsigned int num,
@@ -1193,14 +1198,27 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
 	return true;
 }
 
-int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
+int vq_iotlb_prefetch_packed(struct vhost_virtqueue *vq)
+{
+	int num = vq->num;
+
+	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
+			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
+	       iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->desc,
+			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
+	       iotlb_access_ok(vq, VHOST_ACCESS_RO,
+			       (u64)(uintptr_t)vq->driver_event,
+			       sizeof(*vq->driver_event), VHOST_ADDR_AVAIL) &&
+	       iotlb_access_ok(vq, VHOST_ACCESS_WO,
+			       (u64)(uintptr_t)vq->device_event,
+			       sizeof(*vq->device_event), VHOST_ADDR_USED);
+}
+
+int vq_iotlb_prefetch_split(struct vhost_virtqueue *vq)
 {
 	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 	unsigned int num = vq->num;
 
-	if (!vq->iotlb)
-		return 1;
-
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
 			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
@@ -1212,6 +1230,17 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
 			       num * sizeof(*vq->used->ring) + s,
 			       VHOST_ADDR_USED);
 }
+
+int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
+{
+	if (!vq->iotlb)
+		return 1;
+
+	if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+		return vq_iotlb_prefetch_packed(vq);
+	else
+		return vq_iotlb_prefetch_split(vq);
+}
 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
 /* Can we log writes? */
@@ -1771,6 +1800,50 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
 	return 0;
 }
 
+static int vhost_update_device_flags(struct vhost_virtqueue *vq,
+				     __virtio16 device_flags)
+{
+	void __user *flags;
+
+	if (vhost_put_user(vq, device_flags, &vq->device_event->flags,
+			   VHOST_ADDR_USED) < 0)
+		return -EFAULT;
+	if (unlikely(vq->log_used)) {
+		/* Make sure the flag is seen before log. */
+		smp_wmb();
+		/* Log used flag write. */
+		flags = &vq->device_event->flags;
+		log_write(vq->log_base, vq->log_addr +
+			  (flags - (void __user *)vq->device_event),
+			  sizeof(vq->device_event->flags));
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return 0;
+}
+
+static int vhost_update_device_off_wrap(struct vhost_virtqueue *vq,
+					__virtio16 device_off_wrap)
+{
+	void __user *off_wrap;
+
+	if (vhost_put_user(vq, device_off_wrap, &vq->device_event->off_wrap,
+			   VHOST_ADDR_USED) < 0)
+		return -EFAULT;
+	if (unlikely(vq->log_used)) {
+		/* Make sure the flag is seen before log. */
+		smp_wmb();
+		/* Log used flag write. */
+		off_wrap = &vq->device_event->off_wrap;
+		log_write(vq->log_base, vq->log_addr +
+			  (off_wrap - (void __user *)vq->device_event),
+			  sizeof(vq->device_event->off_wrap));
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return 0;
+}
+
 static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
 {
 	if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
@@ -2754,16 +2827,13 @@ int vhost_add_used_n(struct vhost_virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_n);
 
-static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static bool vhost_notify_split(struct vhost_dev *dev,
+			       struct vhost_virtqueue *vq)
 {
 	__u16 old, new;
 	__virtio16 event;
 	bool v;
 
-	/* TODO: check driver area */
-	if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
-		return true;
-
 	/* Flush out used index updates. This is paired
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
@@ -2796,6 +2866,64 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	return vring_need_event(vhost16_to_cpu(vq, event), new, old);
 }
 
+static bool vhost_notify_packed(struct vhost_dev *dev,
+				struct vhost_virtqueue *vq)
+{
+	__virtio16 event_off_wrap, event_flags;
+	__u16 old, new, off_wrap;
+	bool v;
+
+	/* Flush out used descriptors updates. This is paired
+	 * with the barrier that the Guest executes when enabling
+	 * interrupts.
+	 */
+	smp_mb();
+
+	if (vhost_get_avail(vq, event_flags,
+			   &vq->driver_event->flags) < 0) {
+		vq_err(vq, "Failed to get driver desc_event_flags");
+		return true;
+	}
+
+	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX))
+		return event_flags !=
+		       cpu_to_vhost16(vq, VRING_EVENT_F_DISABLE);
+
+	old = vq->signalled_used;
+	v = vq->signalled_used_valid;
+	new = vq->signalled_used = vq->last_used_idx;
+	vq->signalled_used_valid = true;
+
+	if (event_flags != cpu_to_vhost16(vq, VRING_EVENT_F_DESC))
+		return event_flags !=
+		       cpu_to_vhost16(vq, VRING_EVENT_F_DISABLE);
+
+	/* Read desc event flags before event_off and event_wrap */
+	smp_rmb();
+
+	if (vhost_get_avail(vq, event_off_wrap,
+			    &vq->driver_event->off_wrap) < 0) {
+		vq_err(vq, "Failed to get driver desc_event_off/wrap");
+		return true;
+	}
+
+	off_wrap = vhost16_to_cpu(vq, event_off_wrap);
+
+	if (unlikely(!v))
+		return true;
+
+	return vhost_vring_packed_need_event(vq, vq->last_used_wrap_counter,
+					     off_wrap, new, old);
+}
+
+static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+	if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+		return vhost_notify_packed(dev, vq);
+	else
+		return vhost_notify_split(dev, vq);
+}
+
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
@@ -2873,10 +3001,34 @@ static bool vhost_enable_notify_packed(struct vhost_dev *dev,
 				       struct vhost_virtqueue *vq)
 {
 	struct vring_packed_desc *d = vq->desc_packed + vq->avail_idx;
-	__virtio16 flags;
+	__virtio16 flags = cpu_to_vhost16(vq, VRING_EVENT_F_ENABLE);
 	int ret;
 
-	/* TODO: enable notification through device area */
+	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
+		return false;
+	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
+
+	if (vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
+		__virtio16 off_wrap = cpu_to_vhost16(vq, vq->avail_idx |
+				      vq->avail_wrap_counter << 15);
+
+		ret = vhost_update_device_off_wrap(vq, off_wrap);
+		if (ret) {
+			vq_err(vq, "Failed to write to off warp at %p: %d\n",
+			       &vq->device_event->off_wrap, ret);
+			return false;
+		}
+		/* Make sure off_wrap is wrote before flags */
+		smp_wmb();
+		flags = cpu_to_vhost16(vq, VRING_EVENT_F_DESC);
+	}
+
+	ret = vhost_update_device_flags(vq, flags);
+	if (ret) {
+		vq_err(vq, "Failed to enable notification at %p: %d\n",
+			&vq->device_event->flags, ret);
+		return false;
+	}
 
 	/* They could have slipped one in as we were doing that: make
 	 * sure it's written, then check again.
@@ -2943,7 +3095,18 @@ EXPORT_SYMBOL_GPL(vhost_enable_notify);
 static void vhost_disable_notify_packed(struct vhost_dev *dev,
 					struct vhost_virtqueue *vq)
 {
-	/* TODO: disable notification through device area */
+	__virtio16 flags;
+	int r;
+
+	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
+		return;
+	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
+
+	flags = cpu_to_vhost16(vq, VRING_EVENT_F_DISABLE);
+	r = vhost_update_device_flags(vq, flags);
+	if (r)
+		vq_err(vq, "Failed to enable notification at %p: %d\n",
+		       &vq->device_event->flags, r);
 }
 
 static void vhost_disable_notify_split(struct vhost_dev *dev,
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 73c2a78..3a7fc4b 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -96,8 +96,14 @@ struct vhost_virtqueue {
 		struct vring_desc __user *desc;
 		struct vring_packed_desc __user *desc_packed;
 	};
-	struct vring_avail __user *avail;
-	struct vring_used __user *used;
+	union {
+		struct vring_avail __user *avail;
+		struct vring_packed_desc_event __user *driver_event;
+	};
+	union {
+		struct vring_used __user *used;
+		struct vring_packed_desc_event __user *device_event;
+	};
 	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct eventfd_ctx *call_ctx;
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next V2 8/8] vhost: enable packed virtqueues
From: Jason Wang @ 2018-07-16  3:28 UTC (permalink / raw)
  To: mst, jasowang
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <1531711691-6769-1-git-send-email-jasowang@redhat.com>

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 3a7fc4b..0dfe864 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -264,7 +264,8 @@ enum {
 			 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
 			 (1ULL << VHOST_F_LOG_ALL) |
 			 (1ULL << VIRTIO_F_ANY_LAYOUT) |
-			 (1ULL << VIRTIO_F_VERSION_1)
+			 (1ULL << VIRTIO_F_VERSION_1) |
+			 (1ULL << VIRTIO_F_RING_PACKED)
 };
 
 static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next V2 0/8] Packed virtqueue support for vhost
From: Michael S. Tsirkin @ 2018-07-16  8:39 UTC (permalink / raw)
  To: Jason Wang
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <1531711691-6769-1-git-send-email-jasowang@redhat.com>

On Mon, Jul 16, 2018 at 11:28:03AM +0800, Jason Wang wrote:
> Hi all:
> 
> This series implements packed virtqueues. The code were tested with
> Tiwei's guest driver series at https://patchwork.ozlabs.org/cover/942297/
> 
> 
> Pktgen test for both RX and TX does not show obvious difference with
> split virtqueues. The main bottleneck is the guest Linux driver, since
> it can not stress vhost for a 100% CPU utilization. A full TCP
> benchmark is ongoing. Will test virtio-net pmd as well when it was
> ready.

Well the question then is why we should bother merging this
if this doesn't give a performance gain. Do you see
a gain in CPU utilization maybe?

If not - let's wait for that TCP benchmark result?

> Notes:
> - This version depends on Tiwei's series at https://patchwork.ozlabs.org/cover/942297/
> 
> This version were tested with:
> 
> - Zerocopy (Out of Order) support
> - vIOMMU support
> - mergeable buffer on/off
> - busy polling on/off
> - vsock (nc-vsock)
> 
> Changes from V1:
> - drop uapi patch and use Tiwei's
> - split the enablement of packed virtqueue into a separate patch
> 
> Changes from RFC V5:
> 
> - save unnecessary barriers during vhost_add_used_packed_n()
> - more compact math for event idx
> - fix failure of SET_VRING_BASE when avail_wrap_counter is true
> - fix not copy avail_wrap_counter during GET_VRING_BASE
> - introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing last_used_idx
> - rename used_wrap_counter to last_used_wrap_counter
> - rebase to net-next
> 
> Changes from RFC V4:
> 
> - fix signalled_used index recording
> - track avail index correctly
> - various minor fixes
> 
> Changes from RFC V3:
> 
> - Fix math on event idx checking
> - Sync last avail wrap counter through GET/SET_VRING_BASE
> - remove desc_event prefix in the driver/device structure
> 
> Changes from RFC V2:
> 
> - do not use & in checking desc_event_flags
> - off should be most significant bit
> - remove the workaround of mergeable buffer for dpdk prototype
> - id should be in the last descriptor in the chain
> - keep _F_WRITE for write descriptor when adding used
> - device flags updating should use ADDR_USED type
> - return error on unexpected unavail descriptor in a chain
> - return false in vhost_ve_avail_empty is descriptor is available
> - track last seen avail_wrap_counter
> - correctly examine available descriptor in get_indirect_packed()
> - vhost_idx_diff should return u16 instead of bool
> 
> Changes from RFC V1:
> 
> - Refactor vhost used elem code to avoid open coding on used elem
> - Event suppression support (compile test only).
> - Indirect descriptor support (compile test only).
> - Zerocopy support.
> - vIOMMU support.
> - SCSI/VSOCK support (compile test only).
> - Fix several bugs
> 
> Jason Wang (8):
>   vhost: move get_rx_bufs to vhost.c
>   vhost: hide used ring layout from device
>   vhost: do not use vring_used_elem
>   vhost_net: do not explicitly manipulate vhost_used_elem
>   vhost: vhost_put_user() can accept metadata type
>   vhost: packed ring support
>   vhost: event suppression for packed ring
>   vhost: enable packed virtqueues
> 
>  drivers/vhost/net.c        | 143 ++-----
>  drivers/vhost/scsi.c       |  62 +--
>  drivers/vhost/vhost.c      | 994 ++++++++++++++++++++++++++++++++++++++++-----
>  drivers/vhost/vhost.h      |  55 ++-
>  drivers/vhost/vsock.c      |  42 +-
>  include/uapi/linux/vhost.h |   7 +
>  6 files changed, 1035 insertions(+), 268 deletions(-)
> 
> -- 
> 2.7.4

^ permalink raw reply

* Re: [PATCH net-next V2 0/8] Packed virtqueue support for vhost
From: Jason Wang @ 2018-07-16  9:46 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <20180716113720-mutt-send-email-mst@kernel.org>



On 2018年07月16日 16:39, Michael S. Tsirkin wrote:
> On Mon, Jul 16, 2018 at 11:28:03AM +0800, Jason Wang wrote:
>> Hi all:
>>
>> This series implements packed virtqueues. The code were tested with
>> Tiwei's guest driver series at https://patchwork.ozlabs.org/cover/942297/
>>
>>
>> Pktgen test for both RX and TX does not show obvious difference with
>> split virtqueues. The main bottleneck is the guest Linux driver, since
>> it can not stress vhost for a 100% CPU utilization. A full TCP
>> benchmark is ongoing. Will test virtio-net pmd as well when it was
>> ready.
> Well the question then is why we should bother merging this
> if this doesn't give a performance gain.

We hit bottlenecks in other places. I can only test the Linux driver,
which has lots of overhead, e.g. interrupts. And perf shows that only a
small fraction of the time was spent on e.g. virtqueue manipulation. I
hope virtio-net pmd can give us a different result, but we don't have
one ready for testing now. (Jen's V4 has bugs and thus cannot work with
this series.)

>   Do you see
> a gain in CPU utilization maybe?

Unfortunately not.

>
> If not - let's wait for that TCP benchmark result?

We can, but you know TCP_STREAM results are sometimes misleading.

A bunch of my other patches were rebased on this series and are now
blocked on it. Considering that we don't see any regression, maybe we
can merge this first and try optimizations or fixups on top?

Thanks

>
>> Notes:
>> - This version depends on Tiwei's series at https://patchwork.ozlabs.org/cover/942297/
>>
>> This version were tested with:
>>
>> - Zerocopy (Out of Order) support
>> - vIOMMU support
>> - mergeable buffer on/off
>> - busy polling on/off
>> - vsock (nc-vsock)
>>
>> Changes from V1:
>> - drop uapi patch and use Tiwei's
>> - split the enablement of packed virtqueue into a separate patch
>>
>> Changes from RFC V5:
>>
>> - save unnecessary barriers during vhost_add_used_packed_n()
>> - more compact math for event idx
>> - fix failure of SET_VRING_BASE when avail_wrap_counter is true
>> - fix not copy avail_wrap_counter during GET_VRING_BASE
>> - introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing last_used_idx
>> - rename used_wrap_counter to last_used_wrap_counter
>> - rebase to net-next
>>
>> Changes from RFC V4:
>>
>> - fix signalled_used index recording
>> - track avail index correctly
>> - various minor fixes
>>
>> Changes from RFC V3:
>>
>> - Fix math on event idx checking
>> - Sync last avail wrap counter through GET/SET_VRING_BASE
>> - remove desc_event prefix in the driver/device structure
>>
>> Changes from RFC V2:
>>
>> - do not use & in checking desc_event_flags
>> - off should be most significant bit
>> - remove the workaround of mergeable buffer for dpdk prototype
>> - id should be in the last descriptor in the chain
>> - keep _F_WRITE for write descriptor when adding used
>> - device flags updating should use ADDR_USED type
>> - return error on unexpected unavail descriptor in a chain
>> - return false in vhost_ve_avail_empty is descriptor is available
>> - track last seen avail_wrap_counter
>> - correctly examine available descriptor in get_indirect_packed()
>> - vhost_idx_diff should return u16 instead of bool
>>
>> Changes from RFC V1:
>>
>> - Refactor vhost used elem code to avoid open coding on used elem
>> - Event suppression support (compile test only).
>> - Indirect descriptor support (compile test only).
>> - Zerocopy support.
>> - vIOMMU support.
>> - SCSI/VSOCK support (compile test only).
>> - Fix several bugs
>>
>> Jason Wang (8):
>>    vhost: move get_rx_bufs to vhost.c
>>    vhost: hide used ring layout from device
>>    vhost: do not use vring_used_elem
>>    vhost_net: do not explicitly manipulate vhost_used_elem
>>    vhost: vhost_put_user() can accept metadata type
>>    vhost: packed ring support
>>    vhost: event suppression for packed ring
>>    vhost: enable packed virtqueues
>>
>>   drivers/vhost/net.c        | 143 ++-----
>>   drivers/vhost/scsi.c       |  62 +--
>>   drivers/vhost/vhost.c      | 994 ++++++++++++++++++++++++++++++++++++++++-----
>>   drivers/vhost/vhost.h      |  55 ++-
>>   drivers/vhost/vsock.c      |  42 +-
>>   include/uapi/linux/vhost.h |   7 +
>>   6 files changed, 1035 insertions(+), 268 deletions(-)
>>
>> -- 
>> 2.7.4

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next V2 0/8] Packed virtqueue support for vhost
From: Michael S. Tsirkin @ 2018-07-16 12:49 UTC (permalink / raw)
  To: Jason Wang
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <33f4643f-f226-0389-1f4f-607c289db94e@redhat.com>

On Mon, Jul 16, 2018 at 05:46:33PM +0800, Jason Wang wrote:
> 
> 
> On 2018年07月16日 16:39, Michael S. Tsirkin wrote:
> > On Mon, Jul 16, 2018 at 11:28:03AM +0800, Jason Wang wrote:
> > > Hi all:
> > > 
> > > This series implements packed virtqueues. The code were tested with
> > > Tiwei's guest driver series at https://patchwork.ozlabs.org/cover/942297/
> > > 
> > > 
> > > Pktgen test for both RX and TX does not show obvious difference with
> > > split virtqueues. The main bottleneck is the guest Linux driver, since
> > > it can not stress vhost for a 100% CPU utilization. A full TCP
> > > benchmark is ongoing. Will test virtio-net pmd as well when it was
> > > ready.
> > Well the question then is why we should bother merging this
> > if this doesn't give a performance gain.
> 
> We meet bottlenecks at other places. I can only test Linux driver which has
> lots of overheads e.g interrupts. And perf show only a small fraction of
> time were spent on e.g virtqueue manipulation. I hope virtio-net pmd can
> give us different result but we don't have one ready for testing now. (Jen's
> V4 have bugs thus can not work with this series).

Can't linux busy poll? And how about testing loopback with XDP?

> >   Do you see
> > a gain in CPU utilization maybe?
> 
> Unfortunately not.
> 
> > 
> > If not - let's wait for that TCP benchmark result?
> 
> We can, but you know TCP_STREAM result is sometime misleading.
> 
> A brunch of other patches of mine were rebased on this and then blocked on
> this series. Consider we don't meet regression, maybe we can merge this
> first and try optimizations or fixups on top?
> 
> Thanks

I'm not sure I understand this approach. Packed ring is just an optimization.
What value is there in merging it if it does not help speed?

> > 
> > > Notes:
> > > - This version depends on Tiwei's series at https://patchwork.ozlabs.org/cover/942297/
> > > 
> > > This version were tested with:
> > > 
> > > - Zerocopy (Out of Order) support
> > > - vIOMMU support
> > > - mergeable buffer on/off
> > > - busy polling on/off
> > > - vsock (nc-vsock)
> > > 
> > > Changes from V1:
> > > - drop uapi patch and use Tiwei's
> > > - split the enablement of packed virtqueue into a separate patch
> > > 
> > > Changes from RFC V5:
> > > 
> > > - save unnecessary barriers during vhost_add_used_packed_n()
> > > - more compact math for event idx
> > > - fix failure of SET_VRING_BASE when avail_wrap_counter is true
> > > - fix not copy avail_wrap_counter during GET_VRING_BASE
> > > - introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing last_used_idx
> > > - rename used_wrap_counter to last_used_wrap_counter
> > > - rebase to net-next
> > > 
> > > Changes from RFC V4:
> > > 
> > > - fix signalled_used index recording
> > > - track avail index correctly
> > > - various minor fixes
> > > 
> > > Changes from RFC V3:
> > > 
> > > - Fix math on event idx checking
> > > - Sync last avail wrap counter through GET/SET_VRING_BASE
> > > - remove desc_event prefix in the driver/device structure
> > > 
> > > Changes from RFC V2:
> > > 
> > > - do not use & in checking desc_event_flags
> > > - off should be most significant bit
> > > - remove the workaround of mergeable buffer for dpdk prototype
> > > - id should be in the last descriptor in the chain
> > > - keep _F_WRITE for write descriptor when adding used
> > > - device flags updating should use ADDR_USED type
> > > - return error on unexpected unavail descriptor in a chain
> > > - return false in vhost_ve_avail_empty is descriptor is available
> > > - track last seen avail_wrap_counter
> > > - correctly examine available descriptor in get_indirect_packed()
> > > - vhost_idx_diff should return u16 instead of bool
> > > 
> > > Changes from RFC V1:
> > > 
> > > - Refactor vhost used elem code to avoid open coding on used elem
> > > - Event suppression support (compile test only).
> > > - Indirect descriptor support (compile test only).
> > > - Zerocopy support.
> > > - vIOMMU support.
> > > - SCSI/VSOCK support (compile test only).
> > > - Fix several bugs
> > > 
> > > Jason Wang (8):
> > >    vhost: move get_rx_bufs to vhost.c
> > >    vhost: hide used ring layout from device
> > >    vhost: do not use vring_used_elem
> > >    vhost_net: do not explicitly manipulate vhost_used_elem
> > >    vhost: vhost_put_user() can accept metadata type
> > >    vhost: packed ring support
> > >    vhost: event suppression for packed ring
> > >    vhost: enable packed virtqueues
> > > 
> > >   drivers/vhost/net.c        | 143 ++-----
> > >   drivers/vhost/scsi.c       |  62 +--
> > >   drivers/vhost/vhost.c      | 994 ++++++++++++++++++++++++++++++++++++++++-----
> > >   drivers/vhost/vhost.h      |  55 ++-
> > >   drivers/vhost/vsock.c      |  42 +-
> > >   include/uapi/linux/vhost.h |   7 +
> > >   6 files changed, 1035 insertions(+), 268 deletions(-)
> > > 
> > > -- 
> > > 2.7.4
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next V2 0/8] Packed virtqueue support for vhost
From: Jason Wang @ 2018-07-17  0:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <20180716154102-mutt-send-email-mst@kernel.org>



On 2018年07月16日 20:49, Michael S. Tsirkin wrote:
> On Mon, Jul 16, 2018 at 05:46:33PM +0800, Jason Wang wrote:
>>
>> On 2018年07月16日 16:39, Michael S. Tsirkin wrote:
>>> On Mon, Jul 16, 2018 at 11:28:03AM +0800, Jason Wang wrote:
>>>> Hi all:
>>>>
>>>> This series implements packed virtqueues. The code were tested with
>>>> Tiwei's guest driver series at https://patchwork.ozlabs.org/cover/942297/
>>>>
>>>>
>>>> Pktgen test for both RX and TX does not show obvious difference with
>>>> split virtqueues. The main bottleneck is the guest Linux driver, since
>>>> it can not stress vhost for a 100% CPU utilization. A full TCP
>>>> benchmark is ongoing. Will test virtio-net pmd as well when it was
>>>> ready.
>>> Well the question then is why we should bother merging this
>>> if this doesn't give a performance gain.
>> We meet bottlenecks at other places. I can only test Linux driver which has
>> lots of overheads e.g interrupts. And perf show only a small fraction of
>> time were spent on e.g virtqueue manipulation. I hope virtio-net pmd can
>> give us different result but we don't have one ready for testing now. (Jen's
>> V4 have bugs thus can not work with this series).
> Can't linux busy poll?

For vhost busy polling, there's no difference, since the guest cannot
put enough stress on vhost. Guest busy polling does not work for the
packets generated by pktgen.

> And how about testing loopback with XDP?

No difference, I even shortcut both the tun_get_user() on host and 
netif_receive_skb() in guest.

>>>    Do you see
>>> a gain in CPU utilization maybe?
>> Unfortunately not.
>>
>>> If not - let's wait for that TCP benchmark result?
>> We can, but you know TCP_STREAM result is sometime misleading.
>>
>> A brunch of other patches of mine were rebased on this and then blocked on
>> this series. Consider we don't meet regression, maybe we can merge this
>> first and try optimizations or fixups on top?
>>
>> Thanks
> I'm not sure I understand this approach. Packed ring is just an optimization.
> What value is there in merging it if it does not help speed?

It has value if you want to support migration from a dpdk or vDPA
backend. And we still have the chance to see the performance with
virtio-net pmd in the future. If this does not make sense to you, I will
leave this series until we can get results from virtio-net pmd (or find
a case where packed virtqueue outperforms split virtqueue). In the
meantime, I will start to post other optimizations on vhost.

Thanks

>
>>>> Notes:
>>>> - This version depends on Tiwei's series at https://patchwork.ozlabs.org/cover/942297/
>>>>
>>>> This version were tested with:
>>>>
>>>> - Zerocopy (Out of Order) support
>>>> - vIOMMU support
>>>> - mergeable buffer on/off
>>>> - busy polling on/off
>>>> - vsock (nc-vsock)
>>>>
>>>> Changes from V1:
>>>> - drop uapi patch and use Tiwei's
>>>> - split the enablement of packed virtqueue into a separate patch
>>>>
>>>> Changes from RFC V5:
>>>>
>>>> - save unnecessary barriers during vhost_add_used_packed_n()
>>>> - more compact math for event idx
>>>> - fix failure of SET_VRING_BASE when avail_wrap_counter is true
>>>> - fix not copy avail_wrap_counter during GET_VRING_BASE
>>>> - introduce SET_VRING_USED_BASE/GET_VRING_USED_BASE for syncing last_used_idx
>>>> - rename used_wrap_counter to last_used_wrap_counter
>>>> - rebase to net-next
>>>>
>>>> Changes from RFC V4:
>>>>
>>>> - fix signalled_used index recording
>>>> - track avail index correctly
>>>> - various minor fixes
>>>>
>>>> Changes from RFC V3:
>>>>
>>>> - Fix math on event idx checking
>>>> - Sync last avail wrap counter through GET/SET_VRING_BASE
>>>> - remove desc_event prefix in the driver/device structure
>>>>
>>>> Changes from RFC V2:
>>>>
>>>> - do not use & in checking desc_event_flags
>>>> - off should be most significant bit
>>>> - remove the workaround of mergeable buffer for dpdk prototype
>>>> - id should be in the last descriptor in the chain
>>>> - keep _F_WRITE for write descriptor when adding used
>>>> - device flags updating should use ADDR_USED type
>>>> - return error on unexpected unavail descriptor in a chain
>>>> - return false in vhost_ve_avail_empty is descriptor is available
>>>> - track last seen avail_wrap_counter
>>>> - correctly examine available descriptor in get_indirect_packed()
>>>> - vhost_idx_diff should return u16 instead of bool
>>>>
>>>> Changes from RFC V1:
>>>>
>>>> - Refactor vhost used elem code to avoid open coding on used elem
>>>> - Event suppression support (compile test only).
>>>> - Indirect descriptor support (compile test only).
>>>> - Zerocopy support.
>>>> - vIOMMU support.
>>>> - SCSI/VSOCK support (compile test only).
>>>> - Fix several bugs
>>>>
>>>> Jason Wang (8):
>>>>     vhost: move get_rx_bufs to vhost.c
>>>>     vhost: hide used ring layout from device
>>>>     vhost: do not use vring_used_elem
>>>>     vhost_net: do not explicitly manipulate vhost_used_elem
>>>>     vhost: vhost_put_user() can accept metadata type
>>>>     vhost: packed ring support
>>>>     vhost: event suppression for packed ring
>>>>     vhost: enable packed virtqueues
>>>>
>>>>    drivers/vhost/net.c        | 143 ++-----
>>>>    drivers/vhost/scsi.c       |  62 +--
>>>>    drivers/vhost/vhost.c      | 994 ++++++++++++++++++++++++++++++++++++++++-----
>>>>    drivers/vhost/vhost.h      |  55 ++-
>>>>    drivers/vhost/vsock.c      |  42 +-
>>>>    include/uapi/linux/vhost.h |   7 +
>>>>    6 files changed, 1035 insertions(+), 268 deletions(-)
>>>>
>>>> -- 
>>>> 2.7.4

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* RE: [PATCH v6 3/3] x86: paravirt: make native_save_fl extern inline
From: David Laight @ 2018-07-17  8:44 UTC (permalink / raw)
  To: 'Nick Desaulniers'
  Cc: Kate Stewart, andrea.parri@amarulasolutions.com,
	linux-efi@vger.kernel.org, brijesh.singh@amd.com, J. Kiszka,
	Josh Poimboeuf, Will Deacon, jarkko.sakkinen@linux.intel.com,
	virtualization@lists.linux-foundation.org, Masahiro Yamada,
	Manoj Gupta, hpa@zytor.com, boris.ostrovsky@oracle.com,
	Thiebaud Weksteen, mawilcox@microsoft.com, x86@kernel.org,
	akataria@vmware.com, Greg Hackmann
In-Reply-To: <CAKwvOdmFVnSos71vAUnayuocBDhxLg76tvGF0y3sj+4+s7TCcw@mail.gmail.com>

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

Please consider the environment and don't print this e-mail unless you really need to

^ permalink raw reply

* Re: [PATCH net-next V2 0/8] Packed virtqueue support for vhost
From: David Miller @ 2018-07-18  4:09 UTC (permalink / raw)
  To: mst; +Cc: kvm, netdev, linux-kernel, virtualization, maxime.coquelin, wexu
In-Reply-To: <20180716154102-mutt-send-email-mst@kernel.org>

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 16 Jul 2018 15:49:04 +0300

> I'm not sure I understand this approach. Packed ring is just an
> optimization.  What value is there in merging it if it does not help
> speed?

So it seems that both Tiwei's and Jason's packed patch sets are kind
of in limbo due to this discussion.

If I understand Jason correctly, he's trying to say that although this
work doesn't show improvements by itself, it paves the way such that
optimizations done in the future will be more visible.

I kind of can see Michael's viewpoint too, in that we should put this
stuff in later when it does actually show some difference.

Therefore, I'll mark both patch sets as "deferred" for now.

Let me know if I should do something else.

Thanks!

^ permalink raw reply

* Patch "compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations" has been added to the 4.14-stable tree
From: gregkh @ 2018-07-18  9:17 UTC (permalink / raw)
  To: 20180621162324.36656-2-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

to the 4.14-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
and it can be found in the queue-4.14 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

From: Nick Desaulniers <ndesaulniers@google.com>

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/compiler-gcc.h |   29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,25 +66,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.14/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.14/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.14/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Patch "x86/paravirt: Make native_save_fl() extern inline" has been added to the 4.14-stable tree
From: gregkh @ 2018-07-18  9:17 UTC (permalink / raw)
  To: 20180621162324.36656-4-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/paravirt: Make native_save_fl() extern inline

to the 4.14-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-paravirt-make-native_save_fl-extern-inline.patch
and it can be found in the queue-4.14 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: x86/paravirt: Make native_save_fl() extern inline

From: Nick Desaulniers <ndesaulniers@google.com>

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emission of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/irqflags.h |    2 +-
 arch/x86/kernel/Makefile        |    1 +
 arch/x86/kernel/irqflags.S      |   26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -58,6 +58,7 @@ obj-y			+= alternative.o i8253.o pci-nom
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.14/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.14/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.14/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Patch "x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>" has been added to the 4.14-stable tree
From: gregkh @ 2018-07-18  9:17 UTC (permalink / raw)
  To: 20180621162324.36656-3-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

to the 4.14-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
and it can be found in the queue-4.14 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

From: H. Peter Anvin <hpa@linux.intel.com>

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/asm.h |   59 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


Patches currently in stable-queue which might be from hpa@linux.intel.com are

queue-4.14/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch

^ permalink raw reply

* Patch "compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations" has been added to the 4.17-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-2-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

to the 4.17-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
and it can be found in the queue-4.17 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

From: Nick Desaulniers <ndesaulniers@google.com>

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/compiler-gcc.h |   29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,25 +66,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.17/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.17/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.17/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Patch "x86/paravirt: Make native_save_fl() extern inline" has been added to the 4.17-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-4-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/paravirt: Make native_save_fl() extern inline

to the 4.17-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-paravirt-make-native_save_fl-extern-inline.patch
and it can be found in the queue-4.17 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: x86/paravirt: Make native_save_fl() extern inline

From: Nick Desaulniers <ndesaulniers@google.com>

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emission of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/irqflags.h |    2 +-
 arch/x86/kernel/Makefile        |    1 +
 arch/x86/kernel/irqflags.S      |   26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y			+= alternative.o i8253.o hw_brea
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.17/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.17/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.17/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Patch "x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>" has been added to the 4.17-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-3-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

to the 4.17-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
and it can be found in the queue-4.17 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

From: H. Peter Anvin <hpa@linux.intel.com>

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/asm.h |   59 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


Patches currently in stable-queue which might be from hpa@linux.intel.com are

queue-4.17/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch

^ permalink raw reply

* Patch "compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations" has been added to the 4.4-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-2-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

From: Nick Desaulniers <ndesaulniers@google.com>

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/compiler-gcc.h |   29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,25 +65,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.4/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.4/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch

^ permalink raw reply

* Patch "x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>" has been added to the 4.4-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-3-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

From: H. Peter Anvin <hpa@linux.intel.com>

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/asm.h |   59 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -44,6 +44,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE(from,to)					\


Patches currently in stable-queue which might be from hpa@linux.intel.com are

queue-4.4/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch

^ permalink raw reply

* Patch "x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>" has been added to the 4.9-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-3-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

to the 4.9-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
and it can be found in the queue-4.9 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

From: H. Peter Anvin <hpa@linux.intel.com>

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/asm.h |   59 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -45,6 +45,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


Patches currently in stable-queue which might be from hpa@linux.intel.com are

queue-4.9/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch

^ permalink raw reply

* Patch "compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations" has been added to the 4.9-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-2-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

to the 4.9-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
and it can be found in the queue-4.9 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

From: Nick Desaulniers <ndesaulniers@google.com>

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/compiler-gcc.h |   29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,25 +65,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.9/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.9/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.9/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Patch "x86/paravirt: Make native_save_fl() extern inline" has been added to the 4.9-stable tree
From: gregkh @ 2018-07-18  9:34 UTC (permalink / raw)
  To: 20180621162324.36656-4-ndesaulniers, acme, akataria, akpm,
	andrea.parri, ard.biesheuvel, arnd, aryabinin, astrachan,
	boris.ostrovsky, brijesh.singh, caoj.fnst, geert, ghackmann,
	gregkh, hpa, jan.kiszka, jarkko.sakkinen, jgross, joe, jpoimboe,
	keescook, kirill.shutemov, kstewart, manojgupta, mawilcox,
	michal.lkml, mingo, mjg59, mka
  Cc: stable-commits


This is a note to let you know that I've just added the patch titled

    x86/paravirt: Make native_save_fl() extern inline

to the 4.9-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-paravirt-make-native_save_fl-extern-inline.patch
and it can be found in the queue-4.9 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: x86/paravirt: Make native_save_fl() extern inline

From: Nick Desaulniers <ndesaulniers@google.com>

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/irqflags.h |    2 +-
 arch/x86/kernel/Makefile        |    1 +
 arch/x86/kernel/irqflags.S      |   26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,7 +12,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-y			+= alternative.o i8253.o pci-nom
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


Patches currently in stable-queue which might be from ndesaulniers@google.com are

queue-4.9/x86-asm-add-_asm_arg-constants-for-argument-registers-to-asm-asm.h.patch
queue-4.9/compiler-gcc.h-add-__attribute__-gnu_inline-to-all-inline-declarations.patch
queue-4.9/x86-paravirt-make-native_save_fl-extern-inline.patch

^ permalink raw reply

* Memory Read Only Enforcement: VMM assisted kernel rootkit mitigation for KVM
From: Ahmed Abd El Mawgood @ 2018-07-19 21:37 UTC (permalink / raw)
  To: kvm, Kernel Hardening, virtualization, linux-doc, x86
  Cc: Ard Biesheuvel, Kees Cook, nathan Corbet, David Vrabel, rkrcmar,
	Boris Lukashev, Ingo Molnar, nigel.edwards, hpa, Paolo Bonzini,
	Thomas Gleixner, Rik van Riel

Hi,

This is my first set of patches that works as I would expect, and the
third revision I sent to mailing lists.

This follows up on my previous discussions about kernel rootkit mitigation
by placing R/O protection on critical data structures, static data, and
privileged registers with static content. These patches present the
first part, where it is only possible to place these protections on
memory pages. Feature-wise, this set of patches is incomplete in that:
- They still don't protect privileged registers.
- They don't protect the guest TLB from malicious gva -> gpa page mappings.
But they provide a sketch of a basic working design. Note that I am a total
noob and it took lots of time and effort to get to this point, so sorry in
advance if I overlooked something.

[PATCH 1/3] [RFC V3] KVM: X86: Memory ROE documentation
[PATCH 2/3] [RFC V3] KVM: X86: Adding arbitrary data pointer in kvm memslot itterator functions
[PATCH 3/3] [RFC V3] KVM: X86: Adding skeleton for Memory ROE

Summary:

 Documentation/virtual/kvm/hypercalls.txt |  14 ++++
 arch/x86/include/asm/kvm_host.h          |  11 ++-
 arch/x86/kvm/Kconfig                     |   7 ++
 arch/x86/kvm/mmu.c                       | 127 ++++++++++++++++++++++---------
 arch/x86/kvm/x86.c                       |  82 +++++++++++++++++++-
 include/linux/kvm_host.h                 |   3 +
 include/uapi/linux/kvm_para.h            |   1 +
 virt/kvm/kvm_main.c                      |  29 ++++++-
 8 files changed, 232 insertions(+), 42 deletions(-)

^ permalink raw reply

* [PATCH 1/3] [RFC V3] KVM: X86: Memory ROE documentation
From: Ahmed Abd El Mawgood @ 2018-07-19 21:38 UTC (permalink / raw)
  To: kvm, Kernel Hardening, virtualization, linux-doc, x86
  Cc: Ard Biesheuvel, Kees Cook, nathan Corbet, David Vrabel, rkrcmar,
	Boris Lukashev, Ingo Molnar, nigel.edwards, hpa, Paolo Bonzini,
	Thomas Gleixner, Rik van Riel
In-Reply-To: <20180719213802.17161-1-ahmedsoliman0x666@gmail.com>

This follows up on my previous threads on KVM-assisted anti-rootkit
protections.
The current version doesn't address attacks involving page
remapping. That part of the design is still in progress; it will be
covered in my later patch sets.

Signed-off-by: Ahmed Abd El Mawgood <ahmedsoliman0x666@gmail.com>
---
 Documentation/virtual/kvm/hypercalls.txt | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index a890529c63ed..a9db68adb7c9 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,17 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+7. KVM_HC_HMROE
+----------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to apply Read-Only Enforcement to guest pages
+Usage:
+     a0: start address of page that should be protected.
+
+This hypercall lets a guest kernel have part of its read/write memory
+converted into read-only.  This action is irreversible. KVM_HC_HMROE can
+not be triggered from guest Ring 3 (user mode). The reason is that user
+mode malicious software could use it to enforce read-only protection on
+an arbitrary memory page, thus crashing the kernel.
-- 
2.16.4

^ permalink raw reply related

* [PATCH 2/3] [RFC V3] KVM: X86: Adding arbitrary data pointer in kvm memslot itterator functions
From: Ahmed Abd El Mawgood @ 2018-07-19 21:38 UTC (permalink / raw)
  To: kvm, Kernel Hardening, virtualization, linux-doc, x86
  Cc: Ard Biesheuvel, Kees Cook, nathan Corbet, David Vrabel, rkrcmar,
	Boris Lukashev, Ingo Molnar, nigel.edwards, hpa, Paolo Bonzini,
	Thomas Gleixner, Rik van Riel
In-Reply-To: <20180719213802.17161-1-ahmedsoliman0x666@gmail.com>

This will help share data with the slot_level_handler callback. In my
case I need to share a counter of the pages traversed so I can use it as
an index into a bitmap. Being able to pass an arbitrary data pointer into
the slot_level_handler callback makes this easy.

Signed-off-by: Ahmed Abd El Mawgood <ahmedsoliman0x666@gmail.com>
---
 arch/x86/kvm/mmu.c | 65 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d594690d8b95..77661530b2c4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1418,7 +1418,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
 
 static bool __rmap_write_protect(struct kvm *kvm,
 				 struct kvm_rmap_head *rmap_head,
-				 bool pt_protect)
+				 bool pt_protect, void *data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1457,7 +1457,8 @@ static bool wrprot_ad_disabled_spte(u64 *sptep)
  *	- W bit on ad-disabled SPTEs.
  * Returns true iff any D or W bits were cleared.
  */
-static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+				void *data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1483,7 +1484,8 @@ static bool spte_set_dirty(u64 *sptep)
 	return mmu_spte_update(sptep, spte);
 }
 
-static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+				void *data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1515,7 +1517,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PT_PAGE_TABLE_LEVEL, slot);
-		__rmap_write_protect(kvm, rmap_head, false);
+		__rmap_write_protect(kvm, rmap_head, false, NULL);
 
 		/* clear the first set bit */
 		mask &= mask - 1;
@@ -1541,7 +1543,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PT_PAGE_TABLE_LEVEL, slot);
-		__rmap_clear_dirty(kvm, rmap_head);
+		__rmap_clear_dirty(kvm, rmap_head, NULL);
 
 		/* clear the first set bit */
 		mask &= mask - 1;
@@ -1594,7 +1596,8 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 
 	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
 		rmap_head = __gfn_to_rmap(gfn, i, slot);
-		write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+		write_protected |= __rmap_write_protect(kvm, rmap_head, true,
+				NULL);
 	}
 
 	return write_protected;
@@ -1608,7 +1611,8 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 	return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
 }
 
-static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+		void *data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1628,7 +1632,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			   struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			   unsigned long data)
 {
-	return kvm_zap_rmapp(kvm, rmap_head);
+	return kvm_zap_rmapp(kvm, rmap_head, NULL);
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -5086,13 +5090,15 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 }
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+typedef bool (*slot_level_handler) (struct kvm *kvm,
+		struct kvm_rmap_head *rmap_head, void *data);
 
 /* The caller should hold mmu-lock before calling this function. */
 static __always_inline bool
 slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			slot_level_handler fn, int start_level, int end_level,
-			gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+			gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb,
+			void *data)
 {
 	struct slot_rmap_walk_iterator iterator;
 	bool flush = false;
@@ -5100,7 +5106,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
 			end_gfn, &iterator) {
 		if (iterator.rmap)
-			flush |= fn(kvm, iterator.rmap);
+			flush |= fn(kvm, iterator.rmap, data);
 
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
 			if (flush && lock_flush_tlb) {
@@ -5122,36 +5128,36 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 static __always_inline bool
 slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		  slot_level_handler fn, int start_level, int end_level,
-		  bool lock_flush_tlb)
+		  bool lock_flush_tlb, void *data)
 {
 	return slot_handle_level_range(kvm, memslot, fn, start_level,
 			end_level, memslot->base_gfn,
 			memslot->base_gfn + memslot->npages - 1,
-			lock_flush_tlb);
+			lock_flush_tlb, data);
 }
 
 static __always_inline bool
 slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-		      slot_level_handler fn, bool lock_flush_tlb)
+		      slot_level_handler fn, bool lock_flush_tlb, void *data)
 {
 	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
-				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb, data);
 }
 
 static __always_inline bool
 slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			slot_level_handler fn, bool lock_flush_tlb)
+			slot_level_handler fn, bool lock_flush_tlb, void *data)
 {
 	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
-				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb, data);
 }
 
 static __always_inline bool
 slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
-		 slot_level_handler fn, bool lock_flush_tlb)
+		 slot_level_handler fn, bool lock_flush_tlb, void *data)
 {
 	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
-				 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+				 PT_PAGE_TABLE_LEVEL, lock_flush_tlb, data);
 }
 
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
@@ -5173,7 +5179,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 
 			slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
 						PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
-						start, end - 1, true);
+						start, end - 1, true, NULL);
 		}
 	}
 
@@ -5181,9 +5187,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 }
 
 static bool slot_rmap_write_protect(struct kvm *kvm,
-				    struct kvm_rmap_head *rmap_head)
+				    struct kvm_rmap_head *rmap_head,
+				    void *data)
 {
-	return __rmap_write_protect(kvm, rmap_head, false);
+	return __rmap_write_protect(kvm, rmap_head, false, data);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -5193,7 +5200,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
-				      false);
+				      false, NULL);
 	spin_unlock(&kvm->mmu_lock);
 
 	/*
@@ -5219,7 +5226,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 }
 
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
-					 struct kvm_rmap_head *rmap_head)
+					 struct kvm_rmap_head *rmap_head,
+					 void *data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -5257,7 +5265,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	/* FIXME: const-ify all uses of struct kvm_memory_slot.  */
 	spin_lock(&kvm->mmu_lock);
 	slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
-			 kvm_mmu_zap_collapsible_spte, true);
+			 kvm_mmu_zap_collapsible_spte, true, NULL);
 	spin_unlock(&kvm->mmu_lock);
 }
 
@@ -5267,7 +5275,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 	bool flush;
 
 	spin_lock(&kvm->mmu_lock);
-	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
+	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false, NULL);
 	spin_unlock(&kvm->mmu_lock);
 
 	lockdep_assert_held(&kvm->slots_lock);
@@ -5290,7 +5298,7 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
-					false);
+					false, NULL);
 	spin_unlock(&kvm->mmu_lock);
 
 	/* see kvm_mmu_slot_remove_write_access */
@@ -5307,7 +5315,8 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 	bool flush;
 
 	spin_lock(&kvm->mmu_lock);
-	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
+	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false,
+			NULL);
 	spin_unlock(&kvm->mmu_lock);
 
 	lockdep_assert_held(&kvm->slots_lock);
-- 
2.16.4

^ permalink raw reply related

* [PATCH 3/3] [RFC V3] KVM: X86: Adding skeleton for Memory ROE
From: Ahmed Abd El Mawgood @ 2018-07-19 21:38 UTC (permalink / raw)
  To: kvm, Kernel Hardening, virtualization, linux-doc, x86
  Cc: Ard Biesheuvel, Kees Cook, nathan Corbet, David Vrabel, rkrcmar,
	Boris Lukashev, Ingo Molnar, nigel.edwards, hpa, Paolo Bonzini,
	Thomas Gleixner, Rik van Riel
In-Reply-To: <20180719213802.17161-1-ahmedsoliman0x666@gmail.com>

This patch introduces a hypercall implemented for X86 that can assist
against a subset of kernel rootkits. It works by placing read-only
protection in the shadow PTEs. The resulting protection is also kept in a
bitmap for each kvm_memory_slot and is used as a reference when updating
SPTEs. The whole goal is to protect the guest kernel's static data from
modification even if an attacker is running in guest ring 0; for this
reason there is no hypercall to revert the effect of the Memory ROE
hypercall. This patch doesn't implement an integrity check on the guest
TLB, so an obvious attack on the current implementation would involve
guest virtual address -> guest physical address remapping, but there are
plans to fix that.

Signed-off-by: Ahmed Abd El Mawgood <ahmedsoliman0x666@gmail.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++-
 arch/x86/kvm/Kconfig            |  7 ++++
 arch/x86/kvm/mmu.c              | 72 ++++++++++++++++++++++++++++++------
 arch/x86/kvm/x86.c              | 82 +++++++++++++++++++++++++++++++++++++++--
 include/linux/kvm_host.h        |  3 ++
 include/uapi/linux/kvm_para.h   |  1 +
 virt/kvm/kvm_main.c             | 29 +++++++++++++--
 7 files changed, 186 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c13cd28d9d1b..128bcfa246a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,6 +236,15 @@ struct kvm_mmu_memory_cache {
 	void *objects[KVM_NR_MEM_OBJS];
 };
 
+/*
+ * This is an internal structure used to access the kvm memory slot and
+ * keep track of the number of the current PTE when doing a shadow PTE walk
+ */
+struct kvm_write_access_data {
+	int i;
+	struct kvm_memory_slot *memslot;
+};
+
 /*
  * the pages used as guest page table on soft mmu are tracked by
  * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
@@ -1130,7 +1139,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 acc_track_mask, u64 me_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+void kvm_mmu_slot_apply_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 92fd433c50b9..8ae822a8dc7a 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -96,6 +96,13 @@ config KVM_MMU_AUDIT
 	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
 	 auditing of KVM MMU events at runtime.
 
+config KVM_MROE
+	bool "Hypercall Memory Read-Only Enforcement"
+	depends on KVM && X86
+	help
+	This option adds the KVM_HC_HMROE hypercall to KVM, which acts as a
+	hardening mechanism to protect memory pages from being edited.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 77661530b2c4..4ce6a9a19a23 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1416,9 +1416,8 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
 	return mmu_spte_update(sptep, spte);
 }
 
-static bool __rmap_write_protect(struct kvm *kvm,
-				 struct kvm_rmap_head *rmap_head,
-				 bool pt_protect, void *data)
+static bool __rmap_write_protection(struct kvm *kvm,
+		struct kvm_rmap_head *rmap_head, bool pt_protect)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1430,6 +1429,38 @@ static bool __rmap_write_protect(struct kvm *kvm,
 	return flush;
 }
 
+#ifdef CONFIG_KVM_MROE
+static bool __rmap_write_protect_mroe(struct kvm *kvm,
+		struct kvm_rmap_head *rmap_head,
+		bool pt_protect,
+		struct kvm_write_access_data *d)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	bool prot;
+	bool flush = false;
+
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
+		prot = !test_bit(d->i, d->memslot->mroe_bitmap) && pt_protect;
+		flush |= spte_write_protect(sptep, prot);
+		d->i++;
+	}
+	return flush;
+}
+#endif
+
+static bool __rmap_write_protect(struct kvm *kvm,
+		struct kvm_rmap_head *rmap_head,
+		bool pt_protect,
+		struct kvm_write_access_data *d)
+{
+#ifdef CONFIG_KVM_MROE
+	if (d != NULL)
+		return __rmap_write_protect_mroe(kvm, rmap_head, pt_protect, d);
+#endif
+	return __rmap_write_protection(kvm, rmap_head, pt_protect);
+}
+
 static bool spte_clear_dirty(u64 *sptep)
 {
 	u64 spte = *sptep;
@@ -1517,7 +1548,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PT_PAGE_TABLE_LEVEL, slot);
-		__rmap_write_protect(kvm, rmap_head, false, NULL);
+		__rmap_write_protection(kvm, rmap_head, false);
 
 		/* clear the first set bit */
 		mask &= mask - 1;
@@ -1593,11 +1624,15 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 	struct kvm_rmap_head *rmap_head;
 	int i;
 	bool write_protected = false;
+	struct kvm_write_access_data data = {
+		.i = 0,
+		.memslot = slot,
+	};
 
 	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
 		rmap_head = __gfn_to_rmap(gfn, i, slot);
 		write_protected |= __rmap_write_protect(kvm, rmap_head, true,
-				NULL);
+				&data);
 	}
 
 	return write_protected;
@@ -5190,21 +5225,36 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
 				    struct kvm_rmap_head *rmap_head,
 				    void *data)
 {
-	return __rmap_write_protect(kvm, rmap_head, false, data);
+	return __rmap_write_protect(kvm, rmap_head, false,
+			(struct kvm_write_access_data *)data);
 }
 
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+static bool slot_rmap_apply_protection(struct kvm *kvm,
+		struct kvm_rmap_head *rmap_head,
+		void *data)
+{
+	struct kvm_write_access_data *d = (struct kvm_write_access_data *) data;
+	bool prot_mask = !(d->memslot->flags & KVM_MEM_READONLY);
+
+	return __rmap_write_protect(kvm, rmap_head, prot_mask, d);
+}
+
+void kvm_mmu_slot_apply_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot)
 {
 	bool flush;
+	struct kvm_write_access_data data = {
+		.i = 0,
+		.memslot = memslot,
+	};
 
 	spin_lock(&kvm->mmu_lock);
-	flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
-				      false, NULL);
+	flush = slot_handle_all_level(kvm, memslot, slot_rmap_apply_protection,
+				      false, &data);
 	spin_unlock(&kvm->mmu_lock);
 
 	/*
-	 * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
+	 * kvm_mmu_slot_apply_write_access() and kvm_vm_ioctl_get_dirty_log()
 	 * which do tlb flush out of mmu-lock should be serialized by
 	 * kvm->slots_lock otherwise tlb flush would be missed.
 	 */
@@ -5301,7 +5351,7 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 					false, NULL);
 	spin_unlock(&kvm->mmu_lock);
 
-	/* see kvm_mmu_slot_remove_write_access */
+	/* see kvm_mmu_slot_apply_write_access*/
 	lockdep_assert_held(&kvm->slots_lock);
 
 	if (flush)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa70205a..9addc46d75be 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4177,7 +4177,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 
 	/*
 	 * All the TLBs can be flushed out of mmu lock, see the comments in
-	 * kvm_mmu_slot_remove_write_access().
+	 * kvm_mmu_slot_apply_write_access().
 	 */
 	lockdep_assert_held(&kvm->slots_lock);
 	if (is_dirty)
@@ -6670,7 +6670,76 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 }
 #endif
 
-/*
+#ifdef CONFIG_KVM_MROE
+static int __roe_protect_frame(struct kvm *kvm, gpa_t gpa)
+{
+	struct kvm_memory_slot *slot;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!slot || gfn > slot->base_gfn + slot->npages)
+		return -EINVAL;
+	set_bit(gfn - slot->base_gfn, slot->mroe_bitmap);
+	kvm_mmu_slot_apply_write_access(kvm, slot);
+	kvm_arch_flush_shadow_memslot(kvm, slot);
+
+	return 0;
+}
+
+static int roe_protect_frame(struct kvm *kvm, gpa_t gpa)
+{
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+	r = __roe_protect_frame(kvm, gpa);
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+static bool kvm_mroe_userspace(struct kvm_vcpu *vcpu)
+{
+	u64 rflags;
+	u64 cr0 = kvm_read_cr0(vcpu);
+	u64 iopl;
+
+	// first checking we are not in protected mode
+	if ((cr0 & 1) == 0)
+		return false;
+	/*
+	 * we don't need to worry about comments in __get_regs
+	 * because we are sure that this function will only be
+	 * triggered at the end of a hypercall
+	 */
+	 rflags = kvm_get_rflags(vcpu);
+	iopl = (rflags >> 12) & 3;
+	if (iopl != 3)
+		return false;
+	return true;
+}
+
+static int kvm_mroe(struct kvm_vcpu *vcpu, u64 gva)
+{
+	struct kvm *kvm = vcpu->kvm;
+	gpa_t gpa;
+	u64 hva;
+
+	/*
+	 * First we need to maek sure that we are running from something that
+	 * isn't usermode
+	 */
+	if (kvm_mroe_userspace(vcpu))
+		return -1;//I don't really know what to return
+	if (gva & ~PAGE_MASK)
+		return -EINVAL;
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+	hva = gfn_to_hva(kvm, gpa >> PAGE_SHIFT);
+	if (!access_ok(VERIFY_WRITE, hva, PAGE_SIZE))
+		return -EINVAL;
+	return roe_protect_frame(vcpu->kvm, gpa);
+}
+#endif
+
+ /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
  * @apicid - apicid of vcpu to be kicked.
@@ -6737,6 +6806,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+#endif
+#ifdef CONFIG_KVM_MROE
+	case KVM_HC_HMROE:
+		ret = kvm_mroe(vcpu, a0);
+		break;
 #endif
 	default:
 		ret = -KVM_ENOSYS;
@@ -8971,8 +9045,8 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 				     struct kvm_memory_slot *new)
 {
 	/* Still write protect RO slot */
+	kvm_mmu_slot_apply_write_access(kvm, new);
 	if (new->flags & KVM_MEM_READONLY) {
-		kvm_mmu_slot_remove_write_access(kvm, new);
 		return;
 	}
 
@@ -9010,7 +9084,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		if (kvm_x86_ops->slot_enable_log_dirty)
 			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
 		else
-			kvm_mmu_slot_remove_write_access(kvm, new);
+			kvm_mmu_slot_apply_write_access(kvm, new);
 	} else {
 		if (kvm_x86_ops->slot_disable_log_dirty)
 			kvm_x86_ops->slot_disable_log_dirty(kvm, new);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4ee7bc548a83..82c5780e11d9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -297,6 +297,9 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
+#ifdef CONFIG_KVM_MROE
+	unsigned long *mroe_bitmap;
+#endif
 	unsigned long *dirty_bitmap;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index dcf629dd2889..4e2badc09b5b 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -26,6 +26,7 @@
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 #define KVM_HC_CLOCK_PAIRING		9
+#define KVM_HC_HMROE			10
 
 /*
  * hypercalls use architecture specific
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..0f7141e4d550 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -794,6 +794,17 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }
 
+static int kvm_init_mroe_bitmap(struct kvm_memory_slot *slot)
+{
+#ifdef CONFIG_KVM_MROE
+	slot->mroe_bitmap = kvzalloc(BITS_TO_LONGS(slot->npages) *
+	sizeof(unsigned long), GFP_KERNEL);
+	if (!slot->mroe_bitmap)
+		return -ENOMEM;
+#endif
+	return 0;
+}
+
 /*
  * Insert memslot and re-sort memslots based on their GFN,
  * so binary search could be used to lookup GFN.
@@ -1011,6 +1022,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (kvm_create_dirty_bitmap(&new) < 0)
 			goto out_free;
 	}
+	if (kvm_init_mroe_bitmap(&new) < 0)
+		goto out_free;
 
 	slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!slots)
@@ -1264,13 +1277,23 @@ static bool memslot_is_readonly(struct kvm_memory_slot *slot)
 	return slot->flags & KVM_MEM_READONLY;
 }
 
+static bool gfn_is_readonly(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+#ifdef CONFIG_KVM_MROE
+	return test_bit(gfn - slot->base_gfn, slot->mroe_bitmap) ||
+		memslot_is_readonly(slot);
+#else
+	return memslot_is_readonly(slot);
+#endif
+}
+
 static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 				       gfn_t *nr_pages, bool write)
 {
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return KVM_HVA_ERR_BAD;
 
-	if (memslot_is_readonly(slot) && write)
+	if (gfn_is_readonly(slot, gfn) && write)
 		return KVM_HVA_ERR_RO_BAD;
 
 	if (nr_pages)
@@ -1314,7 +1337,7 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
 	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 
 	if (!kvm_is_error_hva(hva) && writable)
-		*writable = !memslot_is_readonly(slot);
+		*writable = !gfn_is_readonly(slot, gfn);
 
 	return hva;
 }
@@ -1554,7 +1577,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
 	}
 
 	/* Do not map writable pfn in the readonly memslot. */
-	if (writable && memslot_is_readonly(slot)) {
+	if (writable && gfn_is_readonly(slot, gfn)) {
 		*writable = false;
 		writable = NULL;
 	}
-- 
2.16.4

^ permalink raw reply related

* [PATCH net-next 0/9] TX used ring batched updating for vhost
From: Jason Wang @ 2018-07-20  0:15 UTC (permalink / raw)
  To: mst, jasowang, netdev; +Cc: linux-kernel, kvm, virtualization

Hi:

This series implements batched updating of the used ring for TX. This
helps to reduce cache contention on the used ring. The idea is to first
split the datacopy path from zerocopy, and to do batching only for
datacopy. This is because zerocopy already supports its own batching.

TX PPS was increased by 25.8% and Netperf TCP does not show obvious
differences.

The split of the datapath will also be helpful for future implementations
such as in-order completion.

Please review.

Thanks

Jason Wang (9):
  vhost_net: drop unnecessary parameter
  vhost_net: introduce helper to initialize tx iov iter
  vhost_net: introduce vhost_exceeds_weight()
  vhost_net: introduce get_tx_bufs()
  vhost_net: introduce tx_can_batch()
  vhost_net: split out datacopy logic
  vhost_net: rename vhost_rx_signal_used() to vhost_net_signal_used()
  vhost_net: rename VHOST_RX_BATCH to VHOST_NET_BATCH
  vhost_net: batch update used ring for datacopy TX

 drivers/vhost/net.c | 249 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 179 insertions(+), 70 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH net-next 1/9] vhost_net: drop unnecessary parameter
From: Jason Wang @ 2018-07-20  0:15 UTC (permalink / raw)
  To: mst, jasowang, netdev; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <1532045721-4958-1-git-send-email-jasowang@redhat.com>

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index b224036..1a8175a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -430,7 +430,6 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
-				    struct iovec iov[], unsigned int iov_size,
 				    unsigned int *out_num, unsigned int *in_num,
 				    bool *busyloop_intr)
 {
@@ -512,9 +511,8 @@ static void handle_tx(struct vhost_net *net)
 			vhost_zerocopy_signal_used(net, vq);
 
 		busyloop_intr = false;
-		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
-						ARRAY_SIZE(vq->iov),
-						&out, &in, &busyloop_intr);
+		head = vhost_net_tx_get_vq_desc(net, vq, &out, &in,
+						&busyloop_intr);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(head < 0))
 			break;
-- 
2.7.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox