From: Janosch Frank <frankja@linux.vnet.ibm.com>
To: kvm@vger.kernel.org
Cc: schwidefsky@de.ibm.com, borntraeger@de.ibm.com, david@redhat.com,
	dominik.dingel@gmail.com, linux-s390@vger.kernel.org
Subject: [RFC/PATCH v2 03/22] s390/mm: add gmap PMD invalidation notification
Date: Wed, 13 Dec 2017 13:53:14 +0100
Message-ID: <1513169613-13509-4-git-send-email-frankja@linux.vnet.ibm.com>
In-Reply-To: <1513169613-13509-1-git-send-email-frankja@linux.vnet.ibm.com>

For later migration of huge pages we want to write-protect guest
PMDs. While doing this, we have to make absolutely sure that the
guest's lowcore is always accessible when the VCPU is running. With
PTEs, this is solved by marking the PGSTEs of the lowcore pages with
the invalidation notification bit and kicking the guest out of the SIE
via a notifier function if we need to invalidate such a page.
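
For reference, the 4k case boils down to the following in
gmap_protect_pte() (simplified sketch; pbits and ptep are that
function's locals, and PGSTE_IN_BIT is the PGSTE invalidation
notification bit):

	/* simplified from gmap_protect_pte() */
	if (bits & GMAP_NOTIFY_MPROT)
		pbits |= PGSTE_IN_BIT;
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);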

With PMDs we do not have PGSTEs or any other spare bits we could use
in the host PMD. Instead we pick one of the free bits in the gmap
PMD. Every time a host PMD is invalidated, we check whether the
respective gmap PMD has the bit set and, if so, fire the notifier.
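
Condensed from the pmdp_notify() implementation added below, the check
on invalidation looks like this (gmap list walk and locking stripped):

	/* runs for each gmap on mm->context.gmap_list */
	table = radix_tree_lookup(&gmap->host_to_guest,
				  vmaddr >> PMD_SHIFT);
	if (table && (*table & _SEGMENT_ENTRY_GMAP_IN)) {
		gaddr = __gmap_segment_gaddr(table);
		*table &= ~_SEGMENT_ENTRY_GMAP_IN;
		gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
	}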

As a first step we only support setting the invalidation notification
bit; restricting access to guest PMDs is not yet supported and will
follow shortly.
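
To illustrate the intended use: kvm-s390 already re-arms write
protection with notification for the prefix (lowcore) pages via
gmap_mprotect_notify(), which ends up in gmap_protect_range() with
GMAP_NOTIFY_MPROT set. With a hugetlbfs backing, that unchanged call
now marks the gmap PMD instead of the PGSTEs:

	/* as done in kvm_s390_handle_requests() for KVM_REQ_MMU_RELOAD */
	rc = gmap_mprotect_notify(vcpu->arch.gmap,
				  kvm_s390_get_prefix(vcpu),
				  PAGE_SIZE * 2, PROT_WRITE);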

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
---
 arch/s390/include/asm/gmap.h    |  3 ++
 arch/s390/include/asm/pgtable.h |  7 +++-
 arch/s390/mm/gmap.c             | 92 ++++++++++++++++++++++++++++++++++++-----
 arch/s390/mm/pgtable.c          |  4 ++
 4 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index c1bc563..21bb658 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -13,6 +13,9 @@
 #define GMAP_NOTIFY_SHADOW	0x2
 #define GMAP_NOTIFY_MPROT	0x1
 
+/* Status bits in the gmap segment entry. */
+#define _SEGMENT_ENTRY_GMAP_IN		0x0001	/* invalidation notify bit */
+
 /**
  * struct gmap_struct - guest address space
  * @list: list head for the mm->context gmap list
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc9..ba3840c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -269,8 +269,10 @@ static inline int is_module_addr(void *addr)
 #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
 
 /* Bits in the segment table entry */
-#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
+#define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE		0xfffffffffff0ff33UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
 #define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
@@ -1093,6 +1095,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void ptep_notify(struct mm_struct *mm, unsigned long addr,
 		 pte_t *ptep, unsigned long bits);
+void pmdp_notify(struct mm_struct *mm, unsigned long addr);
 int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
 		    pte_t *ptep, int prot, unsigned long bit);
 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index e7825d2..ff7fe24 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -596,10 +596,15 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	if (*table == _SEGMENT_ENTRY_EMPTY) {
 		rc = radix_tree_insert(&gmap->host_to_guest,
 				       vmaddr >> PMD_SHIFT, table);
-		if (!rc)
-			*table = pmd_val(*pmd);
-	} else
-		rc = 0;
+		if (!rc) {
+			if (pmd_large(*pmd)) {
+				*table = pmd_val(*pmd) &
+					_SEGMENT_ENTRY_HARDWARE_BITS_LARGE;
+			} else
+				*table = pmd_val(*pmd) &
+					_SEGMENT_ENTRY_HARDWARE_BITS;
+		}
+	}
 	spin_unlock(&gmap->guest_table_lock);
 	spin_unlock(ptl);
 	radix_tree_preload_end();
@@ -962,6 +967,33 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
 }
 
 /*
+ * gmap_protect_pmd - set pmd notification bits
+ * @pmdp: pointer to the pmd to be protected
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns 0 if successfully protected, -EAGAIN if a fixup is needed
+ * (i.e. the pmd is invalid or already write-protected).
+ *
+ * Expected to be called with gmap->mm->mmap_sem in read and
+ * guest_table_lock held.
+ */
+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
+			    pmd_t *pmdp, int prot, unsigned long bits)
+{
+	const int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
+	const int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
+
+	/* Fixup needed */
+	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot & PROT_WRITE)))
+		return -EAGAIN;
+
+	if (bits & GMAP_NOTIFY_MPROT)
+		pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
+	return 0;
+}
+
+/*
  * gmap_protect_range - remove access rights to memory and set pgste bits
  * @gmap: pointer to guest mapping meta data structure
  * @gaddr: virtual address in the guest address space
@@ -979,7 +1011,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
 static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
 			      unsigned long len, int prot, unsigned long bits)
 {
-	unsigned long vmaddr;
+	unsigned long vmaddr, dist;
 	pmd_t *pmdp;
 	int rc;
 
@@ -987,11 +1019,21 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
 		rc = -EAGAIN;
 		pmdp = gmap_pmd_op_walk(gmap, gaddr);
 		if (pmdp) {
-			rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
-					      bits);
-			if (!rc) {
-				len -= PAGE_SIZE;
-				gaddr += PAGE_SIZE;
+			if (!pmd_large(*pmdp)) {
+				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
+						      bits);
+				if (!rc) {
+					len -= PAGE_SIZE;
+					gaddr += PAGE_SIZE;
+				}
+			} else {
+				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
+						      bits);
+				if (!rc) {
+					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
+					len = len < dist ? 0 : len - dist;
+					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
+				}
 			}
 			gmap_pmd_op_end(gmap, pmdp);
 		}
@@ -2185,6 +2227,36 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 }
 EXPORT_SYMBOL_GPL(ptep_notify);
 
+/**
+ * pmdp_notify - call all invalidation callbacks for a specific pmd
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ *
+ * This function is expected to be called with mmap_sem held in read.
+ */
+void pmdp_notify(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *table, gaddr;
+	struct gmap *gmap;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		table = radix_tree_lookup(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (!table || !(*table & _SEGMENT_ENTRY_GMAP_IN)) {
+			spin_unlock(&gmap->guest_table_lock);
+			continue;
+		}
+		gaddr = __gmap_segment_gaddr(table);
+		*table &= ~_SEGMENT_ENTRY_GMAP_IN;
+		spin_unlock(&gmap->guest_table_lock);
+		gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pmdp_notify);
+
 static inline void thp_split_mm(struct mm_struct *mm)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4f2b65d..a6cc540 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -405,6 +405,8 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
 	pmd_t old;
 
 	preempt_disable();
+	if (mm_has_pgste(mm))
+		pmdp_notify(mm, addr);
 	old = pmdp_flush_direct(mm, addr, pmdp);
 	*pmdp = new;
 	preempt_enable();
@@ -418,6 +420,8 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 	pmd_t old;
 
 	preempt_disable();
+	if (mm_has_pgste(mm))
+		pmdp_notify(mm, addr);
 	old = pmdp_flush_lazy(mm, addr, pmdp);
 	*pmdp = new;
 	preempt_enable();
-- 
2.7.4


Thread overview: 67+ messages
2017-12-13 12:53 [RFC/PATCH v2 00/22] KVM/s390: Hugetlbfs enablement Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 01/22] s390/mm: make gmap_protect_range more modular Janosch Frank
2018-01-22 11:33   ` David Hildenbrand
2018-01-22 12:31     ` Janosch Frank
2018-01-22 12:50       ` David Hildenbrand
2018-01-22 13:02         ` Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 02/22] s390/mm: Abstract gmap notify bit setting Janosch Frank
2018-01-22 11:34   ` David Hildenbrand
2017-12-13 12:53 ` Janosch Frank [this message]
2017-12-21  9:24   ` [RFC/PATCH v2 03/22] s390/mm: add gmap PMD invalidation notification Janosch Frank
2018-01-22 11:46   ` David Hildenbrand
2018-01-22 13:13     ` Janosch Frank
2018-01-22 13:29       ` David Hildenbrand
2018-01-22 14:04         ` Janosch Frank
2018-01-22 11:56   ` David Hildenbrand
2018-01-22 12:09     ` Janosch Frank
2018-01-22 12:12       ` David Hildenbrand
2017-12-13 12:53 ` [RFC/PATCH v2 04/22] s390/mm: Add gmap pmd invalidation and clearing Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 05/22] s390/mm: hugetlb pages within a gmap can not be freed Janosch Frank
2018-01-24 13:45   ` David Hildenbrand
2018-01-24 13:56     ` Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 06/22] s390/mm: Introduce gmap_pmdp_xchg Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 07/22] RFC: s390/mm: Transfer guest pmd protection to host Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 08/22] s390/mm: Add huge page dirty sync support Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 09/22] s390/mm: clear huge page storage keys on enable_skey Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 10/22] s390/mm: Add huge pmd storage key handling Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 11/22] s390/mm: Remove superfluous parameter Janosch Frank
2017-12-21  9:22   ` Janosch Frank
2018-01-16 12:39     ` Janosch Frank
2018-01-16 13:11   ` David Hildenbrand
2018-01-22 13:14   ` Christian Borntraeger
2018-01-22 13:24     ` Martin Schwidefsky
2017-12-13 12:53 ` [RFC/PATCH v2 12/22] s390/mm: Add gmap_protect_large read protection support Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 13/22] s390/mm: Make gmap_read_table EDAT1 compatible Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 14/22] s390/mm: Make protect_rmap EDAT1 compatible Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 15/22] s390/mm: GMAP read table extensions Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 16/22] s390/mm: Add shadow segment code Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 17/22] s390/mm: Add VSIE reverse fake case Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 18/22] s390/mm: Remove gmap_pte_op_walk Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 19/22] s390/mm: Split huge pages if granular protection is needed Janosch Frank
2018-01-25  7:16   ` Janosch Frank
2018-01-25 14:39     ` David Hildenbrand
2018-01-25 14:55       ` Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 20/22] s390/mm: Enable gmap huge pmd support Janosch Frank
2017-12-13 12:53 ` [RFC/PATCH v2 21/22] KVM: s390: Add KVM HPAGE capability Janosch Frank
2017-12-20 13:02   ` Cornelia Huck
2017-12-20 13:17     ` Janosch Frank
2017-12-20 13:21       ` Cornelia Huck
2017-12-13 12:53 ` [RFC/PATCH v2 22/22] RFC: s390/mm: Add gmap lock classes Janosch Frank
2017-12-20 12:24   ` Christian Borntraeger
2017-12-20 12:36     ` Janosch Frank
2017-12-20 12:23 ` [RFC/PATCH v2 00/22] KVM/s390: Hugetlbfs enablement Christian Borntraeger
2017-12-21 12:00   ` David Hildenbrand
2017-12-22  9:08     ` Christian Borntraeger
2018-01-02  0:02       ` Janosch Frank
2018-01-22 11:23 ` David Hildenbrand
2018-01-22 11:56   ` Christian Borntraeger
2018-01-23 21:15 ` David Hildenbrand
2018-01-24  9:01   ` Janosch Frank
2018-01-24  9:14     ` David Hildenbrand
2018-01-25 15:33       ` [PATCH 0/2] Huge page pte protection Janosch Frank
2018-01-25 15:33         ` [PATCH 1/2] mm: s390: Only notify on 4k pages Janosch Frank
2018-01-25 16:04           ` David Hildenbrand
2018-01-26 10:31             ` Janosch Frank
2018-01-25 15:33         ` [PATCH 2/2] mm: s390: Rename gmap_pte_op_fixup Janosch Frank
2018-01-26 10:34       ` [PATCH v2] mm: s390: Only notify on 4k pages Janosch Frank
2018-01-30 10:19         ` David Hildenbrand
