[PATCH v3] x86/PoD: shorten certain operations on higher order ranges

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3] x86/PoD: shorten certain operations on higher order ranges
@ 2015-10-01 10:26 Jan Beulich
  2015-10-01 11:25 ` Andrew Cooper
  2015-10-01 15:50 ` George Dunlap
  0 siblings, 2 replies; 3+ messages in thread
From: Jan Beulich @ 2015-10-01 10:26 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

[-- Attachment #1: Type: text/plain, Size: 9214 bytes --]

Now that p2m->get_entry() always returns a valid order, utilize this
to accelerate some of the operations in PoD code. (There are two uses
of p2m->get_entry() left which don't easily lend themselves to this
optimization.)

Also adjust a few types as needed and remove stale comments from
p2m_pod_cache_add() (to avoid duplicating them yet another time).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Restore per-page checking in p2m_pod_zero_check_superpage().

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -119,20 +119,23 @@ p2m_pod_cache_add(struct p2m_domain *p2m
 
     unlock_page_alloc(p2m);
 
-    /* Then add the first one to the appropriate populate-on-demand list */
-    switch(order)
+    /* Then add to the appropriate populate-on-demand list. */
+    switch ( order )
     {
+    case PAGE_ORDER_1G:
+        for ( i = 0; i < (1UL << PAGE_ORDER_1G); i += 1UL << PAGE_ORDER_2M )
+            page_list_add_tail(page + i, &p2m->pod.super);
+        break;
     case PAGE_ORDER_2M:
-        page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */
-        p2m->pod.count += 1 << order;
+        page_list_add_tail(page, &p2m->pod.super);
         break;
     case PAGE_ORDER_4K:
-        page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
-        p2m->pod.count += 1;
+        page_list_add_tail(page, &p2m->pod.single);
         break;
     default:
         BUG();
     }
+    p2m->pod.count += 1 << order;
 
     return 0;
 }
@@ -502,11 +505,10 @@ p2m_pod_decrease_reservation(struct doma
                              unsigned int order)
 {
     int ret=0;
-    int i;
+    unsigned long i, n;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    int steal_for_cache;
-    int pod, nonpod, ram;
+    bool_t steal_for_cache;
+    long pod, nonpod, ram;
 
     gfn_lock(p2m, gpfn, order);
     pod_lock(p2m);    
@@ -525,21 +527,21 @@ recount:
     /* Figure out if we need to steal some freed memory for our cache */
     steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
 
-    /* FIXME: Add contiguous; query for PSE entries? */
-    for ( i=0; i<(1<<order); i++)
+    for ( i = 0; i < (1UL << order); i += n )
     {
         p2m_access_t a;
         p2m_type_t t;
+        unsigned int cur_order;
 
-        (void)p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
-
+        p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
+        n = 1UL << min(order, cur_order);
         if ( t == p2m_populate_on_demand )
-            pod++;
+            pod += n;
         else
         {
-            nonpod++;
+            nonpod += n;
             if ( p2m_is_ram(t) )
-                ram++;
+                ram += n;
         }
     }
 
@@ -574,41 +576,53 @@ recount:
      * + There are PoD entries to handle, or
      * + There is ram left, and we want to steal it
      */
-    for ( i=0;
-          i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
-          i++)
+    for ( i = 0;
+          i < (1UL << order) && (pod > 0 || (steal_for_cache && ram > 0));
+          i += n )
     {
         mfn_t mfn;
         p2m_type_t t;
         p2m_access_t a;
+        unsigned int cur_order;
 
-        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
+        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
+        if ( order < cur_order )
+            cur_order = order;
+        n = 1UL << cur_order;
         if ( t == p2m_populate_on_demand )
         {
-            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
-                          p2m->default_access);
-            p2m->pod.entry_count--;
+            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+                          p2m_invalid, p2m->default_access);
+            p2m->pod.entry_count -= n;
             BUG_ON(p2m->pod.entry_count < 0);
-            pod--;
+            pod -= n;
         }
         else if ( steal_for_cache && p2m_is_ram(t) )
         {
+            /*
+             * If we need less than 1 << cur_order, we may end up stealing
+             * more memory here than we actually need. This will be rectified
+             * below, however; and stealing too much and then freeing what we
+             * need may allow us to free smaller pages from the cache, and
+             * avoid breaking up superpages.
+             */
             struct page_info *page;
+            unsigned int j;
 
             ASSERT(mfn_valid(mfn));
 
             page = mfn_to_page(mfn);
 
-            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
-                          p2m->default_access);
-            set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
-
-            p2m_pod_cache_add(p2m, page, 0);
+            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+                          p2m_invalid, p2m->default_access);
+            for ( j = 0; j < n; ++j )
+                set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
+            p2m_pod_cache_add(p2m, page, cur_order);
 
             steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
 
-            nonpod--;
-            ram--;
+            nonpod -= n;
+            ram -= n;
         }
     }    
 
@@ -649,7 +663,8 @@ p2m_pod_zero_check_superpage(struct p2m_
     p2m_type_t type, type0 = 0;
     unsigned long * map = NULL;
     int ret=0, reset = 0;
-    int i, j;
+    unsigned long i, n;
+    unsigned int j;
     int max_ref = 1;
     struct domain *d = p2m->domain;
 
@@ -668,16 +683,14 @@ p2m_pod_zero_check_superpage(struct p2m_
 
     /* Look up the mfns, checking to make sure they're the same mfn
      * and aligned, and mapping them. */
-    for ( i=0; i<SUPERPAGE_PAGES; i++ )
+    for ( i = 0; i < SUPERPAGE_PAGES; i += n )
     {
         p2m_access_t a; 
-        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, NULL, NULL);
+        unsigned int cur_order;
+        unsigned long k;
+        const struct page_info *page;
 
-        if ( i == 0 )
-        {
-            mfn0 = mfn;
-            type0 = type;
-        }
+        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, &cur_order, NULL);
 
         /* Conditions that must be met for superpage-superpage:
          * + All gfns are ram types
@@ -690,15 +703,25 @@ p2m_pod_zero_check_superpage(struct p2m_
          * + None of the mfns are likely to be mapped elsewhere (refcount
          *   2 or less for shadow, 1 for hap)
          */
-        if ( !p2m_is_ram(type)
-             || type != type0
-             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
-             || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
-             || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap  ) != 0 )
-             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
-             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
-                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
+        if ( !p2m_is_ram(type) )
+            goto out;
+
+        if ( i == 0 )
+        {
+            if ( !superpage_aligned(mfn_x(mfn)) )
+                goto out;
+            mfn0 = mfn;
+            type0 = type;
+        }
+        else if ( type != type0 || mfn_x(mfn) != (mfn_x(mfn0) + i) )
             goto out;
+
+        n = 1UL << min(cur_order, SUPERPAGE_ORDER + 0U);
+        for ( k = 0, page = mfn_to_page(mfn); k < n; ++k, ++page )
+            if ( !(page->count_info & PGC_allocated) ||
+                 (page->count_info & (PGC_page_table | PGC_xen_heap)) ||
+                 (page->count_info & PGC_count_mask) > max_ref )
+                goto out;
     }
 
     /* Now, do a quick check to see if it may be zero before unmapping. */
@@ -1114,7 +1137,7 @@ guest_physmap_mark_populate_on_demand(st
                                       unsigned int order)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    unsigned long i, pod_count = 0;
+    unsigned long i, n, pod_count = 0;
     p2m_type_t ot;
     mfn_t omfn;
     int rc = 0;
@@ -1127,10 +1150,13 @@ guest_physmap_mark_populate_on_demand(st
     P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
 
     /* Make sure all gpfns are unused */
-    for ( i = 0; i < (1UL << order); i++ )
+    for ( i = 0; i < (1UL << order); i += n )
     {
         p2m_access_t a;
-        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL, NULL);
+        unsigned int cur_order;
+
+        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, &cur_order, NULL);
+        n = 1UL << min(order, cur_order);
         if ( p2m_is_ram(ot) )
         {
             P2M_DEBUG("gfn_to_mfn returned type %d!\n", ot);
@@ -1140,7 +1166,7 @@ guest_physmap_mark_populate_on_demand(st
         else if ( ot == p2m_populate_on_demand )
         {
             /* Count how man PoD entries we'll be replacing if successful */
-            pod_count++;
+            pod_count += n;
         }
     }
 



[-- Attachment #2: x86-p2m-pod-query-contiguous.patch --]
[-- Type: text/plain, Size: 9272 bytes --]

x86/PoD: shorten certain operations on higher order ranges

Now that p2m->get_entry() always returns a valid order, utilize this
to accelerate some of the operations in PoD code. (There are two uses
of p2m->get_entry() left which don't easily lend themselves to this
optimization.)

Also adjust a few types as needed and remove stale comments from
p2m_pod_cache_add() (to avoid duplicating them yet another time).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Restore per-page checking in p2m_pod_zero_check_superpage().

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -119,20 +119,23 @@ p2m_pod_cache_add(struct p2m_domain *p2m
 
     unlock_page_alloc(p2m);
 
-    /* Then add the first one to the appropriate populate-on-demand list */
-    switch(order)
+    /* Then add to the appropriate populate-on-demand list. */
+    switch ( order )
     {
+    case PAGE_ORDER_1G:
+        for ( i = 0; i < (1UL << PAGE_ORDER_1G); i += 1UL << PAGE_ORDER_2M )
+            page_list_add_tail(page + i, &p2m->pod.super);
+        break;
     case PAGE_ORDER_2M:
-        page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */
-        p2m->pod.count += 1 << order;
+        page_list_add_tail(page, &p2m->pod.super);
         break;
     case PAGE_ORDER_4K:
-        page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
-        p2m->pod.count += 1;
+        page_list_add_tail(page, &p2m->pod.single);
         break;
     default:
         BUG();
     }
+    p2m->pod.count += 1 << order;
 
     return 0;
 }
@@ -502,11 +505,10 @@ p2m_pod_decrease_reservation(struct doma
                              unsigned int order)
 {
     int ret=0;
-    int i;
+    unsigned long i, n;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
-    int steal_for_cache;
-    int pod, nonpod, ram;
+    bool_t steal_for_cache;
+    long pod, nonpod, ram;
 
     gfn_lock(p2m, gpfn, order);
     pod_lock(p2m);    
@@ -525,21 +527,21 @@ recount:
     /* Figure out if we need to steal some freed memory for our cache */
     steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
 
-    /* FIXME: Add contiguous; query for PSE entries? */
-    for ( i=0; i<(1<<order); i++)
+    for ( i = 0; i < (1UL << order); i += n )
     {
         p2m_access_t a;
         p2m_type_t t;
+        unsigned int cur_order;
 
-        (void)p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
-
+        p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
+        n = 1UL << min(order, cur_order);
         if ( t == p2m_populate_on_demand )
-            pod++;
+            pod += n;
         else
         {
-            nonpod++;
+            nonpod += n;
             if ( p2m_is_ram(t) )
-                ram++;
+                ram += n;
         }
     }
 
@@ -574,41 +576,53 @@ recount:
      * + There are PoD entries to handle, or
      * + There is ram left, and we want to steal it
      */
-    for ( i=0;
-          i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
-          i++)
+    for ( i = 0;
+          i < (1UL << order) && (pod > 0 || (steal_for_cache && ram > 0));
+          i += n )
     {
         mfn_t mfn;
         p2m_type_t t;
         p2m_access_t a;
+        unsigned int cur_order;
 
-        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
+        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
+        if ( order < cur_order )
+            cur_order = order;
+        n = 1UL << cur_order;
         if ( t == p2m_populate_on_demand )
         {
-            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
-                          p2m->default_access);
-            p2m->pod.entry_count--;
+            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+                          p2m_invalid, p2m->default_access);
+            p2m->pod.entry_count -= n;
             BUG_ON(p2m->pod.entry_count < 0);
-            pod--;
+            pod -= n;
         }
         else if ( steal_for_cache && p2m_is_ram(t) )
         {
+            /*
+             * If we need less than 1 << cur_order, we may end up stealing
+             * more memory here than we actually need. This will be rectified
+             * below, however; and stealing too much and then freeing what we
+             * need may allow us to free smaller pages from the cache, and
+             * avoid breaking up superpages.
+             */
             struct page_info *page;
+            unsigned int j;
 
             ASSERT(mfn_valid(mfn));
 
             page = mfn_to_page(mfn);
 
-            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
-                          p2m->default_access);
-            set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
-
-            p2m_pod_cache_add(p2m, page, 0);
+            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+                          p2m_invalid, p2m->default_access);
+            for ( j = 0; j < n; ++j )
+                set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
+            p2m_pod_cache_add(p2m, page, cur_order);
 
             steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
 
-            nonpod--;
-            ram--;
+            nonpod -= n;
+            ram -= n;
         }
     }    
 
@@ -649,7 +663,8 @@ p2m_pod_zero_check_superpage(struct p2m_
     p2m_type_t type, type0 = 0;
     unsigned long * map = NULL;
     int ret=0, reset = 0;
-    int i, j;
+    unsigned long i, n;
+    unsigned int j;
     int max_ref = 1;
     struct domain *d = p2m->domain;
 
@@ -668,16 +683,14 @@ p2m_pod_zero_check_superpage(struct p2m_
 
     /* Look up the mfns, checking to make sure they're the same mfn
      * and aligned, and mapping them. */
-    for ( i=0; i<SUPERPAGE_PAGES; i++ )
+    for ( i = 0; i < SUPERPAGE_PAGES; i += n )
     {
         p2m_access_t a; 
-        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, NULL, NULL);
+        unsigned int cur_order;
+        unsigned long k;
+        const struct page_info *page;
 
-        if ( i == 0 )
-        {
-            mfn0 = mfn;
-            type0 = type;
-        }
+        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, &cur_order, NULL);
 
         /* Conditions that must be met for superpage-superpage:
          * + All gfns are ram types
@@ -690,15 +703,25 @@ p2m_pod_zero_check_superpage(struct p2m_
          * + None of the mfns are likely to be mapped elsewhere (refcount
          *   2 or less for shadow, 1 for hap)
          */
-        if ( !p2m_is_ram(type)
-             || type != type0
-             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
-             || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
-             || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap  ) != 0 )
-             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
-             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
-                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
+        if ( !p2m_is_ram(type) )
+            goto out;
+
+        if ( i == 0 )
+        {
+            if ( !superpage_aligned(mfn_x(mfn)) )
+                goto out;
+            mfn0 = mfn;
+            type0 = type;
+        }
+        else if ( type != type0 || mfn_x(mfn) != (mfn_x(mfn0) + i) )
             goto out;
+
+        n = 1UL << min(cur_order, SUPERPAGE_ORDER + 0U);
+        for ( k = 0, page = mfn_to_page(mfn); k < n; ++k, ++page )
+            if ( !(page->count_info & PGC_allocated) ||
+                 (page->count_info & (PGC_page_table | PGC_xen_heap)) ||
+                 (page->count_info & PGC_count_mask) > max_ref )
+                goto out;
     }
 
     /* Now, do a quick check to see if it may be zero before unmapping. */
@@ -1114,7 +1137,7 @@ guest_physmap_mark_populate_on_demand(st
                                       unsigned int order)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    unsigned long i, pod_count = 0;
+    unsigned long i, n, pod_count = 0;
     p2m_type_t ot;
     mfn_t omfn;
     int rc = 0;
@@ -1127,10 +1150,13 @@ guest_physmap_mark_populate_on_demand(st
     P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
 
     /* Make sure all gpfns are unused */
-    for ( i = 0; i < (1UL << order); i++ )
+    for ( i = 0; i < (1UL << order); i += n )
     {
         p2m_access_t a;
-        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL, NULL);
+        unsigned int cur_order;
+
+        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, &cur_order, NULL);
+        n = 1UL << min(order, cur_order);
         if ( p2m_is_ram(ot) )
         {
             P2M_DEBUG("gfn_to_mfn returned type %d!\n", ot);
@@ -1140,7 +1166,7 @@ guest_physmap_mark_populate_on_demand(st
         else if ( ot == p2m_populate_on_demand )
         {
             /* Count how man PoD entries we'll be replacing if successful */
-            pod_count++;
+            pod_count += n;
         }
     }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] x86/PoD: shorten certain operations on higher order ranges
  2015-10-01 10:26 [PATCH v3] x86/PoD: shorten certain operations on higher order ranges Jan Beulich
@ 2015-10-01 11:25 ` Andrew Cooper
  2015-10-01 15:50 ` George Dunlap
  1 sibling, 0 replies; 3+ messages in thread
From: Andrew Cooper @ 2015-10-01 11:25 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 01/10/15 11:26, Jan Beulich wrote:
> Now that p2m->get_entry() always returns a valid order, utilize this
> to accelerate some of the operations in PoD code. (There are two uses
> of p2m->get_entry() left which don't easily lend themselves to this
> optimization.)
>
> Also adjust a few types as needed and remove stale comments from
> p2m_pod_cache_add() (to avoid duplicating them yet another time).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: Restore per-page checking in p2m_pod_zero_check_superpage().
>
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -119,20 +119,23 @@ p2m_pod_cache_add(struct p2m_domain *p2m
>  
>      unlock_page_alloc(p2m);
>  
> -    /* Then add the first one to the appropriate populate-on-demand list */
> -    switch(order)
> +    /* Then add to the appropriate populate-on-demand list. */
> +    switch ( order )
>      {
> +    case PAGE_ORDER_1G:
> +        for ( i = 0; i < (1UL << PAGE_ORDER_1G); i += 1UL << PAGE_ORDER_2M )
> +            page_list_add_tail(page + i, &p2m->pod.super);
> +        break;
>      case PAGE_ORDER_2M:
> -        page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */
> -        p2m->pod.count += 1 << order;
> +        page_list_add_tail(page, &p2m->pod.super);
>          break;
>      case PAGE_ORDER_4K:
> -        page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
> -        p2m->pod.count += 1;
> +        page_list_add_tail(page, &p2m->pod.single);
>          break;
>      default:
>          BUG();
>      }
> +    p2m->pod.count += 1 << order;

You appear to have lost the 1L from v2.

>
>
>
>> +        for ( k = 0, page = mfn_to_page(mfn); k < n; ++k, ++page )
>> +            if ( !(page->count_info & PGC_allocated) ||
>> +                 (page->count_info & (PGC_page_table | PGC_xen_heap)) ||
>> +                 (page->count_info & PGC_count_mask) > max_ref )
>> +                goto out;
>>      }
>>  
>>      /* Now, do a quick check to see if it may be zero before unmapping. */
>> @@ -1114,7 +1137,7 @@ guest_physmap_mark_populate_on_demand(st
>>                                        unsigned int order)
>>  {
>>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>> -    unsigned long i, pod_count = 0;
>> +    unsigned long i, n, pod_count = 0;
>>      p2m_type_t ot;
>>      mfn_t omfn;
>>      int rc = 0;
>> @@ -1127,10 +1150,13 @@ guest_physmap_mark_populate_on_demand(st
>>      P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
>>  
>>      /* Make sure all gpfns are unused */
>> -    for ( i = 0; i < (1UL << order); i++ )
>> +    for ( i = 0; i < (1UL << order); i += n )
>>      {
>>          p2m_access_t a;
>> -        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL, NULL);
>> +        unsigned int cur_order;
>> +
>> +        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, &cur_order, NULL);
>> +        n = 1UL << min(order, cur_order);
>>          if ( p2m_is_ram(ot) )
>>          {
>>              P2M_DEBUG("gfn_to_mfn returned type %d!\n", ot);
>> @@ -1140,7 +1166,7 @@ guest_physmap_mark_populate_on_demand(st
>>          else if ( ot == p2m_populate_on_demand )
>>          {
>>              /* Count how man PoD entries we'll be replacing if successful */
>> -            pod_count++;
>> +            pod_count += n;
>>          }
>>      }
>>  
>>
>>
>  
>      return 0;
>  }
> @@ -502,11 +505,10 @@ p2m_pod_decrease_reservation(struct doma
>                               unsigned int order)
>  {
>      int ret=0;
> -    int i;
> +    unsigned long i, n;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
> -
> -    int steal_for_cache;
> -    int pod, nonpod, ram;
> +    bool_t steal_for_cache;
> +    long pod, nonpod, ram;
>  
>      gfn_lock(p2m, gpfn, order);
>      pod_lock(p2m);    
> @@ -525,21 +527,21 @@ recount:
>      /* Figure out if we need to steal some freed memory for our cache */
>      steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
>  
> -    /* FIXME: Add contiguous; query for PSE entries? */
> -    for ( i=0; i<(1<<order); i++)
> +    for ( i = 0; i < (1UL << order); i += n )
>      {
>          p2m_access_t a;
>          p2m_type_t t;
> +        unsigned int cur_order;
>  
> -        (void)p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
> -
> +        p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
> +        n = 1UL << min(order, cur_order);
>          if ( t == p2m_populate_on_demand )
> -            pod++;
> +            pod += n;
>          else
>          {
> -            nonpod++;
> +            nonpod += n;
>              if ( p2m_is_ram(t) )
> -                ram++;
> +                ram += n;
>          }
>      }
>  
> @@ -574,41 +576,53 @@ recount:
>       * + There are PoD entries to handle, or
>       * + There is ram left, and we want to steal it
>       */
> -    for ( i=0;
> -          i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
> -          i++)
> +    for ( i = 0;
> +          i < (1UL << order) && (pod > 0 || (steal_for_cache && ram > 0));
> +          i += n )
>      {
>          mfn_t mfn;
>          p2m_type_t t;
>          p2m_access_t a;
> +        unsigned int cur_order;
>  
> -        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL);
> +        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, &cur_order, NULL);
> +        if ( order < cur_order )
> +            cur_order = order;
> +        n = 1UL << cur_order;
>          if ( t == p2m_populate_on_demand )
>          {
> -            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
> -                          p2m->default_access);
> -            p2m->pod.entry_count--;
> +            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
> +                          p2m_invalid, p2m->default_access);
> +            p2m->pod.entry_count -= n;
>              BUG_ON(p2m->pod.entry_count < 0);
> -            pod--;
> +            pod -= n;
>          }
>          else if ( steal_for_cache && p2m_is_ram(t) )
>          {
> +            /*
> +             * If we need less than 1 << cur_order, we may end up stealing
> +             * more memory here than we actually need. This will be rectified
> +             * below, however; and stealing too much and then freeing what we
> +             * need may allow us to free smaller pages from the cache, and
> +             * avoid breaking up superpages.
> +             */
>              struct page_info *page;
> +            unsigned int j;
>  
>              ASSERT(mfn_valid(mfn));
>  
>              page = mfn_to_page(mfn);
>  
> -            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid,
> -                          p2m->default_access);
> -            set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
> -
> -            p2m_pod_cache_add(p2m, page, 0);
> +            p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
> +                          p2m_invalid, p2m->default_access);
> +            for ( j = 0; j < n; ++j )
> +                set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
> +            p2m_pod_cache_add(p2m, page, cur_order);
>  
>              steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
>  
> -            nonpod--;
> -            ram--;
> +            nonpod -= n;
> +            ram -= n;
>          }
>      }    
>  
> @@ -649,7 +663,8 @@ p2m_pod_zero_check_superpage(struct p2m_
>      p2m_type_t type, type0 = 0;
>      unsigned long * map = NULL;
>      int ret=0, reset = 0;
> -    int i, j;
> +    unsigned long i, n;
> +    unsigned int j;
>      int max_ref = 1;
>      struct domain *d = p2m->domain;
>  
> @@ -668,16 +683,14 @@ p2m_pod_zero_check_superpage(struct p2m_
>  
>      /* Look up the mfns, checking to make sure they're the same mfn
>       * and aligned, and mapping them. */
> -    for ( i=0; i<SUPERPAGE_PAGES; i++ )
> +    for ( i = 0; i < SUPERPAGE_PAGES; i += n )
>      {
>          p2m_access_t a; 
> -        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, NULL, NULL);
> +        unsigned int cur_order;
> +        unsigned long k;
> +        const struct page_info *page;
>  
> -        if ( i == 0 )
> -        {
> -            mfn0 = mfn;
> -            type0 = type;
> -        }
> +        mfn = p2m->get_entry(p2m, gfn + i, &type, &a, 0, &cur_order, NULL);
>  
>          /* Conditions that must be met for superpage-superpage:
>           * + All gfns are ram types
> @@ -690,15 +703,25 @@ p2m_pod_zero_check_superpage(struct p2m_
>           * + None of the mfns are likely to be mapped elsewhere (refcount
>           *   2 or less for shadow, 1 for hap)
>           */
> -        if ( !p2m_is_ram(type)
> -             || type != type0
> -             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
> -             || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
> -             || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap  ) != 0 )
> -             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
> -             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
> -                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
> +        if ( !p2m_is_ram(type) )
> +            goto out;
> +
> +        if ( i == 0 )
> +        {
> +            if ( !superpage_aligned(mfn_x(mfn)) )
> +                goto out;
> +            mfn0 = mfn;
> +            type0 = type;
> +        }
> +        else if ( type != type0 || mfn_x(mfn) != (mfn_x(mfn0) + i) )
>              goto out;
> +
> +        n = 1UL << min(cur_order, SUPERPAGE_ORDER + 0U);

Neither SUPERPAGE_SIZE nor SUPERPAGE_ORDER are used in assembly code
(nor can I see a plausible reason for them to be introduced in the future).

It would be neater to turn them into unsigned constants, rather than
playing tricks like this to make the min() typecheck happy.

Either way, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] x86/PoD: shorten certain operations on higher order ranges
  2015-10-01 10:26 [PATCH v3] x86/PoD: shorten certain operations on higher order ranges Jan Beulich
  2015-10-01 11:25 ` Andrew Cooper
@ 2015-10-01 15:50 ` George Dunlap
  1 sibling, 0 replies; 3+ messages in thread
From: George Dunlap @ 2015-10-01 15:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper

On Thu, Oct 1, 2015 at 11:26 AM, Jan Beulich <JBeulich@suse.com> wrote:
> Now that p2m->get_entry() always returns a valid order, utilize this
> to accelerate some of the operations in PoD code. (There are two uses
> of p2m->get_entry() left which don't easily lend themselves to this
> optimization.)
>
> Also adjust a few types as needed and remove stale comments from
> p2m_pod_cache_add() (to avoid duplicating them yet another time).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: George Dunlap <george.dunlap@citrix.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-10-01 15:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-01 10:26 [PATCH v3] x86/PoD: shorten certain operations on higher order ranges Jan Beulich
2015-10-01 11:25 ` Andrew Cooper
2015-10-01 15:50 ` George Dunlap

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.