All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: fix preemptable page type handling (v2)
@ 2008-10-30 12:27 Jan Beulich
  2008-10-30 14:46 ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2008-10-30 12:27 UTC (permalink / raw)
  To: xen-devel

- retain a page reference when PGT_partial is set on a page (and drop
  it when clearing that flag)
- don't drop a page reference never acquired when freeing the page type
  of a page where the allocation of the type got preempted (and never
  completed)
- don't acquire a page reference when allocating the page type of a
  page where freeing the type got preempted (and never completed, and
  hence didn't drop the respective reference)

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: 2008-10-27/xen/arch/x86/domain.c
===================================================================
--- 2008-10-27.orig/xen/arch/x86/domain.c	2008-10-29 17:10:41.000000000 +0100
+++ 2008-10-27/xen/arch/x86/domain.c	2008-10-29 17:11:51.000000000 +0100
@@ -1683,18 +1683,24 @@ static int relinquish_memory(
                     break;
                 case -EINTR:
                     page->u.inuse.type_info |= PGT_validated;
+                    if ( x & PGT_partial )
+                        put_page(page);
                     put_page(page);
                     ret = -EAGAIN;
                     goto out;
                 case -EAGAIN:
                     page->u.inuse.type_info |= PGT_partial;
-                    put_page(page);
+                    if ( x & PGT_partial )
+                        put_page(page);
                     goto out;
                 default:
                     BUG();
                 }
                 if ( x & PGT_partial )
+                {
                     page->u.inuse.type_info--;
+                    put_page(page);
+                }
                 break;
             }
         }
Index: 2008-10-27/xen/arch/x86/mm.c
===================================================================
--- 2008-10-27.orig/xen/arch/x86/mm.c	2008-10-28 14:53:16.000000000 +0100
+++ 2008-10-27/xen/arch/x86/mm.c	2008-10-29 16:46:52.000000000 +0100
@@ -566,19 +566,21 @@ static int get_page_from_pagenr(unsigned
 static int get_page_and_type_from_pagenr(unsigned long page_nr, 
                                          unsigned long type,
                                          struct domain *d,
+                                         int partial,
                                          int preemptible)
 {
     struct page_info *page = mfn_to_page(page_nr);
     int rc;
 
-    if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
+    if ( likely(partial >= 0) &&
+         unlikely(!get_page_from_pagenr(page_nr, d)) )
         return -EINVAL;
 
     rc = (preemptible ?
           get_page_type_preemptible(page, type) :
           (get_page_type(page, type) ? 0 : -EINVAL));
 
-    if ( rc )
+    if ( unlikely(rc) && partial >= 0 )
         put_page(page);
 
     return rc;
@@ -761,7 +763,7 @@ get_page_from_l2e(
     }
 
     rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
     if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
         rc = 0;
 
@@ -772,7 +774,7 @@ get_page_from_l2e(
 define_get_linear_pagetable(l3);
 static int
 get_page_from_l3e(
-    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
+    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
 {
     int rc;
 
@@ -786,7 +788,7 @@ get_page_from_l3e(
     }
 
     rc = get_page_and_type_from_pagenr(
-        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+        l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
     if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
         rc = 0;
 
@@ -797,7 +799,7 @@ get_page_from_l3e(
 define_get_linear_pagetable(l4);
 static int
 get_page_from_l4e(
-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
+    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
 {
     int rc;
 
@@ -811,7 +813,7 @@ get_page_from_l4e(
     }
 
     rc = get_page_and_type_from_pagenr(
-        l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+        l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
     if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
         rc = 0;
 
@@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_
     return 1;
 }
 
+static int __put_page_type(struct page_info *, int preemptible);
 
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-                             int preemptible)
+                             int partial, int preemptible)
 {
     if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
          (l3e_get_pfn(l3e) != pfn) )
+    {
+        if ( unlikely(partial > 0) )
+            return __put_page_type(l3e_get_page(l3e), preemptible);
         return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+    }
     return 1;
 }
 
 #if CONFIG_PAGING_LEVELS >= 4
 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
-                             int preemptible)
+                             int partial, int preemptible)
 {
     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
          (l4e_get_pfn(l4e) != pfn) )
+    {
+        if ( unlikely(partial > 0) )
+            return __put_page_type(l4e_get_page(l4e), preemptible);
         return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+    }
     return 1;
 }
 #endif
@@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in
     unsigned long  pfn = page_to_mfn(page);
     l3_pgentry_t  *pl3e;
     unsigned int   i;
-    int            rc = 0;
+    int            rc = 0, partial = page->partial_pte;
 
 #if CONFIG_PAGING_LEVELS == 3
     /*
@@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in
     if ( is_pv_32on64_domain(d) )
         memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
 
-    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
+    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+          i++, partial = 0 )
     {
         if ( is_pv_32bit_domain(d) && (i == 3) )
         {
@@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in
                 rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
                                                    PGT_l2_page_table |
                                                    PGT_pae_xen_l2,
-                                                   d, preemptible);
+                                                   d, partial, preemptible);
         }
         else if ( !is_guest_l3_slot(i) ||
-                  (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
+                  (rc = get_page_from_l3e(pl3e[i], pfn, d,
+                                          partial, preemptible)) > 0 )
             continue;
 
         if ( rc == -EAGAIN )
         {
             page->nr_validated_ptes = i;
-            page->partial_pte = 1;
+            page->partial_pte = partial ?: 1;
         }
         else if ( rc == -EINTR && i )
         {
@@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in
             if ( !is_guest_l3_slot(i) )
                 continue;
             unadjust_guest_l3e(pl3e[i], d);
-            put_page_from_l3e(pl3e[i], pfn, 0);
+            put_page_from_l3e(pl3e[i], pfn, 0, 0);
         }
     }
 
@@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in
     unsigned long  pfn = page_to_mfn(page);
     l4_pgentry_t  *pl4e = page_to_virt(page);
     unsigned int   i;
-    int            rc = 0;
+    int            rc = 0, partial = page->partial_pte;
 
-    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+          i++, partial = 0 )
     {
         if ( !is_guest_l4_slot(d, i) ||
-             (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
+             (rc = get_page_from_l4e(pl4e[i], pfn, d,
+                                     partial, preemptible)) > 0 )
             continue;
 
         if ( rc == -EAGAIN )
         {
             page->nr_validated_ptes = i;
-            page->partial_pte = 1;
+            page->partial_pte = partial ?: 1;
         }
         else if ( rc == -EINTR )
         {
@@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in
             MEM_LOG("Failure in alloc_l4_table: entry %d", i);
             while ( i-- > 0 )
                 if ( is_guest_l4_slot(d, i) )
-                    put_page_from_l4e(pl4e[i], pfn, 0);
+                    put_page_from_l4e(pl4e[i], pfn, 0, 0);
         }
         if ( rc < 0 )
             return rc;
@@ -1377,19 +1392,20 @@ static int free_l3_table(struct page_inf
     struct domain *d = page_get_owner(page);
     unsigned long pfn = page_to_mfn(page);
     l3_pgentry_t *pl3e;
-    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
-    int rc = 0;
+    int rc = 0, partial = page->partial_pte;
+    unsigned int  i = page->nr_validated_ptes - !partial;
 
     pl3e = map_domain_page(pfn);
 
     do {
         if ( is_guest_l3_slot(i) )
         {
-            rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+            rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
+            if ( rc < 0 )
+                break;
+            partial = 0;
             if ( rc > 0 )
                 continue;
-            if ( rc )
-                break;
             unadjust_guest_l3e(pl3e[i], d);
         }
     } while ( i-- );
@@ -1399,7 +1415,7 @@ static int free_l3_table(struct page_inf
     if ( rc == -EAGAIN )
     {
         page->nr_validated_ptes = i;
-        page->partial_pte = 1;
+        page->partial_pte = partial ?: -1;
     }
     else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
     {
@@ -1416,18 +1432,21 @@ static int free_l4_table(struct page_inf
     struct domain *d = page_get_owner(page);
     unsigned long pfn = page_to_mfn(page);
     l4_pgentry_t *pl4e = page_to_virt(page);
-    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
-    int rc = 0;
+    int rc = 0, partial = page->partial_pte;
+    unsigned int  i = page->nr_validated_ptes - !partial;
 
     do {
         if ( is_guest_l4_slot(d, i) )
-            rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
-    } while ( rc >= 0 && i-- );
+            rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
+        if ( rc < 0 )
+            break;
+        partial = 0;
+    } while ( i-- );
 
     if ( rc == -EAGAIN )
     {
         page->nr_validated_ptes = i;
-        page->partial_pte = 1;
+        page->partial_pte = partial ?: -1;
     }
     else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
     {
@@ -1703,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
             return rc ? 0 : -EFAULT;
         }
 
-        rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+        rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
             return page_unlock(l3pg), rc;
         rc = 0;
@@ -1732,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
     }
 
     page_unlock(l3pg);
-    put_page_from_l3e(ol3e, pfn, 0);
+    put_page_from_l3e(ol3e, pfn, 0, 0);
     return rc;
 }
 
@@ -1781,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
             return rc ? 0 : -EFAULT;
         }
 
-        rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+        rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
             return page_unlock(l4pg), rc;
         rc = 0;
@@ -1802,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
     }
 
     page_unlock(l4pg);
-    put_page_from_l4e(ol4e, pfn, 0);
+    put_page_from_l4e(ol4e, pfn, 0, 0);
     return rc;
 }
 
@@ -1866,6 +1885,10 @@ static int alloc_page_type(struct page_i
     struct domain *owner = page_get_owner(page);
     int rc;
 
+    /* Obtain an extra reference to retain if we set PGT_partial. */
+    if ( preemptible && !get_page(page, owner) )
+        return -EINVAL;
+
     /* A page table is dirtied when its type count becomes non-zero. */
     if ( likely(owner != NULL) )
         paging_mark_dirty(owner, page_to_mfn(page));
@@ -1900,8 +1923,13 @@ static int alloc_page_type(struct page_i
     if ( rc == -EAGAIN )
     {
         page->u.inuse.type_info |= PGT_partial;
+        return -EAGAIN;
     }
-    else if ( rc == -EINTR )
+
+    if ( preemptible )
+        put_page(page);
+
+    if ( rc == -EINTR )
     {
         ASSERT((page->u.inuse.type_info &
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2020,11 +2048,16 @@ static int __put_page_type_final(struct 
     case -EAGAIN:
         wmb();
         page->u.inuse.type_info |= PGT_partial;
+        /* Must skip put_page() below. */
+        preemptible = 0;
         break;
     default:
         BUG();
     }
 
+    if ( preemptible )
+        put_page(page);
+
     return rc;
 }
 
@@ -2034,6 +2067,10 @@ static int __put_page_type(struct page_i
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
 
+    /* Obtain an extra reference to retain if we set PGT_partial. */
+    if ( preemptible && !get_page(page, page_get_owner(page)) )
+        return -EINVAL;
+
     for ( ; ; )
     {
         x  = y;
@@ -2055,6 +2092,8 @@ static int __put_page_type(struct page_i
                 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
                                            x, nx)) != x) )
                     continue;
+                if ( x & PGT_partial )
+                    put_page(page);
                 /* We cleared the 'valid bit' so we do the clean up. */
                 return __put_page_type_final(page, x, preemptible);
             }
@@ -2075,9 +2114,16 @@ static int __put_page_type(struct page_i
             break;
 
         if ( preemptible && hypercall_preempt_check() )
+        {
+            if ( preemptible )
+                put_page(page);
             return -EINTR;
+        }
     }
 
+    if ( preemptible )
+        put_page(page);
+
     return 0;
 }
 
@@ -2181,7 +2227,11 @@ static int __get_page_type(struct page_i
         }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+        {
+            if ( (x & PGT_partial) && !(nx & PGT_partial) )
+                put_page(page);
             break;
+        }
 
         if ( preemptible && hypercall_preempt_check() )
             return -EINTR;
@@ -2290,7 +2340,7 @@ int new_guest_cr3(unsigned long mfn)
 #endif
     okay = paging_mode_refcounts(d)
         ? get_page_from_pagenr(mfn, d)
-        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
+        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
     if ( unlikely(!okay) )
     {
         MEM_LOG("Error while installing new baseptr %lx", mfn);
@@ -2534,7 +2584,7 @@ int do_mmuext_op(
             if ( paging_mode_refcounts(FOREIGNDOM) )
                 break;
 
-            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
             okay = !rc;
             if ( unlikely(!okay) )
             {
@@ -2615,7 +2665,7 @@ int do_mmuext_op(
                     okay = get_page_from_pagenr(mfn, d);
                 else
                     okay = !get_page_and_type_from_pagenr(
-                        mfn, PGT_root_page_table, d, 0);
+                        mfn, PGT_root_page_table, d, 0, 0);
                 if ( unlikely(!okay) )
                 {
                     MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2722,7 +2772,7 @@ int do_mmuext_op(
             unsigned char *ptr;
 
             okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
-                                                  FOREIGNDOM, 0);
+                                                  FOREIGNDOM, 0, 0);
             if ( unlikely(!okay) )
             {
                 MEM_LOG("Error while clearing mfn %lx", mfn);
@@ -2755,7 +2805,7 @@ int do_mmuext_op(
             }
 
             okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
-                                                  FOREIGNDOM, 0);
+                                                  FOREIGNDOM, 0, 0);
             if ( unlikely(!okay) )
             {
                 put_page(mfn_to_page(src_mfn));
Index: 2008-10-27/xen/include/asm-x86/mm.h
===================================================================
--- 2008-10-27.orig/xen/include/asm-x86/mm.h	2008-10-29 17:09:06.000000000 +0100
+++ 2008-10-27/xen/include/asm-x86/mm.h	2008-10-30 13:25:44.000000000 +0100
@@ -61,12 +61,36 @@ struct page_info
         /*
          * When PGT_partial is true then this field is valid and indicates
          * that PTEs in the range [0, @nr_validated_ptes) have been validated.
-         * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
-         * partially validated.
+         * An extra page reference must be acquired (or not dropped) whenever
+         * PGT_partial gets set, and it must be dropped when the flag gets
+         * cleared. This is so that a get() leaving a page in partially
+         * validated state (where the caller would drop the reference acquired
+         * due to the getting of the type [apparently] failing [-EAGAIN])
+         * would not accidentally result in a page left with zero general
+         * reference count, but non-zero type reference count (possible when
+         * the partial get() is followed immediately by domain destruction).
+         * Likewise, the ownership of the single type reference for partially
+         * (in-)validated pages is tied to this flag, i.e. the instance
+         * setting the flag must not drop that reference, whereas the instance
+         * clearing it will have to.
+         *
+         * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
+         * been partially validated. This implies that the general reference
+         * to the page (acquired from get_page_from_lNe()) would be dropped
+         * (again due to the apparent failure) and hence must be re-acquired
+         * when resuming the validation, but must not be dropped when picking
+         * up the page for invalidation.
+         *
+         * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
+         * been partially invalidated. This is basically the opposite case of
+         * above, i.e. the general reference to the page was not dropped in
+         * put_page_from_lNe() (due to the apparent failure), and hence it
+         * must be dropped when the put operation is resumed (and completes),
+         * but it must not be acquired if picking up the page for validation.
          */
         struct {
             u16 nr_validated_ptes;
-            bool_t partial_pte;
+            s8 partial_pte;
         };
 
         /*

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: fix preemptable page type handling (v2)
  2008-10-30 12:27 [PATCH] x86: fix preemptable page type handling (v2) Jan Beulich
@ 2008-10-30 14:46 ` Keir Fraser
  2008-10-30 14:56   ` [PATCH] x86: fix preemptable page type handling(v2) Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Keir Fraser @ 2008-10-30 14:46 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

Why does this patch not do what the new comment says and directly get_page()
when PGT_partial is set and put_page() when PGT_partial is cleared? Doing
get_page() across all __put_page() operations and then skipping the
put_page() in some cases, for example, seems inefficient and also makes the
code less clear. Why would you do it that way?

 -- Keir

On 30/10/08 12:27, "Jan Beulich" <jbeulich@novell.com> wrote:

> - retain a page reference when PGT_partial is set on a page (and drop
>   it when clearing that flag)
> - don't drop a page reference never acquired when freeing the page type
>   of a page where the allocation of the type got preempted (and never
>   completed)
> - don't acquire a page reference when allocating the page type of a
>   page where freeing the type got preempted (and never completed, and
>   hence didn't drop the respective reference)
> 
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> Index: 2008-10-27/xen/arch/x86/domain.c
> ===================================================================
> --- 2008-10-27.orig/xen/arch/x86/domain.c 2008-10-29 17:10:41.000000000 +0100
> +++ 2008-10-27/xen/arch/x86/domain.c 2008-10-29 17:11:51.000000000 +0100
> @@ -1683,18 +1683,24 @@ static int relinquish_memory(
>                      break;
>                  case -EINTR:
>                      page->u.inuse.type_info |= PGT_validated;
> +                    if ( x & PGT_partial )
> +                        put_page(page);
>                      put_page(page);
>                      ret = -EAGAIN;
>                      goto out;
>                  case -EAGAIN:
>                      page->u.inuse.type_info |= PGT_partial;
> -                    put_page(page);
> +                    if ( x & PGT_partial )
> +                        put_page(page);
>                      goto out;
>                  default:
>                      BUG();
>                  }
>                  if ( x & PGT_partial )
> +                {
>                      page->u.inuse.type_info--;
> +                    put_page(page);
> +                }
>                  break;
>              }
>          }
> Index: 2008-10-27/xen/arch/x86/mm.c
> ===================================================================
> --- 2008-10-27.orig/xen/arch/x86/mm.c 2008-10-28 14:53:16.000000000 +0100
> +++ 2008-10-27/xen/arch/x86/mm.c 2008-10-29 16:46:52.000000000 +0100
> @@ -566,19 +566,21 @@ static int get_page_from_pagenr(unsigned
>  static int get_page_and_type_from_pagenr(unsigned long page_nr,
>                                           unsigned long type,
>                                           struct domain *d,
> +                                         int partial,
>                                           int preemptible)
>  {
>      struct page_info *page = mfn_to_page(page_nr);
>      int rc;
>  
> -    if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
> +    if ( likely(partial >= 0) &&
> +         unlikely(!get_page_from_pagenr(page_nr, d)) )
>          return -EINVAL;
>  
>      rc = (preemptible ?
>            get_page_type_preemptible(page, type) :
>            (get_page_type(page, type) ? 0 : -EINVAL));
>  
> -    if ( rc )
> +    if ( unlikely(rc) && partial >= 0 )
>          put_page(page);
>  
>      return rc;
> @@ -761,7 +763,7 @@ get_page_from_l2e(
>      }
>  
>      rc = get_page_and_type_from_pagenr(
> -        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
> +        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
>      if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
>          rc = 0;
>  
> @@ -772,7 +774,7 @@ get_page_from_l2e(
>  define_get_linear_pagetable(l3);
>  static int
>  get_page_from_l3e(
> -    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
> +    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int
> preemptible)
>  {
>      int rc;
>  
> @@ -786,7 +788,7 @@ get_page_from_l3e(
>      }
>  
>      rc = get_page_and_type_from_pagenr(
> -        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
> +        l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
>      if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
>          rc = 0;
>  
> @@ -797,7 +799,7 @@ get_page_from_l3e(
>  define_get_linear_pagetable(l4);
>  static int
>  get_page_from_l4e(
> -    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
> +    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int
> preemptible)
>  {
>      int rc;
>  
> @@ -811,7 +813,7 @@ get_page_from_l4e(
>      }
>  
>      rc = get_page_and_type_from_pagenr(
> -        l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
> +        l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
>      if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
>          rc = 0;
>  
> @@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_
>      return 1;
>  }
>  
> +static int __put_page_type(struct page_info *, int preemptible);
>  
>  static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
> -                             int preemptible)
> +                             int partial, int preemptible)
>  {
>      if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
>           (l3e_get_pfn(l3e) != pfn) )
> +    {
> +        if ( unlikely(partial > 0) )
> +            return __put_page_type(l3e_get_page(l3e), preemptible);
>          return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
> +    }
>      return 1;
>  }
>  
>  #if CONFIG_PAGING_LEVELS >= 4
>  static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
> -                             int preemptible)
> +                             int partial, int preemptible)
>  {
>      if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
>           (l4e_get_pfn(l4e) != pfn) )
> +    {
> +        if ( unlikely(partial > 0) )
> +            return __put_page_type(l4e_get_page(l4e), preemptible);
>          return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
> +    }
>      return 1;
>  }
>  #endif
> @@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in
>      unsigned long  pfn = page_to_mfn(page);
>      l3_pgentry_t  *pl3e;
>      unsigned int   i;
> -    int            rc = 0;
> +    int            rc = 0, partial = page->partial_pte;
>  
>  #if CONFIG_PAGING_LEVELS == 3
>      /*
> @@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in
>      if ( is_pv_32on64_domain(d) )
>          memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
>  
> -    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
> +    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
> +          i++, partial = 0 )
>      {
>          if ( is_pv_32bit_domain(d) && (i == 3) )
>          {
> @@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in
>                  rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
>                                                     PGT_l2_page_table |
>                                                     PGT_pae_xen_l2,
> -                                                   d, preemptible);
> +                                                   d, partial, preemptible);
>          }
>          else if ( !is_guest_l3_slot(i) ||
> -                  (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0
> )
> +                  (rc = get_page_from_l3e(pl3e[i], pfn, d,
> +                                          partial, preemptible)) > 0 )
>              continue;
>  
>          if ( rc == -EAGAIN )
>          {
>              page->nr_validated_ptes = i;
> -            page->partial_pte = 1;
> +            page->partial_pte = partial ?: 1;
>          }
>          else if ( rc == -EINTR && i )
>          {
> @@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in
>              if ( !is_guest_l3_slot(i) )
>                  continue;
>              unadjust_guest_l3e(pl3e[i], d);
> -            put_page_from_l3e(pl3e[i], pfn, 0);
> +            put_page_from_l3e(pl3e[i], pfn, 0, 0);
>          }
>      }
>  
> @@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in
>      unsigned long  pfn = page_to_mfn(page);
>      l4_pgentry_t  *pl4e = page_to_virt(page);
>      unsigned int   i;
> -    int            rc = 0;
> +    int            rc = 0, partial = page->partial_pte;
>  
> -    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
> +    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
> +          i++, partial = 0 )
>      {
>          if ( !is_guest_l4_slot(d, i) ||
> -             (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
> +             (rc = get_page_from_l4e(pl4e[i], pfn, d,
> +                                     partial, preemptible)) > 0 )
>              continue;
>  
>          if ( rc == -EAGAIN )
>          {
>              page->nr_validated_ptes = i;
> -            page->partial_pte = 1;
> +            page->partial_pte = partial ?: 1;
>          }
>          else if ( rc == -EINTR )
>          {
> @@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in
>              MEM_LOG("Failure in alloc_l4_table: entry %d", i);
>              while ( i-- > 0 )
>                  if ( is_guest_l4_slot(d, i) )
> -                    put_page_from_l4e(pl4e[i], pfn, 0);
> +                    put_page_from_l4e(pl4e[i], pfn, 0, 0);
>          }
>          if ( rc < 0 )
>              return rc;
> @@ -1377,19 +1392,20 @@ static int free_l3_table(struct page_inf
>      struct domain *d = page_get_owner(page);
>      unsigned long pfn = page_to_mfn(page);
>      l3_pgentry_t *pl3e;
> -    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
> -    int rc = 0;
> +    int rc = 0, partial = page->partial_pte;
> +    unsigned int  i = page->nr_validated_ptes - !partial;
>  
>      pl3e = map_domain_page(pfn);
>  
>      do {
>          if ( is_guest_l3_slot(i) )
>          {
> -            rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
> +            rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
> +            if ( rc < 0 )
> +                break;
> +            partial = 0;
>              if ( rc > 0 )
>                  continue;
> -            if ( rc )
> -                break;
>              unadjust_guest_l3e(pl3e[i], d);
>          }
>      } while ( i-- );
> @@ -1399,7 +1415,7 @@ static int free_l3_table(struct page_inf
>      if ( rc == -EAGAIN )
>      {
>          page->nr_validated_ptes = i;
> -        page->partial_pte = 1;
> +        page->partial_pte = partial ?: -1;
>      }
>      else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
>      {
> @@ -1416,18 +1432,21 @@ static int free_l4_table(struct page_inf
>      struct domain *d = page_get_owner(page);
>      unsigned long pfn = page_to_mfn(page);
>      l4_pgentry_t *pl4e = page_to_virt(page);
> -    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
> -    int rc = 0;
> +    int rc = 0, partial = page->partial_pte;
> +    unsigned int  i = page->nr_validated_ptes - !partial;
>  
>      do {
>          if ( is_guest_l4_slot(d, i) )
> -            rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
> -    } while ( rc >= 0 && i-- );
> +            rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
> +        if ( rc < 0 )
> +            break;
> +        partial = 0;
> +    } while ( i-- );
>  
>      if ( rc == -EAGAIN )
>      {
>          page->nr_validated_ptes = i;
> -        page->partial_pte = 1;
> +        page->partial_pte = partial ?: -1;
>      }
>      else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
>      {
> @@ -1703,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
>              return rc ? 0 : -EFAULT;
>          }
>  
> -        rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
> +        rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
>          if ( unlikely(rc < 0) )
>              return page_unlock(l3pg), rc;
>          rc = 0;
> @@ -1732,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
>      }
>  
>      page_unlock(l3pg);
> -    put_page_from_l3e(ol3e, pfn, 0);
> +    put_page_from_l3e(ol3e, pfn, 0, 0);
>      return rc;
>  }
>  
> @@ -1781,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
>              return rc ? 0 : -EFAULT;
>          }
>  
> -        rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
> +        rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
>          if ( unlikely(rc < 0) )
>              return page_unlock(l4pg), rc;
>          rc = 0;
> @@ -1802,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
>      }
>  
>      page_unlock(l4pg);
> -    put_page_from_l4e(ol4e, pfn, 0);
> +    put_page_from_l4e(ol4e, pfn, 0, 0);
>      return rc;
>  }
>  
> @@ -1866,6 +1885,10 @@ static int alloc_page_type(struct page_i
>      struct domain *owner = page_get_owner(page);
>      int rc;
>  
> +    /* Obtain an extra reference to retain if we set PGT_partial. */
> +    if ( preemptible && !get_page(page, owner) )
> +        return -EINVAL;
> +
>      /* A page table is dirtied when its type count becomes non-zero. */
>      if ( likely(owner != NULL) )
>          paging_mark_dirty(owner, page_to_mfn(page));
> @@ -1900,8 +1923,13 @@ static int alloc_page_type(struct page_i
>      if ( rc == -EAGAIN )
>      {
>          page->u.inuse.type_info |= PGT_partial;
> +        return -EAGAIN;
>      }
> -    else if ( rc == -EINTR )
> +
> +    if ( preemptible )
> +        put_page(page);
> +
> +    if ( rc == -EINTR )
>      {
>          ASSERT((page->u.inuse.type_info &
>                  (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
> @@ -2020,11 +2048,16 @@ static int __put_page_type_final(struct
>      case -EAGAIN:
>          wmb();
>          page->u.inuse.type_info |= PGT_partial;
> +        /* Must skip put_page() below. */
> +        preemptible = 0;
>          break;
>      default:
>          BUG();
>      }
>  
> +    if ( preemptible )
> +        put_page(page);
> +
>      return rc;
>  }
>  
> @@ -2034,6 +2067,10 @@ static int __put_page_type(struct page_i
>  {
>      unsigned long nx, x, y = page->u.inuse.type_info;
>  
> +    /* Obtain an extra reference to retain if we set PGT_partial. */
> +    if ( preemptible && !get_page(page, page_get_owner(page)) )
> +        return -EINVAL;
> +
>      for ( ; ; )
>      {
>          x  = y;
> @@ -2055,6 +2092,8 @@ static int __put_page_type(struct page_i
>                  if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
>                                             x, nx)) != x) )
>                      continue;
> +                if ( x & PGT_partial )
> +                    put_page(page);
>                  /* We cleared the 'valid bit' so we do the clean up. */
>                  return __put_page_type_final(page, x, preemptible);
>              }
> @@ -2075,9 +2114,16 @@ static int __put_page_type(struct page_i
>              break;
>  
>          if ( preemptible && hypercall_preempt_check() )
> +        {
> +            if ( preemptible )
> +                put_page(page);
>              return -EINTR;
> +        }
>      }
>  
> +    if ( preemptible )
> +        put_page(page);
> +
>      return 0;
>  }
>  
> @@ -2181,7 +2227,11 @@ static int __get_page_type(struct page_i
>          }
>  
>          if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
> +        {
> +            if ( (x & PGT_partial) && !(nx & PGT_partial) )
> +                put_page(page);
>              break;
> +        }
>  
>          if ( preemptible && hypercall_preempt_check() )
>              return -EINTR;
> @@ -2290,7 +2340,7 @@ int new_guest_cr3(unsigned long mfn)
>  #endif
>      okay = paging_mode_refcounts(d)
>          ? get_page_from_pagenr(mfn, d)
> -        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
> +        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
>      if ( unlikely(!okay) )
>      {
>          MEM_LOG("Error while installing new baseptr %lx", mfn);
> @@ -2534,7 +2584,7 @@ int do_mmuext_op(
>              if ( paging_mode_refcounts(FOREIGNDOM) )
>                  break;
>  
> -            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
> +            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
>              okay = !rc;
>              if ( unlikely(!okay) )
>              {
> @@ -2615,7 +2665,7 @@ int do_mmuext_op(
>                      okay = get_page_from_pagenr(mfn, d);
>                  else
>                      okay = !get_page_and_type_from_pagenr(
> -                        mfn, PGT_root_page_table, d, 0);
> +                        mfn, PGT_root_page_table, d, 0, 0);
>                  if ( unlikely(!okay) )
>                  {
>                      MEM_LOG("Error while installing new mfn %lx", mfn);
> @@ -2722,7 +2772,7 @@ int do_mmuext_op(
>              unsigned char *ptr;
>  
>              okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
> -                                                  FOREIGNDOM, 0);
> +                                                  FOREIGNDOM, 0, 0);
>              if ( unlikely(!okay) )
>              {
>                  MEM_LOG("Error while clearing mfn %lx", mfn);
> @@ -2755,7 +2805,7 @@ int do_mmuext_op(
>              }
>  
>              okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
> -                                                  FOREIGNDOM, 0);
> +                                                  FOREIGNDOM, 0, 0);
>              if ( unlikely(!okay) )
>              {
>                  put_page(mfn_to_page(src_mfn));
> Index: 2008-10-27/xen/include/asm-x86/mm.h
> ===================================================================
> --- 2008-10-27.orig/xen/include/asm-x86/mm.h 2008-10-29 17:09:06.000000000
> +0100
> +++ 2008-10-27/xen/include/asm-x86/mm.h 2008-10-30 13:25:44.000000000 +0100
> @@ -61,12 +61,36 @@ struct page_info
>          /*
>           * When PGT_partial is true then this field is valid and indicates
>           * that PTEs in the range [0, @nr_validated_ptes) have been
> validated.
> -         * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
> -         * partially validated.
> +         * An extra page reference must be acquired (or not dropped) whenever
> +         * PGT_partial gets set, and it must be dropped when the flag gets
> +         * cleared. This is so that a get() leaving a page in partially
> +         * validated state (where the caller would drop the reference
> acquired
> +         * due to the getting of the type [apparently] failing [-EAGAIN])
> +         * would not accidentally result in a page left with zero general
> +         * reference count, but non-zero type reference count (possible when
> +         * the partial get() is followed immediately by domain destruction).
> +         * Likewise, the ownership of the single type reference for partially
> +         * (in-)validated pages is tied to this flag, i.e. the instance
> +         * setting the flag must not drop that reference, whereas the
> instance
> +         * clearing it will have to.
> +         *
> +         * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
> +         * been partially validated. This implies that the general reference
> +         * to the page (acquired from get_page_from_lNe()) would be dropped
> +         * (again due to the apparent failure) and hence must be re-acquired
> +         * when resuming the validation, but must not be dropped when picking
> +         * up the page for invalidation.
> +         *
> +         * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
> +         * been partially invalidated. This is basically the opposite case of
> +         * above, i.e. the general reference to the page was not dropped in
> +         * put_page_from_lNe() (due to the apparent failure), and hence it
> +         * must be dropped when the put operation is resumed (and completes),
> +         * but it must not be acquired if picking up the page for validation.
>           */
>          struct {
>              u16 nr_validated_ptes;
> -            bool_t partial_pte;
> +            s8 partial_pte;
>          };
>  
>          /*
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: fix preemptable page type handling(v2)
  2008-10-30 14:46 ` Keir Fraser
@ 2008-10-30 14:56   ` Jan Beulich
  2008-10-30 15:00     ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2008-10-30 14:56 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <keir.fraser@eu.citrix.com> 30.10.08 15:46 >>>
>Why does this patch not do what the new comment says and directly get_page()
>when PGT_partial is set and put_page() when PGT_partial is cleared? Doing

Hmm, the description was meant to tell the net effect, not the way it's
implemented.

>get_page() across all __put_page() operations and then skipping the
>put_page() in some cases, for example, seems inefficient and also makes the
>code less clear. Why would you do it that way?

Because I can't handle a failure of get_page() (due to count overflow)
once I'm past the put()s.

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: fix preemptable page type handling(v2)
  2008-10-30 14:56   ` [PATCH] x86: fix preemptable page type handling(v2) Jan Beulich
@ 2008-10-30 15:00     ` Keir Fraser
  2008-10-30 17:03       ` [PATCH] x86: fix preemptable page typehandling(v2) Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Keir Fraser @ 2008-10-30 15:00 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 30/10/08 14:56, "Jan Beulich" <jbeulich@novell.com> wrote:

>> get_page() across all __put_page() operations and then skipping the
>> put_page() in some cases, for example, seems inefficient and also makes the
>> code less clear. Why would you do it that way?
> 
> Because I can't handle a failure of get_page() (due to count overflow)
> once I'm past the put()s.

We could make get_page() always leave a few available references, and then
define a new get_page() variant which:
 A. Does not check the owner
 B. Knows that it is incrementing a non-zero count
 C. Is able to use the few references never claimed by get_page().

This would be basically a small cmpxchg() loop. Since it's only executed
before setting PGT_partial we know that only one such special reference is
needed per page and we could just make get_page() leave headroom of one
reference.

Simpler code and faster code?

 -- Keir

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: fix preemptable page typehandling(v2)
  2008-10-30 15:00     ` Keir Fraser
@ 2008-10-30 17:03       ` Jan Beulich
  2008-10-30 17:27         ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2008-10-30 17:03 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <keir.fraser@eu.citrix.com> 30.10.08 16:00 >>>
>On 30/10/08 14:56, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>>> get_page() across all __put_page() operations and then skipping the
>>> put_page() in some cases, for example, seems inefficient and also makes the
>>> code less clear. Why would you do it that way?
>> 
>> Because I can't handle a failure of get_page() (due to count overflow)
>> once I'm past the put()s.
>
>We could make get_page() always leave a few available references, and then
>define a new get_page() variant which:
> A. Does not check the owner
> B. Knows that it is incrementing a non-zero count
> C. Is able to use the few references never claimed by get_page().
>
>This would be basically a small cmpxchg() loop. Since it's only executed
>before setting PGT_partial we know that only one such special reference is
>needed per page and we could just make get_page() leave headroom of one
>reference.
>
>Simpler code and faster code?

Yes - admittedly I had expected you to dislike a special casing approach like
this...

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86: fix preemptable page typehandling(v2)
  2008-10-30 17:03       ` [PATCH] x86: fix preemptable page typehandling(v2) Jan Beulich
@ 2008-10-30 17:27         ` Keir Fraser
  2008-10-30 21:19           ` Use of the "has_shutdown_code" code in v3.2 Roger Cruz
  0 siblings, 1 reply; 8+ messages in thread
From: Keir Fraser @ 2008-10-30 17:27 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 30/10/08 17:03, "Jan Beulich" <jbeulich@novell.com> wrote:

>> This would be basically a small cmpxchg() loop. Since it's only executed
>> before setting PGT_partial we know that only one such special reference is
>> needed per page and we could just make get_page() leave headroom of one
>> reference.
>> 
>> Simpler code and faster code?
> 
> Yes - admittedly I had expected you to dislike a special casing approach like
> this...

I think it will overall make the code clearer.

 -- Keir

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Use of the "has_shutdown_code" code in v3.2
  2008-10-30 17:27         ` Keir Fraser
@ 2008-10-30 21:19           ` Roger Cruz
  2008-10-30 22:02             ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Roger Cruz @ 2008-10-30 21:19 UTC (permalink / raw)
  To: xen-devel


We're using the V3.2 of the hypervisor code and while tracking down a
suspend/resume bug, I came across the following code that caused me to
ask myself whether the "has_shutdown_code" should also be cleared when
the domain is resumed.

This hypercall will not set a new shutdown reason if the domain already
has the flag set to 1.  So, if a domain is suspended and then resumed,
the flag would remain set.


    case SCHEDOP_shutdown_code:
    {
        struct sched_shutdown sched_shutdown;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_shutdown, arg, 1) )
            break;

        ret = 0;
        TRACE_3D(TRC_SCHED_SHUTDOWN_CODE,
                 current->domain->domain_id, current->vcpu_id,
                 sched_shutdown.reason);
        spin_lock(&current->domain->shutdown_lock);
        if ( !current->domain->has_shutdown_code )
        {
            current->domain->shutdown_code = (u8)sched_shutdown.reason;
            current->domain->has_shutdown_code = 1;
        }

So I asked myself, should the domain_resume routine also clear that
flag?



void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset
under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;  <---- should it also
include: "d->has_shutdown_code = "

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Use of the "has_shutdown_code" code in v3.2
  2008-10-30 21:19           ` Use of the "has_shutdown_code" code in v3.2 Roger Cruz
@ 2008-10-30 22:02             ` Keir Fraser
  0 siblings, 0 replies; 8+ messages in thread
From: Keir Fraser @ 2008-10-30 22:02 UTC (permalink / raw)
  To: Roger Cruz, xen-devel

On 30/10/08 21:19, "Roger Cruz" <rcruz@marathontechnologies.com> wrote:

> We're using the V3.2 of the hypervisor code and while tracking down a
> suspend/resume bug, I came across the following code that caused me to
> ask myself whether the "has_shutdown_code" should also be cleared when
> the domain is resumed.

You're musing on a flag which has never been part of mainline Xen. Looks
like you're right though -- a tree which has the has_shutdown_code flag
should clear it on domain resume.

 -- Keir

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-10-30 22:02 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-30 12:27 [PATCH] x86: fix preemptable page type handling (v2) Jan Beulich
2008-10-30 14:46 ` Keir Fraser
2008-10-30 14:56   ` [PATCH] x86: fix preemptable page type handling(v2) Jan Beulich
2008-10-30 15:00     ` Keir Fraser
2008-10-30 17:03       ` [PATCH] x86: fix preemptable page typehandling(v2) Jan Beulich
2008-10-30 17:27         ` Keir Fraser
2008-10-30 21:19           ` Use of the "has_shutdown_code" code in v3.2 Roger Cruz
2008-10-30 22:02             ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.