All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ben Guthro <bguthro@virtualiron.com>
To: xen-devel <xen-devel@lists.xensource.com>
Cc: Dave Lively <dlively@virtualiron.com>
Subject: [PATCH] Segment dirty log for performance
Date: Wed, 24 Oct 2007 17:00:26 -0400	[thread overview]
Message-ID: <471FB26A.8060603@virtualiron.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 375 bytes --]

Represent the dirty log as an array of bitmaps.
Also, when bookkeeping the valid HVM PFN ranges, break the PFNs
into two ranges -- RAM and VGA.  This allows the dirty
page bitmaps to conform to these ranges and to skip the
(sometimes large) empty PFN range between them.

Signed-off-by: Ben Guthro <bguthro@virtualiron.com>
Signed-off-by: Dave Lively <dlively@virtualiron.com>


[-- Attachment #2: lm-segment-dirty-log.patch --]
[-- Type: text/x-patch, Size: 14480 bytes --]

diff -r 9bdb3e7a99c9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/p2m.c	Tue Oct 23 12:58:25 2007 -0400
@@ -241,9 +241,12 @@ set_p2m_entry(struct domain *d, unsigned
                                0, L1_PAGETABLE_ENTRIES);
     ASSERT(p2m_entry);
 
-    /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
-        d->arch.p2m.max_mapped_pfn = gfn;
+    if ( mfn_valid(mfn) ) {
+        paging_pfn_range_append(d, gfn);
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if (gfn > d->arch.p2m.max_mapped_pfn ) 
+            d->arch.p2m.max_mapped_pfn = gfn;
+    }
 
     if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
         entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/paging.c	Tue Oct 23 12:59:41 2007 -0400
@@ -96,36 +96,98 @@
         spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
     } while (0)
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn)
+{
+    /* Maintain a very small number of pfn ranges; ie 4
+     * Don't bother with an optimal representation (by consolidating ranges, etc.)
+     * because in practice it isn't required. */
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        int last_range;
+        
+        /* unused range? */
+        if ( unlikely(pr->len == 0) ) {
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+        
+        last_range = (pr == pr0 + PFN_RANGE_NR - 1);
+        
+#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range  */
+        
+        /* gfn precedes existing range by a substantial amount? */
+        if (unlikely(gfn + CLOSE_ENOUGH < pr->start && !last_range)) /* yes */
+        {
+            /* insert a new range */
+            memmove(pr+1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+        
+        /* gfn precedes existing range? */
+        if (unlikely(gfn < pr->start)) /* yes */
+        {
+            /* extend start of range */
+            pr->len += pr->start - gfn;
+            pr->start = gfn;
+            return;
+        }
+        
+        /* gfn within existing range? */
+        if ( unlikely(pr->start <= gfn && gfn < pr->start + pr->len) ) /* yes */
+            return;
+        
+        /* gfn abuts or closely follows existing range? or this is last range? */
+        if ( likely(gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range) )
+        {
+            /* extend end of range */
+            pr->len = gfn - pr->start + 1;
+            return;
+        }
+    }
+    BUG();
+}
+
 /* allocate bitmap resources for log dirty */
 int paging_alloc_log_dirty_bitmap(struct domain *d)
 {
-    if ( d->arch.paging.log_dirty.bitmap != NULL )
-        return 0;
-
-    d->arch.paging.log_dirty.bitmap_size =
-        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
-    d->arch.paging.log_dirty.bitmap =
-        xmalloc_array(unsigned long,
-                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
-    if ( d->arch.paging.log_dirty.bitmap == NULL )
-    {
-        d->arch.paging.log_dirty.bitmap_size = 0;
-        return -ENOMEM;
-    }
-    memset(d->arch.paging.log_dirty.bitmap, 0,
-           d->arch.paging.log_dirty.bitmap_size/8);
-
-    return 0;
+     struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+  
+     for (pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++)
+     {
+         ASSERT(pr->dirty_bitmap == NULL);
+         pr->dirty_bitmap_size = (pr->len + (BITS_PER_LONG - 1)) & 
+             ~(BITS_PER_LONG - 1);
+         printk("%s: allocating %dKB for domain %d dirty log range %ld\n",
+                 __FUNCTION__, (pr->dirty_bitmap_size + 8195)/8196,
+                 d->domain_id, pr - pr0);
+         pr->dirty_bitmap =
+             xmalloc_array(uint8_t, pr->dirty_bitmap_size/8);
+         if ( pr->dirty_bitmap == NULL )
+         {
+             pr->dirty_bitmap_size = 0;
+             return -ENOMEM;
+         }
+         
+         memset(pr->dirty_bitmap, 0x0, pr->dirty_bitmap_size/8);
+     }
+     return 0;
 }
 
 /* free bitmap resources */
 void paging_free_log_dirty_bitmap(struct domain *d)
 {
-    d->arch.paging.log_dirty.bitmap_size = 0;
-    if ( d->arch.paging.log_dirty.bitmap )
-    {
-        xfree(d->arch.paging.log_dirty.bitmap);
-        d->arch.paging.log_dirty.bitmap = NULL;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        pr->dirty_bitmap_size = 0;
+        if ( pr->dirty_bitmap )
+        {
+            xfree(pr->dirty_bitmap);
+            pr->dirty_bitmap = NULL;
+        }
     }
 }
 
@@ -174,7 +236,7 @@ int paging_log_dirty_disable(struct doma
     /* Safe because the domain is paused. */
     ret = d->arch.paging.log_dirty.disable_log_dirty(d);
     log_dirty_lock(d);
-    if ( !paging_mode_log_dirty(d) )
+    if ( !ret )
         paging_free_log_dirty_bitmap(d);
     log_dirty_unlock(d);
     domain_unpause(d);
@@ -187,6 +249,7 @@ void paging_mark_dirty(struct domain *d,
 {
     unsigned long pfn;
     mfn_t gmfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     gmfn = _mfn(guest_mfn);
 
@@ -211,30 +274,40 @@ void paging_mark_dirty(struct domain *d,
         return;
     }
 
-    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
-    {
-        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        {
-            PAGING_DEBUG(LOGDIRTY,
-                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
-                         mfn_x(gmfn), pfn, d->domain_id);
-            d->arch.paging.log_dirty.dirty_count++;
-        }
-    }
-    else
-    {
-        PAGING_PRINTK("mark_dirty OOR! "
-                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
-                      "owner=%d c=%08x t=%" PRtype_info "\n",
-                      mfn_x(gmfn),
-                      pfn,
-                      d->arch.paging.log_dirty.bitmap_size,
-                      d->domain_id,
-                      (page_get_owner(mfn_to_page(gmfn))
-                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
-                       : -1),
-                      mfn_to_page(gmfn)->count_info,
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) ) 
+        {
+            if ( !__test_and_set_bit(pfn - pr->start, pr->dirty_bitmap) )
+            {
+                PAGING_DEBUG(LOGDIRTY,
+                             "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                              mfn_x(gmfn), pfn, d->domain_id);
+                d->arch.paging.log_dirty.dirty_count++;
+            }
+            log_dirty_unlock(d);
+            return;
+        }
+    }
+
+    PAGING_PRINTK("mark_dirty OOR! "
+                  "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                  "owner=%d c=%08x t=%" PRtype_info "\n",
+                  mfn_x(gmfn),
+                  pfn,
+                  d->arch.paging.log_dirty.bitmap_size,
+                  d->domain_id,
+                  (page_get_owner(mfn_to_page(gmfn))
+                   ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                   : -1),
+                  mfn_to_page(gmfn)->count_info,
                       mfn_to_page(gmfn)->u.inuse.type_info);
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        PAGING_PRINTK("   pfn_range[%ld] start:0x%"
+                      PRI_mfn " len:0x%" PRI_mfn "\n",
+                      pr - pr0, pr->start, pr->len);
     }
 
     log_dirty_unlock(d);
@@ -245,6 +318,8 @@ int paging_log_dirty_op(struct domain *d
 int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 {
     int i, rv = 0, clean = 0, peek = 1;
+    int bits;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     domain_pause(d);
     log_dirty_lock(d);
@@ -270,37 +345,70 @@ int paging_log_dirty_op(struct domain *d
         /* caller may have wanted just to clean the state or access stats. */
         peek = 0;
 
-    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
-    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
-        sc->pages = d->arch.paging.log_dirty.bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
-    for ( i = 0; i < sc->pages; i += CHUNK )
-    {
-        int bytes = ((((sc->pages - i) > CHUNK)
-                      ? CHUNK
-                      : (sc->pages - i)) + 7) / 8;
-
-        if ( likely(peek) )
-        {
+    pr = pr0;
+#define BITS_PER_BYTE 8
+#define CHUNK (BITS_PER_BYTE*1024) /* Transfer and clear in 8kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += bits )
+    {
+        /* In gap between ranges? */
+        if ( i < pr->start )      /* yes */
+        {
+            static uint8_t zeroes[CHUNK];
+            uint8_t *pzeroes = zeroes;
+            /* copy zeroes to guest */
+            bits = pr->start - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
             if ( copy_to_guest_offset(
-                sc->dirty_bitmap, i/8,
-                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pzeroes,
+                     bits/BITS_PER_BYTE) )
             {
-                rv = -EFAULT;
+                rv = -EINVAL;
                 goto out;
             }
         }
 
-        if ( clean )
-            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+        /* Within a range? */
+        else if ( i < pr->start + pr->len ) /* yes */
+        {
+            bits = pr->start + pr->len - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
+            if ( copy_to_guest_offset(
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                     bits/BITS_PER_BYTE) )
+            {
+                rv = -EINVAL;
+                goto out;
+            }
+            if ( clean )
+                memset(pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                       0, bits/BITS_PER_BYTE);
+        }
+        /* Last range? */
+        else if (pr - pr0 == PFN_RANGE_NR-1) /* yes */
+        {
+            sc->pages = pr->start + pr->len;
+            break;
+        }
+        else
+        { /* Use next range */
+            pr++;
+            bits = 0;
+        }
     }
 #undef CHUNK
+#undef BITS_PER_BYTE
 
     log_dirty_unlock(d);
 
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/shadow/private.h	Tue Oct 23 12:58:25 2007 -0400
@@ -491,17 +491,22 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
 /* Is this guest page dirty?  Call only in log-dirty mode. */
 {
     unsigned long pfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    
     ASSERT(shadow_mode_log_dirty(d));
-    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-    if ( likely(VALID_M2P(pfn))
-         && likely(pfn < d->arch.paging.log_dirty.bitmap_size) 
-         && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        return 1;
-
-    return 0;
+    if ( unlikely(!VALID_M2P(pfn)) )
+         return 0;
+         
+    for (pr = pr0; likely(pr - pr0 != PFN_RANGE_NR && pr->len > 0); pr++)
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+            return test_bit(pfn - pr->start, pr->dirty_bitmap);
+    }
+    return 0; /* shouldn't get here */
 }
 
 
diff -r 9bdb3e7a99c9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/domain.h	Tue Oct 23 12:58:25 2007 -0400
@@ -158,13 +158,18 @@ struct log_dirty_domain {
     int            locker; /* processor that holds the lock */
     const char    *locker_function; /* func that took it */
 
-    /* log-dirty bitmap to record dirty pages */
-    unsigned long *bitmap;
-    unsigned int   bitmap_size;  /* in pages, bit per page */
-
     /* log-dirty mode stats */
     unsigned int   fault_count;
     unsigned int   dirty_count;
+
+    /* segmented log-dirty bitmap to record dirty pages */
+#define PFN_RANGE_NR 4
+    struct pfn_range {
+        unsigned long start;
+        unsigned long len;
+        uint8_t *dirty_bitmap;
+        unsigned int dirty_bitmap_size;  /* in pages, bit per page */
+    } pfn_range[PFN_RANGE_NR];
 
     /* functions which are paging mode specific */
     int            (*enable_log_dirty   )(struct domain *d);
diff -r 9bdb3e7a99c9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h	Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/paging.h	Tue Oct 23 12:58:25 2007 -0400
@@ -258,6 +258,8 @@ static inline int paging_cmpxchg_guest_e
         return (!cmpxchg_user(p, *old, new));
 }
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn);
+
 /* Helper function that writes a pte in such a way that a concurrent read 
  * never sees a half-written entry that has _PAGE_PRESENT set */
 static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2007-10-24 21:00 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-24 21:00 Ben Guthro [this message]
2007-10-25 14:25 ` [PATCH] Segment dirty log for performance Keir Fraser
2007-10-25 15:23   ` David Lively
2007-10-25 15:31     ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=471FB26A.8060603@virtualiron.com \
    --to=bguthro@virtualiron.com \
    --cc=dlively@virtualiron.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.