xen-devel.lists.xenproject.org archive mirror
* [PATCH 1/2] tmem: add full support for x86 up to 16Tb
@ 2013-09-23  2:23 Bob Liu
  2013-09-23  2:23 ` [PATCH 2/2] xen: arch: x86: re-enable tmem for system " Bob Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Bob Liu @ 2013-09-23  2:23 UTC (permalink / raw)
  To: xen-devel; +Cc: Bob Liu, keir, ian.campbell, jbeulich

tmem used to contain code that assumed it could directly access all memory.
This patch fixes that assumption so that tmem can work on x86 systems with
up to 16Tb of RAM.

tmem allocates pages mainly for two purposes.
1. Storing pages passed in from guests through the frontswap/cleancache
frontends. Here the tmem code already calls map_domain_page() before
accessing the memory, so no change is needed for 16Tb support.

2. Storing tmem metadata.
Here map_domain_page() is problematic: the mapping entries are limited (with
a 2-VCPU guest there are only 32 entries) and tmem cannot unmap them again
quickly.
The fix is to allocate Xen heap pages instead of domain heap pages for tmem
metadata.
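
For illustration only (this fragment is not part of the patch, and the helper
names are made up), the two access patterns differ roughly like this:

    /* Purpose 1: guest data pages stay on the domain heap and are only
     * mapped transiently around each access. */
    void handle_guest_data_page(struct page_info *pi)
    {
        void *va = __map_domain_page(pi);     /* short-lived mapping */
        /* ... copy/compress the page contents here ... */
        unmap_domain_page(va);
    }

    /* Purpose 2: metadata lives on the Xen heap, which is permanently
     * mapped, so the returned pointer can be kept for the object's
     * lifetime. */
    void *alloc_meta_page(void)
    {
        return alloc_xenheap_pages(0, MEMF_tmem);
    }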

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 xen/common/tmem.c          |    9 ++++--
 xen/common/tmem_xen.c      |    4 +--
 xen/include/xen/tmem_xen.h |   71 ++++++++++++++++++++++++++++++--------------
 3 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/xen/common/tmem.c b/xen/common/tmem.c
index a122651..ce6a788 100644
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -352,10 +352,13 @@ static NOINLINE pfp_t *tmem_page_alloc(pool_t *pool)
 {
     pfp_t *pfp = NULL;
 
+    /*
+     * All pages are allocated from domain heap.
+     */
     if ( pool != NULL && is_persistent(pool) )
-        pfp = tmh_alloc_page_thispool(pool);
+        pfp = tmh_alloc_page_thispool(pool,0);
     else
-        pfp = tmh_alloc_page(pool,0);
+        pfp = tmh_alloc_page(pool,0,0);
     if ( pfp == NULL )
         alloc_page_failed++;
     else
@@ -2911,7 +2914,7 @@ EXPORT void *tmem_relinquish_pages(unsigned int order, unsigned int memflags)
             read_lock(&tmem_rwlock);
     }
 
-    while ( (pfp = tmh_alloc_page(NULL,1)) == NULL )
+    while ( (pfp = tmh_alloc_page(NULL,1,0)) == NULL )
     {
         if ( (max_evictions-- <= 0) || !tmem_evict())
             break;
diff --git a/xen/common/tmem_xen.c b/xen/common/tmem_xen.c
index 54ec09f..3496c82 100644
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -353,7 +353,7 @@ static noinline void *tmh_mempool_page_get(unsigned long size)
     struct page_info *pi;
 
     ASSERT(size == PAGE_SIZE);
-    if ( (pi = tmh_alloc_page(NULL,0)) == NULL )
+    if ( (pi = tmh_alloc_page(NULL,0,1)) == NULL )
         return NULL;
     ASSERT(IS_VALID_PAGE(pi));
     return page_to_virt(pi);
@@ -382,7 +382,7 @@ static void *tmh_persistent_pool_page_get(unsigned long size)
     struct domain *d = current->domain;
 
     ASSERT(size == PAGE_SIZE);
-    if ( (pi = _tmh_alloc_page_thispool(d)) == NULL )
+    if ( (pi = _tmh_alloc_page_thispool(d,1)) == NULL )
         return NULL;
     ASSERT(IS_VALID_PAGE(pi));
     return page_to_virt(pi);
diff --git a/xen/include/xen/tmem_xen.h b/xen/include/xen/tmem_xen.h
index ad1ddd5..d52d68c 100644
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -156,7 +156,8 @@ static inline void _tmh_free_subpage_thispool(struct xmem_pool *cmem_mempool,
 #define tmh_free_subpage_thispool(_pool, _p, _s) \
  _tmh_free_subpage_thispool(_pool->client->tmh->persistent_pool, _p, _s)
 
-static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d)
+static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d,
+							int xen_heap)
 {
     struct page_info *pi;
 
@@ -166,38 +167,48 @@ static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d)
     if ( d->tot_pages >= d->max_pages )
         return NULL;
 
-    if ( tmh_page_list_pages )
-    {
-        if ( (pi = tmh_page_list_get()) != NULL )
-        {
-            if ( donate_page(d,pi,0) == 0 )
-                goto out;
-            else
-                tmh_page_list_put(pi);
-        }
+    if (xen_heap)
+	    pi = alloc_xenheap_pages(0,MEMF_tmem);
+    else {
+	    if ( tmh_page_list_pages )
+	    {
+		    if ( (pi = tmh_page_list_get()) != NULL )
+		    {
+			    if ( donate_page(d,pi,0) == 0 )
+				    goto out;
+			    else
+				    tmh_page_list_put(pi);
+		    }
+	    }
+
+	    pi = alloc_domheap_pages(d,0,MEMF_tmem);
     }
 
-    pi = alloc_domheap_pages(d,0,MEMF_tmem);
-
 out:
     ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
     return pi;
 }
-#define tmh_alloc_page_thispool(_pool) \
-    _tmh_alloc_page_thispool(_pool->client->tmh->domain)
+#define tmh_alloc_page_thispool(_pool,xen_heap) \
+    _tmh_alloc_page_thispool(_pool->client->tmh->domain,xen_heap)
 
 static inline void _tmh_free_page_thispool(struct page_info *pi)
 {
     struct domain *d = page_get_owner(pi);
+    int xen_heap = is_xen_heap_page(pi);
 
     ASSERT(IS_VALID_PAGE(pi));
-    if ( (d == NULL) || steal_page(d,pi,0) == 0 )
-        tmh_page_list_put(pi);
+    if ( (d == NULL) || steal_page(d,pi,0) == 0 ) {
+	    if (!xen_heap)
+		    tmh_page_list_put(pi);
+    }
     else
     {
         scrub_one_page(pi);
         ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
-        free_domheap_pages(pi,0);
+	if (xen_heap)
+		free_xenheap_pages(pi,0);
+	else
+		free_domheap_pages(pi,0);
     }
 }
 #define tmh_free_page_thispool(_pool,_pg) \
@@ -221,12 +232,19 @@ static inline void tmh_free_subpage(void *ptr, size_t size)
     xmem_pool_free(ptr,tmh_mempool);
 }
 
-static inline struct page_info *tmh_alloc_page(void *pool, int no_heap)
+static inline struct page_info *tmh_alloc_page(void *pool, int no_heap,
+						int xen_heap)
 {
-    struct page_info *pi = tmh_page_list_get();
+    struct page_info *pi = NULL;
+    if (!xen_heap)
+	    pi = tmh_page_list_get();
 
-    if ( pi == NULL && !no_heap )
-        pi = alloc_domheap_pages(0,0,MEMF_tmem);
+    if ( pi == NULL && !no_heap ) {
+    	if (xen_heap)
+		pi = alloc_xenheap_pages(0,MEMF_tmem);
+	else
+		pi = alloc_domheap_pages(0,0,MEMF_tmem);
+    }
     ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
     if ( pi != NULL && !no_heap )
         atomic_inc(&freeable_page_count);
@@ -235,9 +253,16 @@ static inline struct page_info *tmh_alloc_page(void *pool, int no_heap)
 
 static inline void tmh_free_page(struct page_info *pi)
 {
+    int xen_heap = is_xen_heap_page(pi);
     ASSERT(IS_VALID_PAGE(pi));
-    tmh_page_list_put(pi);
-    atomic_dec(&freeable_page_count);
+    if (xen_heap){
+	    scrub_one_page(pi);
+	    ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
+	    free_xenheap_pages(pi,0);
+    } else {
+	    tmh_page_list_put(pi);
+	    atomic_dec(&freeable_page_count);
+    }
 }
 
 static inline unsigned int tmem_subpage_maxsize(void)
-- 
1.7.10.4




* [PATCH 2/2] xen: arch: x86: re-enable tmem for system up to 16Tb
  2013-09-23  2:23 [PATCH 1/2] tmem: add full support for x86 up to 16Tb Bob Liu
@ 2013-09-23  2:23 ` Bob Liu
  2013-09-23  9:36 ` [PATCH 1/2] tmem: add full support for x86 " Andrew Cooper
  2013-09-23 10:46 ` Jan Beulich
  2 siblings, 0 replies; 8+ messages in thread
From: Bob Liu @ 2013-09-23  2:23 UTC (permalink / raw)
  To: xen-devel; +Cc: Bob Liu, keir, ian.campbell, jbeulich

tmem can work on systems with up to 16Tb of RAM after my previous patch "tmem:
add full support for x86 up to 16Tb", so re-enable it.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 xen/arch/x86/setup.c |    8 --------
 1 file changed, 8 deletions(-)

diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c550e8e..f73effe 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -25,7 +25,6 @@
 #include <xen/dmi.h>
 #include <xen/pfn.h>
 #include <xen/nodemask.h>
-#include <xen/tmem_xen.h> /* for opt_tmem only */
 #include <xen/watchdog.h>
 #include <public/version.h>
 #include <compat/platform.h>
@@ -1160,13 +1159,6 @@ void __init __start_xen(unsigned long mbi_p)
                 s = pfn_to_paddr(limit + 1);
             init_domheap_pages(s, e);
         }
-
-        if ( opt_tmem )
-        {
-           printk(XENLOG_WARNING
-                  "TMEM physical RAM limit exceeded, disabling TMEM\n");
-           opt_tmem = 0;
-        }
     }
 
     vm_init();
-- 
1.7.10.4


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23  2:23 [PATCH 1/2] tmem: add full support for x86 up to 16Tb Bob Liu
  2013-09-23  2:23 ` [PATCH 2/2] xen: arch: x86: re-enable tmem for system " Bob Liu
@ 2013-09-23  9:36 ` Andrew Cooper
  2013-09-23  9:45   ` Ian Campbell
  2013-09-23 10:46 ` Jan Beulich
  2 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2013-09-23  9:36 UTC (permalink / raw)
  To: Bob Liu; +Cc: Bob Liu, keir, ian.campbell, jbeulich, xen-devel

On 23/09/13 03:23, Bob Liu wrote:
> tmem used to contain code that assumed it could directly access all memory.
> This patch fixes that assumption so that tmem can work on x86 systems with
> up to 16Tb of RAM.
>
> tmem allocates pages mainly for two purposes.
> 1. Storing pages passed in from guests through the frontswap/cleancache
> frontends. Here the tmem code already calls map_domain_page() before
> accessing the memory, so no change is needed for 16Tb support.
>
> 2. Storing tmem metadata.
> Here map_domain_page() is problematic: the mapping entries are limited (with
> a 2-VCPU guest there are only 32 entries) and tmem cannot unmap them again
> quickly.
> The fix is to allocate Xen heap pages instead of domain heap pages for tmem
> metadata.

This is a no go.

Xenheap pages are just as limited as domain mapping slots (and perhaps more so).

All Xenheap pages live inside the Xen mapped region, including in the
upper virtual region of 32bit PV guests.

~Andrew


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23  9:36 ` [PATCH 1/2] tmem: add full support for x86 " Andrew Cooper
@ 2013-09-23  9:45   ` Ian Campbell
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Campbell @ 2013-09-23  9:45 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Bob Liu, Bob Liu, keir, jbeulich, xen-devel

On Mon, 2013-09-23 at 10:36 +0100, Andrew Cooper wrote:
> On 23/09/13 03:23, Bob Liu wrote:
> > tmem used to contain code that assumed it could directly access all memory.
> > This patch fixes that assumption so that tmem can work on x86 systems with
> > up to 16Tb of RAM.
> >
> > tmem allocates pages mainly for two purposes.
> > 1. Storing pages passed in from guests through the frontswap/cleancache
> > frontends. Here the tmem code already calls map_domain_page() before
> > accessing the memory, so no change is needed for 16Tb support.
> >
> > 2. Storing tmem metadata.
> > Here map_domain_page() is problematic: the mapping entries are limited (with
> > a 2-VCPU guest there are only 32 entries) and tmem cannot unmap them again
> > quickly.
> > The fix is to allocate Xen heap pages instead of domain heap pages for tmem
> > metadata.
> 
> This is a no go.
> 
> Xenheap pages are just as limited as domain mapping slots (and perhaps more so).
> 
> All Xenheap pages live inside the Xen mapped region, including in the
> upper virtual region of 32bit PV guests.

Not on a 64-bit hypervisor, which is all we support these days...

(32-bit ARM doesn't have the same limitations as 32-bit x86 did)

Ian.


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23  2:23 [PATCH 1/2] tmem: add full support for x86 up to 16Tb Bob Liu
  2013-09-23  2:23 ` [PATCH 2/2] xen: arch: x86: re-enable tmem for system " Bob Liu
  2013-09-23  9:36 ` [PATCH 1/2] tmem: add full support for x86 " Andrew Cooper
@ 2013-09-23 10:46 ` Jan Beulich
  2013-09-23 13:17   ` Bob Liu
  2013-09-25 20:44   ` Konrad Rzeszutek Wilk
  2 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2013-09-23 10:46 UTC (permalink / raw)
  To: Bob Liu; +Cc: xen-devel, Bob Liu, keir, ian.campbell

>>> On 23.09.13 at 04:23, Bob Liu <lliubbo@gmail.com> wrote:
> tmem used to contain code that assumed it could directly access all memory.
> This patch fixes that assumption so that tmem can work on x86 systems with
> up to 16Tb of RAM.
> 
> tmem allocates pages mainly for two purposes.
> 1. Storing pages passed in from guests through the frontswap/cleancache
> frontends. Here the tmem code already calls map_domain_page() before
> accessing the memory, so no change is needed for 16Tb support.

Did you also verify that none of these mappings remain in place
for undue periods of time?

> 2. Storing tmem metadata.
> Here map_domain_page() is problematic: the mapping entries are limited (with
> a 2-VCPU guest there are only 32 entries) and tmem cannot unmap them again
> quickly.
> The fix is to allocate Xen heap pages instead of domain heap pages for tmem
> metadata.

Please quantify the amount of Xen heap memory potentially
getting buried here.

Please also clarify whether reclaiming this memory in fact works
(i.e. that upon memory pressure the rest of the system can
be kept alive by tmem giving up this memory in due course).

> Signed-off-by: Bob Liu <bob.liu@oracle.com>

The patch is full of coding style violations; in fact there are too
many of them to list them individually.

> @@ -166,38 +167,48 @@ static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d)
>      if ( d->tot_pages >= d->max_pages )
>          return NULL;
>  
> -    if ( tmh_page_list_pages )
> -    {
> -        if ( (pi = tmh_page_list_get()) != NULL )
> -        {
> -            if ( donate_page(d,pi,0) == 0 )
> -                goto out;
> -            else
> -                tmh_page_list_put(pi);
> -        }
> +    if (xen_heap)
> +	    pi = alloc_xenheap_pages(0,MEMF_tmem);
> +    else {
> +	    if ( tmh_page_list_pages )
> +	    {
> +		    if ( (pi = tmh_page_list_get()) != NULL )
> +		    {
> +			    if ( donate_page(d,pi,0) == 0 )
> +				    goto out;
> +			    else
> +				    tmh_page_list_put(pi);
> +		    }
> +	    }
> +
> +	    pi = alloc_domheap_pages(d,0,MEMF_tmem);
>      }
>  [...]
>  static inline void _tmh_free_page_thispool(struct page_info *pi)
>  {
>      struct domain *d = page_get_owner(pi);
> +    int xen_heap = is_xen_heap_page(pi);
>  
>      ASSERT(IS_VALID_PAGE(pi));
> -    if ( (d == NULL) || steal_page(d,pi,0) == 0 )
> -        tmh_page_list_put(pi);
> +    if ( (d == NULL) || steal_page(d,pi,0) == 0 ) {
> +	    if (!xen_heap)
> +		    tmh_page_list_put(pi);

Looking at the allocation path above, the opposite ought to be
impossible. Hence rather than an if() you want an ASSERT() here.

> +    }
>      else
>      {
>          scrub_one_page(pi);
>          ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
> -        free_domheap_pages(pi,0);
> +	if (xen_heap)
> +		free_xenheap_pages(pi,0);

free_xenheap_page().

> +	else
> +		free_domheap_pages(pi,0);

free_domheap_page()

>  static inline void tmh_free_page(struct page_info *pi)
>  {
> +    int xen_heap = is_xen_heap_page(pi);
>      ASSERT(IS_VALID_PAGE(pi));
> -    tmh_page_list_put(pi);
> -    atomic_dec(&freeable_page_count);
> +    if (xen_heap){
> +	    scrub_one_page(pi);
> +	    ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);

Did you perhaps copy this from somewhere else without
understanding why it is needed there? It certainly doesn't look
necessary here: Such checks are meaningful only when you
allow a guest access to a Xen heap page.

> +	    free_xenheap_pages(pi,0);

free_xenheap_page().

> +    } else {
> +	    tmh_page_list_put(pi);
> +	    atomic_dec(&freeable_page_count);
> +    }
>  }

All that said - I'm not sure we want to accept enhancements to
tmem without the security audit happening first, which has been
due for over a year now.

Jan


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23 10:46 ` Jan Beulich
@ 2013-09-23 13:17   ` Bob Liu
  2013-09-23 13:28     ` Jan Beulich
  2013-09-25 20:44   ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 8+ messages in thread
From: Bob Liu @ 2013-09-23 13:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Bob Liu, keir, ian.campbell

Hi Jan,

Thanks for your review.

On 09/23/2013 06:46 PM, Jan Beulich wrote:
>>>> On 23.09.13 at 04:23, Bob Liu <lliubbo@gmail.com> wrote:
>> tmem used to contain code that assumed it could directly access all memory.
>> This patch fixes that assumption so that tmem can work on x86 systems with
>> up to 16Tb of RAM.
>>
>> tmem allocates pages mainly for two purposes.
>> 1. Storing pages passed in from guests through the frontswap/cleancache
>> frontends. Here the tmem code already calls map_domain_page() before
>> accessing the memory, so no change is needed for 16Tb support.
> 
> Did you also verify that none of these mappings remain in place
> for undue periods of time?
>

Yes, I think so.

>> 2. Storing tmem metadata.
>> Here map_domain_page() is problematic: the mapping entries are limited (with
>> a 2-VCPU guest there are only 32 entries) and tmem cannot unmap them again
>> quickly.
>> The fix is to allocate Xen heap pages instead of domain heap pages for tmem
>> metadata.
> 
> Please quantify the amount of Xen heap memory potentially
> getting buried here.
> 

Okay, I'll try to count the Xen heap pages used here and provide some
statistics.

> Please also clarify whether reclaiming this memory in fact works
> (i.e. that upon memory pressure the rest of the system can
> be kept alive by tmem giving up this memory in due course).
> 

This memory can't be reclaimed directly because it's used for metadata,
i.e. radix tree nodes.
But if a tmem data page gets reclaimed under memory pressure, the
corresponding radix tree node is also freed.
Once enough radix tree nodes have been freed, the metadata pages used here
will be reclaimed as well.

>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> 
> The patch is full of coding style violations; in fact there are too
> many of them to list them individually.
> 

Sorry about that, it's my first Xen patch.
Is there a document or checkpatch-style script I can use to avoid the
coding style problems?

>> @@ -166,38 +167,48 @@ static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d)
>>      if ( d->tot_pages >= d->max_pages )
>>          return NULL;
>>  
>> -    if ( tmh_page_list_pages )
>> -    {
>> -        if ( (pi = tmh_page_list_get()) != NULL )
>> -        {
>> -            if ( donate_page(d,pi,0) == 0 )
>> -                goto out;
>> -            else
>> -                tmh_page_list_put(pi);
>> -        }
>> +    if (xen_heap)
>> +	    pi = alloc_xenheap_pages(0,MEMF_tmem);
>> +    else {
>> +	    if ( tmh_page_list_pages )
>> +	    {
>> +		    if ( (pi = tmh_page_list_get()) != NULL )
>> +		    {
>> +			    if ( donate_page(d,pi,0) == 0 )
>> +				    goto out;
>> +			    else
>> +				    tmh_page_list_put(pi);
>> +		    }
>> +	    }
>> +
>> +	    pi = alloc_domheap_pages(d,0,MEMF_tmem);
>>      }
>>  [...]
>>  static inline void _tmh_free_page_thispool(struct page_info *pi)
>>  {
>>      struct domain *d = page_get_owner(pi);
>> +    int xen_heap = is_xen_heap_page(pi);
>>  
>>      ASSERT(IS_VALID_PAGE(pi));
>> -    if ( (d == NULL) || steal_page(d,pi,0) == 0 )
>> -        tmh_page_list_put(pi);
>> +    if ( (d == NULL) || steal_page(d,pi,0) == 0 ) {
>> +	    if (!xen_heap)
>> +		    tmh_page_list_put(pi);
> 
> Looking at the allocation path above the opposite ought to be
> impossible. Hence rather than if() you want to ASSERT() here.
> 

Thanks, will be fixed.
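
I.e. I'll turn that into something like the following (untested sketch):

    if ( (d == NULL) || steal_page(d, pi, 0) == 0 )
    {
        ASSERT(!xen_heap);
        tmh_page_list_put(pi);
    }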

>> +    }
>>      else
>>      {
>>          scrub_one_page(pi);
>>          ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
>> -        free_domheap_pages(pi,0);
>> +	if (xen_heap)
>> +		free_xenheap_pages(pi,0);
> 
> free_xenheap_page().
> 

The same applies to the other places.

>> +	else
>> +		free_domheap_pages(pi,0);
> 
> free_domheap_page()
> 
>>  static inline void tmh_free_page(struct page_info *pi)
>>  {
>> +    int xen_heap = is_xen_heap_page(pi);
>>      ASSERT(IS_VALID_PAGE(pi));
>> -    tmh_page_list_put(pi);
>> -    atomic_dec(&freeable_page_count);
>> +    if (xen_heap){
>> +	    scrub_one_page(pi);
>> +	    ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
> 
> Did you perhaps copy this from somewhere else without
> understanding why it is needed there? It certainly doesn't look
> necessary here: Such checks are meaningful only when you
> allow a guest access to a Xen heap page.
> 

Sorry, I just copied this code from _tmh_free_page_thispool().
It sounds like these checks are also unneeded in _tmh_free_page_thispool()?

>> +	    free_xenheap_pages(pi,0);
> 
> free_xenheap_page().
> 
>> +    } else {
>> +	    tmh_page_list_put(pi);
>> +	    atomic_dec(&freeable_page_count);
>> +    }
>>  }
> 
> All that said - I'm not sure we want to accept enhancements to
> tmem without the security audit happening first, which has been
> due for over a year now.
> 

Okay, I'll take a look at it.
In any case I'll try to make a V2 of this patchset based on your feedback.

-- 
Regards,
-Bob


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23 13:17   ` Bob Liu
@ 2013-09-23 13:28     ` Jan Beulich
  0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2013-09-23 13:28 UTC (permalink / raw)
  To: Bob Liu; +Cc: xen-devel, Bob Liu, keir, ian.campbell

>>> On 23.09.13 at 15:17, Bob Liu <bob.liu@oracle.com> wrote:
>> The patch is full of coding style violations; in fact there are too
>> many of them to list them individually.
>> 
> 
> Sorry about that, it's my first Xen patch.
> Is there a document or checkpatch-style script I can use to avoid the
> coding style problems?

./CODINGSTYLE (and of course you could use other core Xen files
as a reference - just don't use the tmem ones themselves because
they are quite inconsistent).
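
The most visible differences from the Linux style used in your patch are
whitespace related, e.g. (illustrative fragment only):

    /* Xen style: 4-space indentation, no hard tabs, blanks immediately
     * inside the parentheses of if/while/for, braces on their own lines. */
    if ( pi != NULL )
    {
        scrub_one_page(pi);
        free_domheap_page(pi);
    }
    else
        alloc_page_failed++;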

>>>  static inline void tmh_free_page(struct page_info *pi)
>>>  {
>>> +    int xen_heap = is_xen_heap_page(pi);
>>>      ASSERT(IS_VALID_PAGE(pi));
>>> -    tmh_page_list_put(pi);
>>> -    atomic_dec(&freeable_page_count);
>>> +    if (xen_heap){
>>> +	    scrub_one_page(pi);
>>> +	    ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
>> 
>> Did you perhaps copy this from somewhere else without
>> understanding why it is needed there? It certainly doesn't look
>> necessary here: Such checks are meaningful only when you
>> allow a guest access to a Xen heap page.
>> 
> 
> Sorry, I just copied this code from _tmh_free_page_thispool().
> It sounds like these checks are also unneeded in _tmh_free_page_thispool()?

Yes, this looks bogus. But I'm not a tmem expert, so you'd
better check whether there's some non-obvious dependency
(depending on when these got added, doing some archaeology
may also help here).

Jan


* Re: [PATCH 1/2] tmem: add full support for x86 up to 16Tb
  2013-09-23 10:46 ` Jan Beulich
  2013-09-23 13:17   ` Bob Liu
@ 2013-09-25 20:44   ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2013-09-25 20:44 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Bob Liu, Bob Liu, keir, ian.campbell

> All that said - I'm not sure we want to accept enhancements to
> tmem without the security audit happening first, which has been
> due for over a year now.

I have been looking through the tmem code with paper and pencil to check
that there are no issues. So far none - I still have the deduplication
code to finish.

Also I have been using Coverity to help with that, and it has spotted two
issues where we did not check the return value.

I plan to post patches for that once I am done with my vacation
email-backlog.

