public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] iommu/iova: Improve alloc_iova performance
@ 2017-09-20 15:28 David Woods
  2017-09-27 14:10 ` Joerg Roedel
  0 siblings, 1 reply; 3+ messages in thread
From: David Woods @ 2017-09-20 15:28 UTC (permalink / raw)
  To: joro, iommu, linux-kernel, cmetcalf; +Cc: David Woods

When allocating pages with alloc_iova() where limit_pfn > dma_32bit_pfn
__alloc_and_insert_iova_range does a linear traversal of the tree to
find a free block.  In the worst case it makes the alloc O(n) for each
page, where n is the number of pages allocated so far.  The worst case
turns out to be common for drivers that allocate lots of pages at start
up and never free them.

There is an optimization for allocating 32-bit addresses where it
starts the search at the point where the previously allocated page was
inserted.  This is sensible, since otherwise it would have to always
search through all the 64-bit pages first.

To improve this situation, extend the optimization to also keep track
of the point where the previous 64-bit page was inserted.  With this
change, the search for an available slot can normally be done in
constant time and the whole alloc_iova only costs O(log n) due to the
rb tree insert.

Reviewed-by: Chris Metcalf <cmetcalf@mellanox.com>
Signed-off-by: David Woods <dwoods@mellanox.com>
---
 drivers/iommu/iova.c | 82 +++++++++++++++++++++++++++++++++++-----------------
 include/linux/iova.h |  1 +
 2 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 33edfa7..3c82694 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -108,15 +108,26 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 
 static struct rb_node *
-__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 {
-	if ((*limit_pfn > iovad->dma_32bit_pfn) ||
-		(iovad->cached32_node == NULL))
+	if (limit_pfn <= iovad->dma_32bit_pfn)
+		return iovad->cached32_node;
+	else
+		return iovad->cached64_node;
+}
+
+static struct rb_node *
+__get_cached_rbnode_and_limit(struct iova_domain *iovad,
+			      unsigned long *limit_pfn)
+{
+	struct rb_node *cached_node = __get_cached_rbnode(iovad, *limit_pfn);
+
+	if (cached_node == NULL) {
 		return rb_last(&iovad->rbroot);
-	else {
-		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
+	} else {
+		struct rb_node *prev_node = rb_prev(cached_node);
 		struct iova *curr_iova =
-			rb_entry(iovad->cached32_node, struct iova, node);
+			rb_entry(cached_node, struct iova, node);
 		*limit_pfn = curr_iova->pfn_lo;
 		return prev_node;
 	}
@@ -124,33 +135,50 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 
 static void
 __cached_rbnode_insert_update(struct iova_domain *iovad,
-	unsigned long limit_pfn, struct iova *new)
+	unsigned long limit_pfn, struct rb_node *node)
 {
-	if (limit_pfn != iovad->dma_32bit_pfn)
-		return;
-	iovad->cached32_node = &new->node;
+	if (limit_pfn == iovad->dma_32bit_pfn)
+		iovad->cached32_node = node;
+	else if (limit_pfn > iovad->dma_32bit_pfn)
+		iovad->cached64_node = node;
 }
 
 static void
 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 {
 	struct iova *cached_iova;
-	struct rb_node *curr;
-
-	if (!iovad->cached32_node)
-		return;
-	curr = iovad->cached32_node;
-	cached_iova = rb_entry(curr, struct iova, node);
-
-	if (free->pfn_lo >= cached_iova->pfn_lo) {
-		struct rb_node *node = rb_next(&free->node);
-		struct iova *iova = rb_entry(node, struct iova, node);
+	struct rb_node *cached_node;
 
-		/* only cache if it's below 32bit pfn */
-		if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
-			iovad->cached32_node = node;
-		else
-			iovad->cached32_node = NULL;
+	if (free->pfn_lo <= iovad->dma_32bit_pfn) {
+		if (unlikely(iovad->cached64_node == &free->node))
+			iovad->cached64_node = NULL;
+		cached_node = iovad->cached32_node;
+		if (!cached_node)
+			return;
+		cached_iova = rb_entry(cached_node, struct iova, node);
+		if (free->pfn_lo >= cached_iova->pfn_lo) {
+			struct rb_node *next_node = rb_next(&free->node);
+
+			if (next_node &&
+			    rb_entry(next_node, struct iova, node)->pfn_lo <=
+			    iovad->dma_32bit_pfn)
+				iovad->cached32_node = next_node;
+			else
+				iovad->cached32_node = NULL;
+		}
+	} else {
+		cached_node = iovad->cached64_node;
+		if (!cached_node)
+			return;
+		cached_iova = container_of(cached_node, struct iova, node);
+		if (free->pfn_lo >= cached_iova->pfn_lo) {
+			struct rb_node *next_node = rb_next(&free->node);
+
+			if (next_node)
+				iovad->cached64_node = next_node;
+			else
+				iovad->cached64_node = NULL;
+		}
 	}
 }
 
@@ -204,7 +232,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	/* Walk the tree backwards */
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 	saved_pfn = limit_pfn;
-	curr = __get_cached_rbnode(iovad, &limit_pfn);
+	curr = __get_cached_rbnode_and_limit(iovad, &limit_pfn);
 	prev = curr;
 	while (curr) {
 		struct iova *curr_iova = rb_entry(curr, struct iova, node);
@@ -238,7 +266,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
 	/* If we have 'prev', it's a valid place to start the insertion. */
 	iova_insert_rbtree(&iovad->rbroot, new, prev);
-	__cached_rbnode_insert_update(iovad, saved_pfn, new);
+	__cached_rbnode_insert_update(iovad, saved_pfn, &new->node);
 
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 
diff --git a/include/linux/iova.h b/include/linux/iova.h
index d179b9b..ea89695 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -71,6 +71,7 @@ struct iova_domain {
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
 	struct rb_root	rbroot;		/* iova domain rbtree root */
 	struct rb_node	*cached32_node; /* Save last alloced node */
+	struct rb_node	*cached64_node; /* Save last alloced node */
 	unsigned long	granule;	/* pfn granularity for this domain */
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;
-- 
2.1.2

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] iommu/iova: Improve alloc_iova performance
  2017-09-20 15:28 [PATCH] iommu/iova: Improve alloc_iova performance David Woods
@ 2017-09-27 14:10 ` Joerg Roedel
  2017-09-27 15:21   ` David Woods
  0 siblings, 1 reply; 3+ messages in thread
From: Joerg Roedel @ 2017-09-27 14:10 UTC (permalink / raw)
  To: David Woods, robin.murphy; +Cc: iommu, linux-kernel, cmetcalf

Adding Robin.

Robin, can you please have a look?

On Wed, Sep 20, 2017 at 11:28:19AM -0400, David Woods wrote:
> When allocating pages with alloc_iova() where limit_pfn > dma_32bit_pfn
> __alloc_and_insert_iova_range does a linear traversal of the tree to
> find a free block.  In the worst case it makes the alloc O(n) for each
> page, where n is the number of pages allocated so far.  The worst case
> turns out to be common for drivers that allocate lots of pages at start
> up and never free them.
> 
> There is an optimization for allocating 32-bit addresses where it
> starts the search at the point where the previously allocated page was
> inserted.  This is sensible, since otherwise it would have to always
> search through all the 64-bit pages first.
> 
> To improve this situation, extend the optimization to also keep track
> of the point where the previous 64-bit page was inserted.  With this
> change, the search for an available slot can normally be done in
> constant time and the whole alloc_iova only costs O(log n) due to the
> rb tree insert.
> 
> Reviewed-by: Chris Metcalf <cmetcalf@mellanox.com>
> Signed-off-by: David Woods <dwoods@mellanox.com>
> ---
>  drivers/iommu/iova.c | 82 +++++++++++++++++++++++++++++++++++-----------------
>  include/linux/iova.h |  1 +
>  2 files changed, 56 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index 33edfa7..3c82694 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -108,15 +108,26 @@ int init_iova_flush_queue(struct iova_domain *iovad,
>  EXPORT_SYMBOL_GPL(init_iova_flush_queue);
>  
>  static struct rb_node *
> -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
> +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
>  {
> -	if ((*limit_pfn > iovad->dma_32bit_pfn) ||
> -		(iovad->cached32_node == NULL))
> +	if (limit_pfn <= iovad->dma_32bit_pfn)
> +		return iovad->cached32_node;
> +	else
> +		return iovad->cached64_node;
> +}
> +
> +static struct rb_node *
> +__get_cached_rbnode_and_limit(struct iova_domain *iovad,
> +			      unsigned long *limit_pfn)
> +{
> +	struct rb_node *cached_node = __get_cached_rbnode(iovad, *limit_pfn);
> +
> +	if (cached_node == NULL) {
>  		return rb_last(&iovad->rbroot);
> -	else {
> -		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
> +	} else {
> +		struct rb_node *prev_node = rb_prev(cached_node);
>  		struct iova *curr_iova =
> -			rb_entry(iovad->cached32_node, struct iova, node);
> +			rb_entry(cached_node, struct iova, node);
>  		*limit_pfn = curr_iova->pfn_lo;
>  		return prev_node;
>  	}
> @@ -124,33 +135,50 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
>  
>  static void
>  __cached_rbnode_insert_update(struct iova_domain *iovad,
> -	unsigned long limit_pfn, struct iova *new)
> +	unsigned long limit_pfn, struct rb_node *node)
>  {
> -	if (limit_pfn != iovad->dma_32bit_pfn)
> -		return;
> -	iovad->cached32_node = &new->node;
> +	if (limit_pfn == iovad->dma_32bit_pfn)
> +		iovad->cached32_node = node;
> +	else if (limit_pfn > iovad->dma_32bit_pfn)
> +		iovad->cached64_node = node;
>  }
>  
>  static void
>  __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
>  {
>  	struct iova *cached_iova;
> -	struct rb_node *curr;
> -
> -	if (!iovad->cached32_node)
> -		return;
> -	curr = iovad->cached32_node;
> -	cached_iova = rb_entry(curr, struct iova, node);
> -
> -	if (free->pfn_lo >= cached_iova->pfn_lo) {
> -		struct rb_node *node = rb_next(&free->node);
> -		struct iova *iova = rb_entry(node, struct iova, node);
> +	struct rb_node *cached_node;
>  
> -		/* only cache if it's below 32bit pfn */
> -		if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
> -			iovad->cached32_node = node;
> -		else
> -			iovad->cached32_node = NULL;
> +	if (free->pfn_lo <= iovad->dma_32bit_pfn) {
> +		if (unlikely(iovad->cached64_node == &free->node))
> +			iovad->cached64_node = NULL;
> +		cached_node = iovad->cached32_node;
> +		if (!cached_node)
> +			return;
> +		cached_iova = rb_entry(cached_node, struct iova, node);
> +		if (free->pfn_lo >= cached_iova->pfn_lo) {
> +			struct rb_node *next_node = rb_next(&free->node);
> +
> +			if (next_node &&
> +			    rb_entry(next_node, struct iova, node)->pfn_lo <=
> +			    iovad->dma_32bit_pfn)
> +				iovad->cached32_node = next_node;
> +			else
> +				iovad->cached32_node = NULL;
> +		}
> +	} else {
> +		cached_node = iovad->cached64_node;
> +		if (!cached_node)
> +			return;
> +		cached_iova = container_of(cached_node, struct iova, node);
> +		if (free->pfn_lo >= cached_iova->pfn_lo) {
> +			struct rb_node *next_node = rb_next(&free->node);
> +
> +			if (next_node)
> +				iovad->cached64_node = next_node;
> +			else
> +				iovad->cached64_node = NULL;
> +		}
>  	}
>  }
>  
> @@ -204,7 +232,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
>  	/* Walk the tree backwards */
>  	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
>  	saved_pfn = limit_pfn;
> -	curr = __get_cached_rbnode(iovad, &limit_pfn);
> +	curr = __get_cached_rbnode_and_limit(iovad, &limit_pfn);
>  	prev = curr;
>  	while (curr) {
>  		struct iova *curr_iova = rb_entry(curr, struct iova, node);
> @@ -238,7 +266,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
>  
>  	/* If we have 'prev', it's a valid place to start the insertion. */
>  	iova_insert_rbtree(&iovad->rbroot, new, prev);
> -	__cached_rbnode_insert_update(iovad, saved_pfn, new);
> +	__cached_rbnode_insert_update(iovad, saved_pfn, &new->node);
>  
>  	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
>  
> diff --git a/include/linux/iova.h b/include/linux/iova.h
> index d179b9b..ea89695 100644
> --- a/include/linux/iova.h
> +++ b/include/linux/iova.h
> @@ -71,6 +71,7 @@ struct iova_domain {
>  	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
>  	struct rb_root	rbroot;		/* iova domain rbtree root */
>  	struct rb_node	*cached32_node; /* Save last alloced node */
> +	struct rb_node	*cached64_node; /* Save last alloced node */
>  	unsigned long	granule;	/* pfn granularity for this domain */
>  	unsigned long	start_pfn;	/* Lower limit for this domain */
>  	unsigned long	dma_32bit_pfn;
> -- 
> 2.1.2

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] iommu/iova: Improve alloc_iova performance
  2017-09-27 14:10 ` Joerg Roedel
@ 2017-09-27 15:21   ` David Woods
  0 siblings, 0 replies; 3+ messages in thread
From: David Woods @ 2017-09-27 15:21 UTC (permalink / raw)
  To: Joerg Roedel, robin.murphy; +Cc: iommu, linux-kernel, cmetcalf


I see now that this is redundant with Robin's patch series "Optimise 
64-bit IOVA allocations".  I tested those patches on our platform and 
find that they solve the performance problem we were having. So, I'd 
like to withdraw this patch.

On 9/27/2017 10:10 AM, Joerg Roedel wrote:
> Adding Robin.
>
> Robin, can you please have a look?
>
> On Wed, Sep 20, 2017 at 11:28:19AM -0400, David Woods wrote:
>> When allocating pages with alloc_iova() where limit_pfn > dma_32bit_pfn
>> __alloc_and_insert_iova_range does a linear traversal of the tree to
>> find a free block.  In the worst case it makes the alloc O(n) for each
>> page, where n is the number of pages allocated so far.  The worst case
>> turns out to be common for drivers that allocate lots of pages at start
>> up and never free them.
>>
>> There is an optimization for allocating 32-bit addresses where it
>> starts the search at the point where the previous allocated page was
>> inserted.  This is sensible, since otherwise it would have to always
>> search through all the 64-bit pages first.
>>
>> To improve this situation, extend the optimization to also keep track
>> of the point where the previous 64-bit page was inserted.  With this
>> change, the search for an available slot can normally be done in
>> constant time and the whole alloc_iova only costs O(log n) due to the
>> rb tree insert.
>>
>> Reviewed-by: Chris Metcalf <cmetcalf@mellanox.com>
>> Signed-off-by: David Woods <dwoods@mellanox.com>
>> ---
>>   drivers/iommu/iova.c | 82 +++++++++++++++++++++++++++++++++++-----------------
>>   include/linux/iova.h |  1 +
>>   2 files changed, 56 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>> index 33edfa7..3c82694 100644
>> --- a/drivers/iommu/iova.c
>> +++ b/drivers/iommu/iova.c
>> @@ -108,15 +108,26 @@ int init_iova_flush_queue(struct iova_domain *iovad,
>>   EXPORT_SYMBOL_GPL(init_iova_flush_queue);
>>   
>>   static struct rb_node *
>> -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
>> +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
>>   {
>> -	if ((*limit_pfn > iovad->dma_32bit_pfn) ||
>> -		(iovad->cached32_node == NULL))
>> +	if (limit_pfn <= iovad->dma_32bit_pfn)
>> +		return iovad->cached32_node;
>> +	else
>> +		return iovad->cached64_node;
>> +}
>> +
>> +static struct rb_node *
>> +__get_cached_rbnode_and_limit(struct iova_domain *iovad,
>> +			      unsigned long *limit_pfn)
>> +{
>> +	struct rb_node *cached_node = __get_cached_rbnode(iovad, *limit_pfn);
>> +
>> +	if (cached_node == NULL) {
>>   		return rb_last(&iovad->rbroot);
>> -	else {
>> -		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
>> +	} else {
>> +		struct rb_node *prev_node = rb_prev(cached_node);
>>   		struct iova *curr_iova =
>> -			rb_entry(iovad->cached32_node, struct iova, node);
>> +			rb_entry(cached_node, struct iova, node);
>>   		*limit_pfn = curr_iova->pfn_lo;
>>   		return prev_node;
>>   	}
>> @@ -124,33 +135,50 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
>>   
>>   static void
>>   __cached_rbnode_insert_update(struct iova_domain *iovad,
>> -	unsigned long limit_pfn, struct iova *new)
>> +	unsigned long limit_pfn, struct rb_node *node)
>>   {
>> -	if (limit_pfn != iovad->dma_32bit_pfn)
>> -		return;
>> -	iovad->cached32_node = &new->node;
>> +	if (limit_pfn == iovad->dma_32bit_pfn)
>> +		iovad->cached32_node = node;
>> +	else if (limit_pfn > iovad->dma_32bit_pfn)
>> +		iovad->cached64_node = node;
>>   }
>>   
>>   static void
>>   __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
>>   {
>>   	struct iova *cached_iova;
>> -	struct rb_node *curr;
>> -
>> -	if (!iovad->cached32_node)
>> -		return;
>> -	curr = iovad->cached32_node;
>> -	cached_iova = rb_entry(curr, struct iova, node);
>> -
>> -	if (free->pfn_lo >= cached_iova->pfn_lo) {
>> -		struct rb_node *node = rb_next(&free->node);
>> -		struct iova *iova = rb_entry(node, struct iova, node);
>> +	struct rb_node *cached_node;
>>   
>> -		/* only cache if it's below 32bit pfn */
>> -		if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
>> -			iovad->cached32_node = node;
>> -		else
>> -			iovad->cached32_node = NULL;
>> +	if (free->pfn_lo <= iovad->dma_32bit_pfn) {
>> +		if (unlikely(iovad->cached64_node == &free->node))
>> +			iovad->cached64_node = NULL;
>> +		cached_node = iovad->cached32_node;
>> +		if (!cached_node)
>> +			return;
>> +		cached_iova = rb_entry(cached_node, struct iova, node);
>> +		if (free->pfn_lo >= cached_iova->pfn_lo) {
>> +			struct rb_node *next_node = rb_next(&free->node);
>> +
>> +			if (next_node &&
>> +			    rb_entry(next_node, struct iova, node)->pfn_lo <=
>> +			    iovad->dma_32bit_pfn)
>> +				iovad->cached32_node = next_node;
>> +			else
>> +				iovad->cached32_node = NULL;
>> +		}
>> +	} else {
>> +		cached_node = iovad->cached64_node;
>> +		if (!cached_node)
>> +			return;
>> +		cached_iova = container_of(cached_node, struct iova, node);
>> +		if (free->pfn_lo >= cached_iova->pfn_lo) {
>> +			struct rb_node *next_node = rb_next(&free->node);
>> +
>> +			if (next_node)
>> +				iovad->cached64_node = next_node;
>> +			else
>> +				iovad->cached64_node = NULL;
>> +		}
>>   	}
>>   }
>>   
>> @@ -204,7 +232,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
>>   	/* Walk the tree backwards */
>>   	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
>>   	saved_pfn = limit_pfn;
>> -	curr = __get_cached_rbnode(iovad, &limit_pfn);
>> +	curr = __get_cached_rbnode_and_limit(iovad, &limit_pfn);
>>   	prev = curr;
>>   	while (curr) {
>>   		struct iova *curr_iova = rb_entry(curr, struct iova, node);
>> @@ -238,7 +266,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
>>   
>>   	/* If we have 'prev', it's a valid place to start the insertion. */
>>   	iova_insert_rbtree(&iovad->rbroot, new, prev);
>> -	__cached_rbnode_insert_update(iovad, saved_pfn, new);
>> +	__cached_rbnode_insert_update(iovad, saved_pfn, &new->node);
>>   
>>   	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
>>   
>> diff --git a/include/linux/iova.h b/include/linux/iova.h
>> index d179b9b..ea89695 100644
>> --- a/include/linux/iova.h
>> +++ b/include/linux/iova.h
>> @@ -71,6 +71,7 @@ struct iova_domain {
>>   	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
>>   	struct rb_root	rbroot;		/* iova domain rbtree root */
>>   	struct rb_node	*cached32_node; /* Save last alloced node */
>> +	struct rb_node	*cached64_node; /* Save last alloced node */
>>   	unsigned long	granule;	/* pfn granularity for this domain */
>>   	unsigned long	start_pfn;	/* Lower limit for this domain */
>>   	unsigned long	dma_32bit_pfn;
>> -- 
>> 2.1.2

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-09-27 15:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-09-20 15:28 [PATCH] iommu/iova: Improve alloc_iova performance David Woods
2017-09-27 14:10 ` Joerg Roedel
2017-09-27 15:21   ` David Woods

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox