xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Carsten Schiers <carsten@schiers.de>
Cc: Konrad Rzeszutek Wilk <konrad@darnok.org>,
	xen-devel <xen-devel@lists.xensource.com>,
	Jan Beulich <jbeulich@suse.com>,
	Sander Eikelenboom <linux@eikelenboom.it>
Subject: Re: Load increase after memory upgrade (part2)
Date: Wed, 13 Jun 2012 12:55:29 -0400	[thread overview]
Message-ID: <20120613165529.GA10986@phenom.dumpdata.com> (raw)
In-Reply-To: <20120511194138.GA30099@phenom.dumpdata.com>

On Fri, May 11, 2012 at 03:41:38PM -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 11:39:08AM +0200, Carsten Schiers wrote:
> > Hi Konrad,
> > 
> >  
> > don't want to be pushy, as I have no real issue. I simply use the Xenified kernel or take the double load. 
> > 
> > But I think this mistery is still open. My last status was that the latest patch you produced resulted in a BUG, 
> 
> Yes, that is right. Thank you for reminding me.
> > 
> > so we still have not checked whether our theory is correct.
> 
> No we haven't. And I should be have no trouble reproducing this. I can just write
> a tiny module that allocates vmalloc_32().

Done. Found some bugs.. and here is anew version. Can you please
try it out? It has the #define DEBUG 1 set so it should print a lot of
stuff when the DVB module loads. If it crashes please send me the full log.

Thanks.
>From 5afb4ab1fb3d2b059fe1a6db93ab65cb76f43b8a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 31 May 2012 14:21:04 -0400
Subject: [PATCH] xen/vmalloc_32: Use xen_exchange_.. when GFP flags are DMA.
 [v3]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c    |  187 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/xen-ops.h |    2 +
 mm/vmalloc.c          |   18 +++++-
 3 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785..960d206 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include <trace/events/xen.h>
 
@@ -2051,6 +2052,7 @@ void __init xen_init_mmu_ops(void)
 /* Protected by xen_reservation_lock. */
 #define MAX_CONTIG_ORDER 9 /* 2MB */
 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
 
 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2075,6 +2077,42 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
 	}
 	xen_mc_issue(0);
 }
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+				unsigned long *in_frames,
+				unsigned long *out_frames,
+				void *limit_bitmap)
+{
+	int i, n = 0;
+	struct multicall_space mcs;
+	struct page *page;
+
+	xen_mc_batch();
+	for (i = 0; i < (1UL<<order); i++) {
+		if (!test_bit(i, limit_bitmap))
+			continue;
+
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+#define DEBUG 1
+		if (in_frames) {
+#ifdef DEBUG
+			printk(KERN_INFO "%s:%d 0x%lx(pfn) 0x%lx (mfn) 0x%lx(vaddr)\n",
+				__func__, i, page_to_pfn(page),
+				pfn_to_mfn(page_to_pfn(page)), page_address(page));
+#endif
+			in_frames[i] = pfn_to_mfn(page_to_pfn(page));
+		}
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page), VOID_PTE, 0);
+		set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
+
+		if (out_frames)
+			out_frames[i] = page_to_pfn(page);
+		++n;
+
+	}
+	xen_mc_issue(0);
+	return n;
+}
 
 /*
  * Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2118,6 +2156,53 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
 
 	xen_mc_issue(0);
 }
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+				     unsigned long *mfns,
+				     unsigned long first_mfn, /* in_frame if we failed*/
+				     void *limit_map)
+{
+	unsigned i, limit;
+	unsigned long mfn;
+	struct page *page;
+
+	xen_mc_batch();
+
+	limit = 1ULL << order;
+	for (i = 0; i < limit; i++) {
+		struct multicall_space mcs;
+		unsigned flags;
+
+		if (!test_bit(i, limit_map))
+			continue;
+
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+		if (mfns)
+			mfn = mfns[i];
+		else
+			mfn = first_mfn + i;
+
+		if (i < (limit - 1))
+			flags = 0;
+		else {
+			if (order == 0)
+				flags = UVMF_INVLPG | UVMF_ALL;
+			else
+				flags = UVMF_TLB_FLUSH | UVMF_ALL;
+		}
+#ifdef DEBUG
+		printk(KERN_INFO "%s (%d) pfn:0x%lx, pfn: 0x%lx vaddr: 0x%lx\n",
+			__func__, i, page_to_pfn(page), mfn, page_address(page));
+#endif
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+				mfn_pte(mfn, PAGE_KERNEL), flags);
+
+		set_phys_to_machine(page_to_pfn(page), mfn);
+	}
+
+	xen_mc_issue(0);
+}
+
 
 /*
  * Perform the hypercall to exchange a region of our pfns to point to
@@ -2136,7 +2221,9 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 {
 	long rc;
 	int success;
-
+#ifdef DEBUG
+	int i;
+#endif
 	struct xen_memory_exchange exchange = {
 		.in = {
 			.nr_extents   = extents_in,
@@ -2157,7 +2244,11 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 
 	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
 	success = (exchange.nr_exchanged == extents_in);
-
+#ifdef DEBUG
+	for (i = 0; i <  exchange.nr_exchanged; i++) {
+		printk(KERN_INFO "%s 0x%lx (mfn) <-> 0x%lx (mfn)\n",  __func__,pfns_in[i], mfns_out[i]);
+	}
+#endif
 	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
 	BUG_ON(success && (rc != 0));
 
@@ -2231,8 +2322,8 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 	xen_zap_pfn_range(vstart, order, NULL, out_frames);
 
 	/* 3. Do the exchange for non-contiguous MFNs. */
-	success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
-					0, out_frames, 0);
+	success = xen_exchange_memory(1, order, &in_frame,
+				      1UL << order, 0, out_frames, 0);
 
 	/* 4. Map new pages in place of old pages. */
 	if (success)
@@ -2244,6 +2335,94 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits)
+{
+	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+	unsigned long  flags;
+	struct page *page;
+	int success;
+	int i, n = 0;
+	unsigned long _limit_map;
+	unsigned long *limit_map;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return -ENOMEM;
+
+	if (BITS_PER_LONG >> order) {
+		limit_map = kzalloc(BITS_TO_LONGS(1U << order) *
+				    sizeof(*limit_map), GFP_KERNEL);
+		if (unlikely(!limit_map))
+			return -ENOMEM;
+	} else
+		limit_map = &_limit_map;
+
+	/* 0. Construct our per page bitmap lookup. */
+
+	if (address_bits && (address_bits < PAGE_SHIFT))
+			return -EINVAL;
+
+	if (order)
+		bitmap_zero(limit_map, 1U << order);
+	else
+		__set_bit(0, limit_map);
+
+	/* 1. Clear the pages */
+	for (i = 0; i < (1ULL << order); i++) {
+		void *vaddr;
+		page = &pages[i];
+
+		vaddr = page_address(page);
+#ifdef DEBUG
+		printk(KERN_INFO "%s: page: %p vaddr: %p 0x%lx(mfn) 0x%lx(pfn)\n", __func__, page, vaddr, virt_to_mfn(vaddr), mfn_to_pfn(virt_to_mfn(vaddr)));
+#endif
+		if (address_bits) {
+			if (!(virt_to_mfn(vaddr) >> (address_bits - PAGE_SHIFT)))
+				continue;
+			__set_bit(i, limit_map);
+		}
+		if (!PageHighMem(page))
+			memset(vaddr, 0, PAGE_SIZE);
+		else {
+			memset(kmap(page), 0, PAGE_SIZE);
+			kunmap(page);
+			++n;
+		}
+	}
+	/* Check to see if we actually have to do any work. */
+	if (bitmap_empty(limit_map, 1U << order)) {
+		if (limit_map != &_limit_map)
+			kfree(limit_map);
+		return 0;
+	}
+	if (n)
+		kmap_flush_unused();
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 2. Zap current PTEs. */
+	n = xen_zap_page_range(pages, order, in_frames, NULL /*out_frames */, limit_map);
+
+	/* 3. Do the exchange for non-contiguous MFNs. */
+	success = xen_exchange_memory(n, 0 /* this is always called per page */, in_frames,
+				      n, 0, out_frames, address_bits);
+
+	/* 4. Map new pages in place of old pages. */
+	if (success)
+		xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+	else
+		xen_remap_exchanged_pages(pages, order, NULL, *in_frames, limit_map);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+	if (limit_map != &_limit_map)
+		kfree(limit_map);
+
+	return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
 {
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..2f8709f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long mfn, int nr,
 			       pgprot_t prot, unsigned domid);
 
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits);
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2aad499..194af07 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1576,7 +1578,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+	gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+	if (xen_pv_domain()) {
+		if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+			gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+	}
 	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
 
@@ -1612,6 +1618,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
+		if (xen_pv_domain()) {
+			if (dma_mask) {
+				if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+					area->nr_pages = i + 1;
+					goto fail;
+				}
+			if (gfp_mask & __GFP_ZERO)
+				clear_highpage(page);
+			}
+		}
 	}
 
 	if (map_vm_area(area, prot, &pages))
-- 
1.7.7.6

  reply	other threads:[~2012-06-13 16:55 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-24 12:28 Load increase after memory upgrade (part2) Carsten Schiers
2011-11-25 18:42 ` Konrad Rzeszutek Wilk
2011-11-25 22:11   ` Carsten Schiers
2011-11-28 15:28     ` Konrad Rzeszutek Wilk
2011-11-28 15:40       ` Ian Campbell
2011-11-28 16:45         ` Konrad Rzeszutek Wilk
2011-11-29  8:31           ` Jan Beulich
2011-11-29  9:31             ` Carsten Schiers
2011-11-29  9:46           ` Carsten Schiers
2011-11-29 10:23           ` Ian Campbell
2011-11-29 15:33             ` Konrad Rzeszutek Wilk
2011-12-02 15:23               ` Konrad Rzeszutek Wilk
2011-12-04 11:59                 ` Carsten Schiers
2011-12-04 12:09                 ` Carsten Schiers
2011-12-06  3:26                   ` Konrad Rzeszutek Wilk
2011-12-14 20:23                     ` Konrad Rzeszutek Wilk
2011-12-14 22:07                       ` Konrad Rzeszutek Wilk
2011-12-15 14:52                         ` Carsten Schiers
2011-12-16 14:56                         ` Carsten Schiers
2011-12-16 15:04                           ` Konrad Rzeszutek Wilk
2011-12-16 15:51                             ` Carsten Schiers
2011-12-16 16:19                               ` Konrad Rzeszutek Wilk
2011-12-17 22:12                                 ` Carsten Schiers
2011-12-18  0:19                                   ` Sander Eikelenboom
2011-12-19 14:56                                     ` Konrad Rzeszutek Wilk
2012-01-10 21:55                                       ` Konrad Rzeszutek Wilk
2012-01-12 22:06                                         ` Sander Eikelenboom
2012-01-13  8:12                                           ` Jan Beulich
2012-01-13 15:13                                           ` Konrad Rzeszutek Wilk
2012-01-15 11:32                                             ` Sander Eikelenboom
2012-01-17 21:02                                               ` Konrad Rzeszutek Wilk
2012-01-18 11:28                                                 ` Pasi Kärkkäinen
2012-01-18 11:39                                                   ` Jan Beulich
2012-01-18 11:35                                                 ` Jan Beulich
2012-01-18 14:29                                                   ` Konrad Rzeszutek Wilk
2012-01-23 22:32                                                     ` Konrad Rzeszutek Wilk
2012-01-24  8:58                                                       ` Jan Beulich
2012-01-24 14:17                                                         ` Konrad Rzeszutek Wilk
2012-01-24 21:32                                                       ` Carsten Schiers
2012-01-25 12:02                                                       ` Carsten Schiers
2012-01-25 19:06                                                       ` Carsten Schiers
2012-01-25 21:02                                                         ` Konrad Rzeszutek Wilk
2012-02-15 19:28                                                         ` Konrad Rzeszutek Wilk
2012-02-16  8:56                                                           ` Jan Beulich
2012-02-17 15:07                                                             ` Konrad Rzeszutek Wilk
2012-02-28 14:35                                                               ` Carsten Schiers
2012-02-29 12:10                                                                 ` Carsten Schiers
2012-02-29 12:56                                                                   ` Carsten Schiers
2012-05-11  9:39                                                                     ` Carsten Schiers
2012-05-11 19:41                                                                       ` Konrad Rzeszutek Wilk
2012-06-13 16:55                                                                         ` Konrad Rzeszutek Wilk [this message]
2012-06-14  7:07                                                                           ` Jan Beulich
2012-06-14 18:33                                                                             ` Konrad Rzeszutek Wilk
2012-06-14 18:43                                                                             ` Carsten Schiers
2012-06-14  8:38                                                                           ` David Vrabel
2012-06-14 18:31                                                                             ` Konrad Rzeszutek Wilk
2012-06-14 18:40                                                                           ` Carsten Schiers
2012-06-14 19:16                                                                             ` Carsten Schiers
2011-12-19 14:54                                   ` Konrad Rzeszutek Wilk
2011-12-04 12:18                 ` Carsten Schiers
2011-11-28 16:58         ` Laszlo Ersek
2011-11-29  9:37         ` Carsten Schiers
2011-11-28 15:52       ` Carsten Schiers
2011-11-26  9:14   ` Carsten Schiers
2011-11-28 15:30     ` Konrad Rzeszutek Wilk
2011-11-29  9:42       ` Carsten Schiers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120613165529.GA10986@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=carsten@schiers.de \
    --cc=jbeulich@suse.com \
    --cc=konrad@darnok.org \
    --cc=linux@eikelenboom.it \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).