All of lore.kernel.org
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Carsten Schiers <carsten@schiers.de>
Cc: Konrad Rzeszutek Wilk <konrad@darnok.org>,
	xen-devel <xen-devel@lists.xensource.com>,
	Jan Beulich <jbeulich@suse.com>,
	Sander Eikelenboom <linux@eikelenboom.it>
Subject: Re: Load increase after memory upgrade (part2)
Date: Wed, 13 Jun 2012 12:55:29 -0400	[thread overview]
Message-ID: <20120613165529.GA10986@phenom.dumpdata.com> (raw)
In-Reply-To: <20120511194138.GA30099@phenom.dumpdata.com>

On Fri, May 11, 2012 at 03:41:38PM -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 11:39:08AM +0200, Carsten Schiers wrote:
> > Hi Konrad,
> > 
> >  
> > don't want to be pushy, as I have no real issue. I simply use the Xenified kernel or take the double load. 
> > 
> > But I think this mystery is still open. My last status was that the latest patch you produced resulted in a BUG, 
> 
> Yes, that is right. Thank you for reminding me.
> > 
> > so we still have not checked whether our theory is correct.
> 
> No we haven't. And I should have no trouble reproducing this. I can just write
> a tiny module that allocates vmalloc_32().

Done. Found some bugs.. and here is a new version. Can you please
try it out? It has the #define DEBUG 1 set so it should print a lot of
stuff when the DVB module loads. If it crashes please send me the full log.

Thanks.
>From 5afb4ab1fb3d2b059fe1a6db93ab65cb76f43b8a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 31 May 2012 14:21:04 -0400
Subject: [PATCH] xen/vmalloc_32: Use xen_exchange_.. when GFP flags are DMA.
 [v3]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c    |  187 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/xen-ops.h |    2 +
 mm/vmalloc.c          |   18 +++++-
 3 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785..960d206 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include <trace/events/xen.h>
 
@@ -2051,6 +2052,7 @@ void __init xen_init_mmu_ops(void)
 /* Protected by xen_reservation_lock. */
 #define MAX_CONTIG_ORDER 9 /* 2MB */
 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
 
 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2075,6 +2077,61 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
 	}
 	xen_mc_issue(0);
 }
+
+/* Debug aid for this test patch: enables the printk()s guarded by DEBUG. */
+#define DEBUG 1
+
+/*
+ * Zap the kernel mapping and P2M entry of every page selected in
+ * @limit_bitmap.
+ *
+ * @pages:        first of (1 << @order) consecutive struct pages
+ * @order:        log2 of the number of pages covered by @limit_bitmap
+ * @in_frames:    if non-NULL, receives the MFN of each zapped page
+ * @out_frames:   if non-NULL, receives the PFN of each zapped page
+ * @limit_bitmap: bit i set means pages[i] is to be zapped
+ *
+ * Both frame arrays are filled densely (slot k holds the k-th selected
+ * page), because the caller hands the first n entries to
+ * xen_exchange_memory() as n extents.
+ *
+ * Returns the number of pages zapped.
+ */
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+				unsigned long *in_frames,
+				unsigned long *out_frames,
+				void *limit_bitmap)
+{
+	int i, n = 0;
+	struct multicall_space mcs;
+	struct page *page;
+
+	xen_mc_batch();
+	for (i = 0; i < (1UL<<order); i++) {
+		if (!test_bit(i, limit_bitmap))
+			continue;
+
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+		if (in_frames) {
+#ifdef DEBUG
+			printk(KERN_INFO "%s:%d 0x%lx(pfn) 0x%lx (mfn) 0x%lx(vaddr)\n",
+				__func__, i, page_to_pfn(page),
+				pfn_to_mfn(page_to_pfn(page)),
+				(unsigned long)page_address(page));
+#endif
+			in_frames[n] = pfn_to_mfn(page_to_pfn(page));
+		}
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page), VOID_PTE, 0);
+		set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
+
+		if (out_frames)
+			out_frames[n] = page_to_pfn(page);
+		++n;
+	}
+	xen_mc_issue(0);
+	return n;
+}
 
 /*
  * Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2118,6 +2156,76 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
 
 	xen_mc_issue(0);
 }
+
+/*
+ * Re-establish the kernel mapping and P2M entry of every page selected in
+ * @limit_map.
+ *
+ * @pages:     first of (1 << @order) consecutive struct pages
+ * @order:     log2 of the number of pages covered by @limit_map
+ * @mfns:      if non-NULL, mfns[k] is the MFN for the k-th selected page
+ * @first_mfn: used only when @mfns is NULL: pages[i] is mapped to
+ *             first_mfn + i
+ * @limit_map: bit i set means pages[i] is to be remapped
+ *
+ * NOTE(review): the @mfns == NULL fallback only restores the original frames
+ * if the range was machine-contiguous starting at @first_mfn - confirm
+ * against the caller's failure path.
+ */
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+				     unsigned long *mfns,
+				     unsigned long first_mfn, /* in_frame if we failed*/
+				     void *limit_map)
+{
+	unsigned i, k = 0, limit, last;
+	unsigned long mfn;
+	struct page *page;
+
+	limit = 1U << order;
+	/* Index of the last page that will actually be remapped. */
+	last = find_last_bit(limit_map, limit);
+
+	xen_mc_batch();
+
+	for (i = 0; i < limit; i++) {
+		struct multicall_space mcs;
+		unsigned flags;
+
+		if (!test_bit(i, limit_map))
+			continue;
+
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+		/* @mfns is packed: one entry per selected page, not per page. */
+		if (mfns)
+			mfn = mfns[k++];
+		else
+			mfn = first_mfn + i;
+
+		/*
+		 * Flush the TLB on the last update actually issued, so the
+		 * flush is not lost when the final pages are not selected.
+		 */
+		if (i != last)
+			flags = 0;
+		else if (order == 0)
+			flags = UVMF_INVLPG | UVMF_ALL;
+		else
+			flags = UVMF_TLB_FLUSH | UVMF_ALL;
+#ifdef DEBUG
+		printk(KERN_INFO "%s (%u) pfn:0x%lx, mfn: 0x%lx vaddr: 0x%lx\n",
+			__func__, i, page_to_pfn(page), mfn,
+			(unsigned long)page_address(page));
+#endif
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+				mfn_pte(mfn, PAGE_KERNEL), flags);
+
+		set_phys_to_machine(page_to_pfn(page), mfn);
+	}
+
+	xen_mc_issue(0);
+}
+
 
 /*
  * Perform the hypercall to exchange a region of our pfns to point to
@@ -2136,7 +2221,9 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 {
 	long rc;
 	int success;
-
+#ifdef DEBUG
+	int i;
+#endif
 	struct xen_memory_exchange exchange = {
 		.in = {
 			.nr_extents   = extents_in,
@@ -2157,7 +2244,11 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 
 	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
 	success = (exchange.nr_exchanged == extents_in);
-
+#ifdef DEBUG
+	for (i = 0; i <  exchange.nr_exchanged; i++) {
+		printk(KERN_INFO "%s 0x%lx (mfn) <-> 0x%lx (mfn)\n",  __func__,pfns_in[i], mfns_out[i]);
+	}
+#endif
 	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
 	BUG_ON(success && (rc != 0));
 
@@ -2231,8 +2322,8 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 	xen_zap_pfn_range(vstart, order, NULL, out_frames);
 
 	/* 3. Do the exchange for non-contiguous MFNs. */
-	success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
-					0, out_frames, 0);
+	success = xen_exchange_memory(1, order, &in_frame,
+				      1UL << order, 0, out_frames, 0);
 
 	/* 4. Map new pages in place of old pages. */
 	if (success)
@@ -2244,6 +2335,117 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
+/*
+ * Exchange every page of the range whose MFN does not fit in @address_bits
+ * for a machine frame that does.
+ *
+ * @pages:        first of (1 << @order) consecutive struct pages
+ * @order:        log2 of the number of pages; at most MAX_CONTIG_ORDER
+ * @address_bits: machine address width limit, handed to
+ *                xen_exchange_memory(); if non-zero must be >= PAGE_SHIFT
+ *
+ * Every page that is not skipped as already fitting is zeroed before the
+ * exchange.
+ *
+ * Returns 0 on success, when no page needs exchanging, or on an
+ * auto-translated guest; -EINVAL for a bad @address_bits; -ENOMEM if
+ * @order is too large, the bitmap cannot be allocated or the exchange fails.
+ */
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits)
+{
+	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+	unsigned long  flags;
+	struct page *page;
+	int success;
+	int i, n = 0;
+	unsigned long _limit_map = 0;
+	unsigned long *limit_map;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return -ENOMEM;
+
+	/* Validate before allocating so this error path has nothing to free. */
+	if (address_bits && (address_bits < PAGE_SHIFT))
+		return -EINVAL;
+
+	/* 0. Construct our per page bitmap lookup. */
+
+	/* One long covers up to BITS_PER_LONG pages; only more need the heap. */
+	if ((1U << order) > BITS_PER_LONG) {
+		limit_map = kzalloc(BITS_TO_LONGS(1U << order) *
+				    sizeof(*limit_map), GFP_KERNEL);
+		if (unlikely(!limit_map))
+			return -ENOMEM;
+	} else
+		limit_map = &_limit_map;
+
+	/*
+	 * NOTE(review): for order 0 bit 0 is pre-set and never cleared below, so
+	 * a single page is exchanged even if its MFN already fits - confirm
+	 * this is intended.
+	 */
+	if (order)
+		bitmap_zero(limit_map, 1U << order);
+	else
+		__set_bit(0, limit_map);
+
+	/* 1. Clear the pages */
+	for (i = 0; i < (1ULL << order); i++) {
+		void *vaddr;
+		page = &pages[i];
+
+		vaddr = page_address(page);
+#ifdef DEBUG
+		printk(KERN_INFO "%s: page: %p vaddr: %p 0x%lx(mfn) 0x%lx(pfn)\n", __func__, page, vaddr, virt_to_mfn(vaddr), mfn_to_pfn(virt_to_mfn(vaddr)));
+#endif
+		if (address_bits) {
+			if (!(virt_to_mfn(vaddr) >> (address_bits - PAGE_SHIFT)))
+				continue;
+			__set_bit(i, limit_map);
+		}
+		if (!PageHighMem(page))
+			memset(vaddr, 0, PAGE_SIZE);
+		else {
+			memset(kmap(page), 0, PAGE_SIZE);
+			kunmap(page);
+			++n;
+		}
+	}
+	/* Check to see if we actually have to do any work. */
+	if (bitmap_empty(limit_map, 1U << order)) {
+		if (limit_map != &_limit_map)
+			kfree(limit_map);
+		return 0;
+	}
+	if (n)
+		kmap_flush_unused();
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 2. Zap current PTEs. */
+	n = xen_zap_page_range(pages, order, in_frames, NULL /*out_frames */, limit_map);
+
+	/* 3. Do the exchange for non-contiguous MFNs. */
+	success = xen_exchange_memory(n, 0 /* this is always called per page */, in_frames,
+				      n, 0, out_frames, address_bits);
+
+	/* 4. Map new pages in place of old pages. */
+	if (success)
+		xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+	else
+		xen_remap_exchanged_pages(pages, order, NULL, *in_frames, limit_map);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+	if (limit_map != &_limit_map)
+		kfree(limit_map);
+
+	return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
 {
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..2f8709f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long mfn, int nr,
 			       pgprot_t prot, unsigned domid);
 
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits);
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2aad499..194af07 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1576,7 +1578,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+	gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+	if (xen_pv_domain()) {
+		if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+			gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+	}
 	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
 
@@ -1612,6 +1618,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
+		if (xen_pv_domain()) {
+			if (dma_mask) {
+				if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+					area->nr_pages = i + 1;
+					goto fail;
+				}
+			if (gfp_mask & __GFP_ZERO)
+				clear_highpage(page);
+			}
+		}
 	}
 
 	if (map_vm_area(area, prot, &pages))
-- 
1.7.7.6

  reply	other threads:[~2012-06-13 16:55 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-24 12:28 Load increase after memory upgrade (part2) Carsten Schiers
2011-11-25 18:42 ` Konrad Rzeszutek Wilk
2011-11-25 22:11   ` Carsten Schiers
2011-11-28 15:28     ` Konrad Rzeszutek Wilk
2011-11-28 15:40       ` Ian Campbell
2011-11-28 16:45         ` Konrad Rzeszutek Wilk
2011-11-29  8:31           ` Jan Beulich
2011-11-29  9:31             ` Carsten Schiers
2011-11-29  9:46           ` Carsten Schiers
2011-11-29 10:23           ` Ian Campbell
2011-11-29 15:33             ` Konrad Rzeszutek Wilk
2011-12-02 15:23               ` Konrad Rzeszutek Wilk
2011-12-04 11:59                 ` Carsten Schiers
2011-12-04 12:09                 ` Carsten Schiers
2011-12-06  3:26                   ` Konrad Rzeszutek Wilk
2011-12-14 20:23                     ` Konrad Rzeszutek Wilk
2011-12-14 22:07                       ` Konrad Rzeszutek Wilk
2011-12-15 14:52                         ` Carsten Schiers
2011-12-16 14:56                         ` Carsten Schiers
2011-12-16 15:04                           ` Konrad Rzeszutek Wilk
2011-12-16 15:51                             ` Carsten Schiers
2011-12-16 16:19                               ` Konrad Rzeszutek Wilk
2011-12-17 22:12                                 ` Carsten Schiers
2011-12-18  0:19                                   ` Sander Eikelenboom
2011-12-19 14:56                                     ` Konrad Rzeszutek Wilk
2012-01-10 21:55                                       ` Konrad Rzeszutek Wilk
2012-01-12 22:06                                         ` Sander Eikelenboom
2012-01-13  8:12                                           ` Jan Beulich
2012-01-13 15:13                                           ` Konrad Rzeszutek Wilk
2012-01-15 11:32                                             ` Sander Eikelenboom
2012-01-17 21:02                                               ` Konrad Rzeszutek Wilk
2012-01-18 11:28                                                 ` Pasi Kärkkäinen
2012-01-18 11:39                                                   ` Jan Beulich
2012-01-18 11:35                                                 ` Jan Beulich
2012-01-18 14:29                                                   ` Konrad Rzeszutek Wilk
2012-01-23 22:32                                                     ` Konrad Rzeszutek Wilk
2012-01-24  8:58                                                       ` Jan Beulich
2012-01-24 14:17                                                         ` Konrad Rzeszutek Wilk
2012-01-24 21:32                                                       ` Carsten Schiers
2012-01-25 12:02                                                       ` Carsten Schiers
2012-01-25 19:06                                                       ` Carsten Schiers
2012-01-25 21:02                                                         ` Konrad Rzeszutek Wilk
2012-02-15 19:28                                                         ` Konrad Rzeszutek Wilk
2012-02-16  8:56                                                           ` Jan Beulich
2012-02-17 15:07                                                             ` Konrad Rzeszutek Wilk
2012-02-28 14:35                                                               ` Carsten Schiers
2012-02-29 12:10                                                                 ` Carsten Schiers
2012-02-29 12:56                                                                   ` Carsten Schiers
2012-05-11  9:39                                                                     ` Carsten Schiers
2012-05-11 19:41                                                                       ` Konrad Rzeszutek Wilk
2012-06-13 16:55                                                                         ` Konrad Rzeszutek Wilk [this message]
2012-06-14  7:07                                                                           ` Jan Beulich
2012-06-14 18:33                                                                             ` Konrad Rzeszutek Wilk
2012-06-14 18:43                                                                             ` Carsten Schiers
2012-06-14  8:38                                                                           ` David Vrabel
2012-06-14 18:31                                                                             ` Konrad Rzeszutek Wilk
2012-06-14 18:40                                                                           ` Carsten Schiers
2012-06-14 19:16                                                                             ` Carsten Schiers
2011-12-19 14:54                                   ` Konrad Rzeszutek Wilk
2011-12-04 12:18                 ` Carsten Schiers
2011-11-28 16:58         ` Laszlo Ersek
2011-11-29  9:37         ` Carsten Schiers
2011-11-28 15:52       ` Carsten Schiers
2011-11-26  9:14   ` Carsten Schiers
2011-11-28 15:30     ` Konrad Rzeszutek Wilk
2011-11-29  9:42       ` Carsten Schiers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120613165529.GA10986@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=carsten@schiers.de \
    --cc=jbeulich@suse.com \
    --cc=konrad@darnok.org \
    --cc=linux@eikelenboom.it \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.