From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Carsten Schiers <carsten@schiers.de>
Cc: Konrad Rzeszutek Wilk <konrad@darnok.org>,
xen-devel <xen-devel@lists.xensource.com>,
Jan Beulich <JBeulich@suse.com>,
Sander Eikelenboom <linux@eikelenboom.it>
Subject: Re: Load increase after memory upgrade (part2)
Date: Wed, 15 Feb 2012 14:28:04 -0500 [thread overview]
Message-ID: <20120215192804.GA21695@phenom.dumpdata.com> (raw)
In-Reply-To: <zarafa.4f2052a4.080f.57e9c5dc2a4ae722@uhura.space.zz>
[-- Attachment #1: Type: text/plain, Size: 467 bytes --]
On Wed, Jan 25, 2012 at 08:06:12PM +0100, Carsten Schiers wrote:
> Some news: in order to prepare a clean setting, I upgraded to 3.2.1 kernel. I noticed that the load increase is
> reduced a bit, but noticeably. It's only a simple test, running the DomU for 2 minutes, but the idle load is approx.
>
> - 2.6.32 pvops 12-13%
> - 3.2.1 pvops 10-11%
> - 2.6.34 XenoLinux 7-8%
I took a stab at Jan's idea - it compiles but I haven't been able to properly test it.
[-- Attachment #2: vmalloc_using_xen_limit_pages.patch --]
[-- Type: text/plain, Size: 6845 bytes --]
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673..6bb6f68 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
+#include <linux/slab.h>
#include <trace/events/xen.h>
@@ -2073,6 +2074,7 @@ void __init xen_init_mmu_ops(void)
/* Protected by xen_reservation_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2097,6 +2099,36 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
}
xen_mc_issue(0);
}
+/*
+ * Zap the kernel mapping and the p2m entry of every page of @pages whose
+ * bit is set in @limit_bitmap.
+ *
+ * @pages:        first page of a run of (1 << @order) struct pages
+ * @order:        log2 of the number of pages covered by @limit_bitmap
+ * @in_frames:    if non-NULL, receives the MFN each zapped page had
+ * @out_frames:   if non-NULL, receives the PFN of each zapped page
+ * @limit_bitmap: one bit per page; only pages with their bit set are touched
+ *
+ * Frames are stored densely, i.e. the k-th selected page goes to slot k and
+ * not to its page index.  The caller hands the returned count together with
+ * @in_frames to xen_exchange_memory(), so the first "count" slots must all
+ * be valid.
+ *
+ * Returns the number of pages that were zapped.
+ *
+ * NOTE(review): the mapping is looked up with page_address(); presumably
+ * this is only valid for lowmem pages - confirm before using with highmem.
+ */
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+			      unsigned long *in_frames,
+			      unsigned long *out_frames,
+			      void *limit_bitmap)
+{
+	unsigned long i, nr_pages = 1UL << order;
+	int n = 0;
+
+	xen_mc_batch();
+	for (i = 0; i < nr_pages; i++) {
+		struct multicall_space mcs;
+		struct page *page;
+		unsigned long pfn;
+
+		if (!test_bit(i, limit_bitmap))
+			continue;
+
+		page = &pages[i];
+		pfn = page_to_pfn(page);
+		mcs = __xen_mc_entry(0);
+
+		/* Record the old MFN before the p2m entry is invalidated. */
+		if (in_frames)
+			in_frames[n] = pfn_to_mfn(pfn);
+
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+					VOID_PTE, 0);
+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+
+		if (out_frames)
+			out_frames[n] = pfn;
+		++n;
+	}
+	xen_mc_issue(0);
+
+	return n;
+}
/*
* Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2140,6 +2172,49 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
xen_mc_issue(0);
}
+/*
+ * Map machine frames back in place of the pages of @pages whose bit is set
+ * in @limit_map, updating both the kernel mapping and the p2m.
+ *
+ * @pages:     first page of a run of (1 << @order) struct pages
+ * @order:     log2 of the number of pages covered by @limit_map
+ * @mfns:      if non-NULL, one MFN per *selected* page, stored densely in
+ *             selection order (slot k belongs to the k-th set bit)
+ * @first_mfn: used only when @mfns is NULL; page i then gets @first_mfn + i
+ * @limit_map: one bit per page; only pages with their bit set are remapped
+ *
+ * The TLB flush is requested on the last mapping update that is actually
+ * issued, i.e. on the highest selected page.
+ */
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+				      unsigned long *mfns,
+				      unsigned long first_mfn,
+				      void *limit_map)
+{
+	unsigned int i, limit = 1u << order;
+	unsigned int last = limit;	/* "limit" means: nothing selected */
+	unsigned int k = 0;		/* dense index into @mfns */
+
+	/* Find the highest selected page so the flush lands on it. */
+	for (i = 0; i < limit; i++)
+		if (test_bit(i, limit_map))
+			last = i;
+
+	if (last == limit)
+		return;
+
+	xen_mc_batch();
+
+	for (i = 0; i <= last; i++) {
+		struct multicall_space mcs;
+		struct page *page;
+		unsigned long mfn;
+		unsigned flags;
+
+		if (!test_bit(i, limit_map))
+			continue;
+
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+
+		if (mfns)
+			mfn = mfns[k];
+		else
+			mfn = first_mfn + i;
+		k++;
+
+		if (i != last)
+			flags = 0;
+		else if (order == 0)
+			flags = UVMF_INVLPG | UVMF_ALL;
+		else
+			flags = UVMF_TLB_FLUSH | UVMF_ALL;
+
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+					mfn_pte(mfn, PAGE_KERNEL), flags);
+
+		set_phys_to_machine(page_to_pfn(page), mfn);
+	}
+
+	xen_mc_issue(0);
+}
+
/*
* Perform the hypercall to exchange a region of our pfns to point to
@@ -2266,6 +2341,90 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+/*
+ * Make sure every page in @pages is backed by a machine frame that is
+ * addressable with @address_bits bits, exchanging offending frames with
+ * the hypervisor where necessary.
+ *
+ * @pages:        first page of a run of (1 << @order) struct pages
+ * @order:        log2 of the number of pages; at most MAX_CONTIG_ORDER
+ * @address_bits: width of the addressable range in bits; 0 means "no
+ *                limit", in which case nothing is done
+ *
+ * Pages that have to be exchanged are zeroed first.  Pages whose frame is
+ * already below the limit are left untouched.
+ *
+ * Returns 0 on success (including when there was nothing to do), -EINVAL
+ * if @address_bits is non-zero but smaller than PAGE_SHIFT, and -ENOMEM if
+ * @order is too large or the exchange failed; on a failed exchange the
+ * original frames are mapped back.
+ *
+ * NOTE(review): the helpers address pages through page_address();
+ * presumably highmem pages need extra care - confirm.
+ */
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits)
+{
+	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+	/* One bit per page; 1 << MAX_CONTIG_ORDER bits is small enough for the stack. */
+	unsigned long limit_map[BITS_TO_LONGS(1 << MAX_CONTIG_ORDER)];
+	unsigned long nr_pages, i, flags;
+	unsigned int shift;
+	int nr_highmem = 0;
+	int success;
+	int n;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return -ENOMEM;
+
+	/* No limit requested: every frame qualifies. */
+	if (!address_bits)
+		return 0;
+
+	if (address_bits < PAGE_SHIFT)
+		return -EINVAL;
+
+	/* A limit wider than an MFN can express cannot be exceeded. */
+	shift = address_bits - PAGE_SHIFT;
+	if (shift >= BITS_PER_LONG)
+		return 0;
+
+	/* 0. Construct our per page bitmap lookup. */
+	nr_pages = 1UL << order;
+	bitmap_zero(limit_map, nr_pages);
+
+	/* 1. Select the pages above the limit and clear them. */
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = &pages[i];
+
+		if (!(pfn_to_mfn(page_to_pfn(page)) >> shift))
+			continue;
+		__set_bit(i, limit_map);
+
+		if (!PageHighMem(page))
+			memset(page_address(page), 0, PAGE_SIZE);
+		else {
+			memset(kmap(page), 0, PAGE_SIZE);
+			kunmap(page);
+			++nr_highmem;
+		}
+	}
+	/* Check to see if we actually have to do any work. */
+	if (bitmap_empty(limit_map, nr_pages))
+		return 0;
+	if (nr_highmem)
+		kmap_flush_unused();
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 2. Zap current PTEs, saving the old MFNs in in_frames. */
+	n = xen_zap_page_range(pages, order, in_frames, NULL /*out_frames */, limit_map);
+
+	/* 3. Do the exchange for non-contiguous MFNs. */
+	success = xen_exchange_memory(n, 0, in_frames,
+				      n, 0, out_frames, address_bits);
+
+	/* 4. Map new pages in place of old pages, or restore the saved ones. */
+	if (success)
+		xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+	else
+		xen_remap_exchanged_pages(pages, order, in_frames, 0, limit_map);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+	return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 03c85d7..ae5b1ef 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -28,4 +28,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long mfn, int nr,
pgprot_t prot, unsigned domid);
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+ unsigned int address_bits);
#endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 27be2f0..4fa2066 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
/*** Page table manipulation functions ***/
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1550,7 +1552,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page **pages;
unsigned int nr_pages, array_size, i;
gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+ gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+ if (xen_pv_domain()) {
+ if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+ gfp_mask &= (__GFP_DMA | __GFP_DMA32);
+ }
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1586,6 +1592,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
area->pages[i] = page;
+ if (xen_pv_domain()) {
+ if (dma_mask) {
+ if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+ area->nr_pages = i + 1;
+ goto fail;
+ }
+ if (gfp_mask & __GFP_ZERO)
+ clear_highpage(page);
+ }
+ }
}
if (map_vm_area(area, prot, &pages))
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next prev parent reply other threads:[~2012-02-15 19:28 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-24 12:28 Load increase after memory upgrade (part2) Carsten Schiers
2011-11-25 18:42 ` Konrad Rzeszutek Wilk
2011-11-25 22:11 ` Carsten Schiers
2011-11-28 15:28 ` Konrad Rzeszutek Wilk
2011-11-28 15:40 ` Ian Campbell
2011-11-28 16:45 ` Konrad Rzeszutek Wilk
2011-11-29 8:31 ` Jan Beulich
2011-11-29 9:31 ` Carsten Schiers
2011-11-29 9:46 ` Carsten Schiers
2011-11-29 10:23 ` Ian Campbell
2011-11-29 15:33 ` Konrad Rzeszutek Wilk
2011-12-02 15:23 ` Konrad Rzeszutek Wilk
2011-12-04 11:59 ` Carsten Schiers
2011-12-04 12:09 ` Carsten Schiers
2011-12-06 3:26 ` Konrad Rzeszutek Wilk
2011-12-14 20:23 ` Konrad Rzeszutek Wilk
2011-12-14 22:07 ` Konrad Rzeszutek Wilk
2011-12-15 14:52 ` Carsten Schiers
2011-12-16 14:56 ` Carsten Schiers
2011-12-16 15:04 ` Konrad Rzeszutek Wilk
2011-12-16 15:51 ` Carsten Schiers
2011-12-16 16:19 ` Konrad Rzeszutek Wilk
2011-12-17 22:12 ` Carsten Schiers
2011-12-18 0:19 ` Sander Eikelenboom
2011-12-19 14:56 ` Konrad Rzeszutek Wilk
2012-01-10 21:55 ` Konrad Rzeszutek Wilk
2012-01-12 22:06 ` Sander Eikelenboom
2012-01-13 8:12 ` Jan Beulich
2012-01-13 15:13 ` Konrad Rzeszutek Wilk
2012-01-15 11:32 ` Sander Eikelenboom
2012-01-17 21:02 ` Konrad Rzeszutek Wilk
2012-01-18 11:28 ` Pasi Kärkkäinen
2012-01-18 11:39 ` Jan Beulich
2012-01-18 11:35 ` Jan Beulich
2012-01-18 14:29 ` Konrad Rzeszutek Wilk
2012-01-23 22:32 ` Konrad Rzeszutek Wilk
2012-01-24 8:58 ` Jan Beulich
2012-01-24 14:17 ` Konrad Rzeszutek Wilk
2012-01-24 21:32 ` Carsten Schiers
2012-01-25 12:02 ` Carsten Schiers
2012-01-25 19:06 ` Carsten Schiers
2012-01-25 21:02 ` Konrad Rzeszutek Wilk
2012-02-15 19:28 ` Konrad Rzeszutek Wilk [this message]
2012-02-16 8:56 ` Jan Beulich
2012-02-17 15:07 ` Konrad Rzeszutek Wilk
2012-02-28 14:35 ` Carsten Schiers
2012-02-29 12:10 ` Carsten Schiers
2012-02-29 12:56 ` Carsten Schiers
2012-05-11 9:39 ` Carsten Schiers
2012-05-11 19:41 ` Konrad Rzeszutek Wilk
2012-06-13 16:55 ` Konrad Rzeszutek Wilk
2012-06-14 7:07 ` Jan Beulich
2012-06-14 18:33 ` Konrad Rzeszutek Wilk
2012-06-14 18:43 ` Carsten Schiers
2012-06-14 8:38 ` David Vrabel
2012-06-14 18:31 ` Konrad Rzeszutek Wilk
2012-06-14 18:40 ` Carsten Schiers
2012-06-14 19:16 ` Carsten Schiers
2011-12-19 14:54 ` Konrad Rzeszutek Wilk
2011-12-04 12:18 ` Carsten Schiers
2011-11-28 16:58 ` Laszlo Ersek
2011-11-29 9:37 ` Carsten Schiers
2011-11-28 15:52 ` Carsten Schiers
2011-11-26 9:14 ` Carsten Schiers
2011-11-28 15:30 ` Konrad Rzeszutek Wilk
2011-11-29 9:42 ` Carsten Schiers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120215192804.GA21695@phenom.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=JBeulich@suse.com \
--cc=carsten@schiers.de \
--cc=konrad@darnok.org \
--cc=linux@eikelenboom.it \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.