From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Carsten Schiers <carsten@schiers.de>
Cc: Konrad Rzeszutek Wilk <konrad@darnok.org>,
xen-devel <xen-devel@lists.xensource.com>,
Jan Beulich <JBeulich@suse.com>,
Sander Eikelenboom <linux@eikelenboom.it>
Subject: Re: Load increase after memory upgrade (part2)
Date: Wed, 15 Feb 2012 14:28:04 -0500
Message-ID: <20120215192804.GA21695@phenom.dumpdata.com>
In-Reply-To: <zarafa.4f2052a4.080f.57e9c5dc2a4ae722@uhura.space.zz>
[-- Attachment #1: Type: text/plain, Size: 467 bytes --]
On Wed, Jan 25, 2012 at 08:06:12PM +0100, Carsten Schiers wrote:
> Some news: in order to prepare a clean setting, I upgraded to the 3.2.1 kernel. I noticed that the load increase is
> reduced a bit, but noticeably. It's only a simple test, running the DomU for 2 minutes, but the idle load is approx.
>
> - 2.6.32 pvops 12-13%
> - 3.2.1 pvops 10-11%
> - 2.6.34 XenoLinux 7-8%
I took a stab at Jan's idea - it compiles, but I haven't been able to test it properly yet.
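For reference, the consumer this targets is the vmalloc_32() path. Here is a minimal
sketch of a caller (hypothetical driver code, not part of the patch; it assumes a
64-bit kernel with CONFIG_ZONE_DMA32, so that vmalloc_32() passes __GFP_DMA32 down
into __vmalloc_area_node()):

#include <linux/vmalloc.h>

/*
 * Illustration only: a driver allocating a streaming buffer.  In a PV
 * domU the MFNs backing such a buffer can sit above 4GB even though the
 * PFNs look fine, so each DMA may get bounced through swiotlb.  With
 * the patch below, __vmalloc_area_node() calls
 * xen_limit_pages_to_max_mfn(page, 0, 32) on every page and exchanges
 * the offending frames for ones below the 32-bit boundary.
 */
static void *alloc_streaming_buffer(unsigned long size)
{
	return vmalloc_32(size);	/* GFP_KERNEL | __GFP_DMA32 */
}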
[-- Attachment #2: vmalloc_using_xen_limit_pages.patch --]
[-- Type: text/plain, Size: 6845 bytes --]
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673..6bb6f68 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <trace/events/xen.h>
@@ -2073,6 +2074,7 @@ void __init xen_init_mmu_ops(void)
 /* Protected by xen_reservation_lock. */
 #define MAX_CONTIG_ORDER	9 /* 2MB */
 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2097,6 +2099,36 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
 	}
 	xen_mc_issue(0);
 }
+
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+			      unsigned long *in_frames,
+			      unsigned long *out_frames,
+			      void *limit_bitmap)
+{
+	int i, n = 0;
+	struct multicall_space mcs;
+	struct page *page;
+
+	xen_mc_batch();
+	for (i = 0; i < (1UL << order); i++) {
+		if (!test_bit(i, limit_bitmap))
+			continue;
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+
+		/* Compact the old MFNs into in_frames ... */
+		if (in_frames)
+			in_frames[n] = pfn_to_mfn(page_to_pfn(page));
+
+		MULTI_update_va_mapping(mcs.mc,
+					(unsigned long)page_address(page),
+					VOID_PTE, 0);
+		__set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
+
+		/* ... and the PFNs to repopulate into out_frames, so the
+		 * exchange hypercall sees two dense arrays of n entries. */
+		if (out_frames)
+			out_frames[n] = page_to_pfn(page);
+		++n;
+	}
+	xen_mc_issue(0);
+
+	return n;
+}
 /*
  * Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2140,6 +2172,49 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
 	xen_mc_issue(0);
 }
+
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+				      unsigned long *mfns,
+				      unsigned long first_mfn,
+				      void *limit_map)
+{
+	unsigned i, limit;
+	unsigned long mfn, last;
+	struct page *page;
+	int n = 0;
+
+	xen_mc_batch();
+
+	limit = 1u << order;
+	/* The TLB flush has to ride on the final multicall entry, which
+	 * with a sparse bitmap need not be entry limit - 1. */
+	last = find_last_bit(limit_map, limit);
+	for (i = 0; i < limit; i++) {
+		struct multicall_space mcs;
+		unsigned flags;
+
+		if (!test_bit(i, limit_map))
+			continue;
+		page = &pages[i];
+		mcs = __xen_mc_entry(0);
+		/* The frame array is compacted by xen_zap_page_range(), so
+		 * index it by set bits seen, not by the page index. */
+		if (mfns)
+			mfn = mfns[n];
+		else
+			mfn = first_mfn + n;
+		n++;
+
+		if (i < last)
+			flags = 0;
+		else {
+			if (order == 0)
+				flags = UVMF_INVLPG | UVMF_ALL;
+			else
+				flags = UVMF_TLB_FLUSH | UVMF_ALL;
+		}
+
+		MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+					mfn_pte(mfn, PAGE_KERNEL), flags);
+
+		set_phys_to_machine(page_to_pfn(page), mfn);
+	}
+
+	xen_mc_issue(0);
+}
+
 /*
  * Perform the hypercall to exchange a region of our pfns to point to
@@ -2266,6 +2341,90 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits)
+{
+	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+	unsigned long flags;
+	struct page *page;
+	int success;
+	int i, n = 0;
+	unsigned long _limit_map;
+	unsigned long *limit_map;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return -ENOMEM;
+
+	/* Validate before allocating anything, so we cannot leak limit_map. */
+	if (address_bits && (address_bits < PAGE_SHIFT))
+		return -EINVAL;
+
+	/* Use the on-stack long unless the bitmap needs more than one. */
+	if ((1U << order) > BITS_PER_LONG) {
+		limit_map = kzalloc(BITS_TO_LONGS(1U << order) *
+				    sizeof(*limit_map), GFP_KERNEL);
+		if (unlikely(!limit_map))
+			return -ENOMEM;
+	} else
+		limit_map = &_limit_map;
+
+	/* 0. Construct our per page bitmap lookup. */
+	bitmap_zero(limit_map, 1U << order);
+
+	/* 1. Mark the pages whose MFN exceeds the limit, and clear them. */
+	for (i = 0; i < 1UL << order; i++) {
+		void *vaddr;
+		page = &pages[i];
+		vaddr = page_address(page);
+		if (address_bits) {
+			/* Pages already backed by an MFN below the limit
+			 * can stay where they are. */
+			if (!(pfn_to_mfn(page_to_pfn(page)) >>
+			      (address_bits - PAGE_SHIFT)))
+				continue;
+			__set_bit(i, limit_map);
+		}
+		if (!PageHighMem(page))
+			memset(vaddr, 0, PAGE_SIZE);
+		else {
+			memset(kmap(page), 0, PAGE_SIZE);
+			kunmap(page);
+			++n;
+		}
+	}
+	/* Check to see if we actually have to do any work. */
+	if (bitmap_empty(limit_map, 1U << order)) {
+		if (limit_map != &_limit_map)
+			kfree(limit_map);
+		return 0;
+	}
+	if (n)
+		kmap_flush_unused();
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 2. Zap current PTEs, compacting the old MFNs into in_frames and
+	 *    the PFNs to repopulate into out_frames. */
+	n = xen_zap_page_range(pages, order, in_frames, out_frames, limit_map);
+
+	/* 3. Do the exchange for non-contiguous MFNs. */
+	success = xen_exchange_memory(n, 0, in_frames,
+				      n, 0, out_frames, address_bits);
+
+	/* 4. Map new pages in place of old pages; on failure map the
+	 *    original frames back in. */
+	if (success)
+		xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+	else
+		xen_remap_exchanged_pages(pages, order, in_frames, 0, limit_map);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+	if (limit_map != &_limit_map)
+		kfree(limit_map);
+
+	return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
+
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
 {
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 03c85d7..ae5b1ef 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -28,4 +28,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long mfn, int nr,
 			       pgprot_t prot, unsigned domid);
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+			       unsigned int address_bits);
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 27be2f0..4fa2066 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
 /*** Page table manipulation functions ***/
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1550,7 +1552,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+	gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+
+	if (xen_pv_domain()) {
+		/* Both zone modifiers together cannot be honoured directly;
+		 * strip them and rely on xen_limit_pages_to_max_mfn() below
+		 * to pull the pages under the boundary instead. */
+		if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+			gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+	}
 	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
@@ -1586,6 +1592,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
+		if (xen_pv_domain()) {
+			if (dma_mask) {
+				if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+					area->nr_pages = i + 1;
+					goto fail;
+				}
+				/* The exchanged frame is not guaranteed to be
+				 * zeroed, so honour __GFP_ZERO again. */
+				if (gfp_mask & __GFP_ZERO)
+					clear_highpage(page);
+			}
+		}
 	}
 
 	if (map_vm_area(area, prot, &pages))