* [RFC 1/2] kvm: host-side changes for tmem on KVM
@ 2012-03-08 16:54 Akshay Karle
2012-03-08 17:37 ` Bobby Powers
2012-03-15 16:54 ` Konrad Rzeszutek Wilk
0 siblings, 2 replies; 8+ messages in thread
From: Akshay Karle @ 2012-03-08 16:54 UTC (permalink / raw)
To: linux-kernel
Cc: Dan Magenheimer, konrad.wilk, kvm, ashu tripathi, nishant gulhane,
Shreyas Mahure, amarmore2006, mahesh mohan
From: Akshay Karle <akshay.a.karle@gmail.com>
Subject: [RFC 1/2] kvm: host-side changes for tmem on KVM
Working at host:
Once the guest exits to the kvm host, the host determines that the guest exited
to perform some tmem operation (done at kvm_emulate_hypercall) and then
we use zcache to implement the required operations (performed by kvm_pv_tmem_op).
---
Diffstat for host patch:
arch/x86/include/asm/kvm_host.h | 1
arch/x86/kvm/x86.c | 4 +
drivers/staging/zcache/zcache-main.c | 98 ++++++++++++++++++++++++++++++++---
3 files changed, 95 insertions(+), 8 deletions(-)
diff -Napur vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h linux-3.1.5//arch/x86/include/asm/kvm_host.h
--- vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h 2011-12-09 22:27:05.000000000 +0530
+++ linux-3.1.5//arch/x86/include/asm/kvm_host.h 2012-03-05 14:09:41.648006153 +0530
@@ -668,6 +668,7 @@ int emulator_write_phys(struct kvm_vcpu
const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret);
+int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret);
u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
diff -Napur vanilla/linux-3.1.5/arch/x86/kvm/x86.c linux-3.1.5//arch/x86/kvm/x86.c
--- vanilla/linux-3.1.5/arch/x86/kvm/x86.c 2011-12-09 22:27:05.000000000 +0530
+++ linux-3.1.5//arch/x86/kvm/x86.c 2012-03-05 14:09:41.652006083 +0530
@@ -5267,6 +5267,10 @@ int kvm_emulate_hypercall(struct kvm_vcp
case KVM_HC_MMU_OP:
r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
break;
+ case KVM_HC_TMEM:
+ r = kvm_pv_tmem_op(vcpu, a0, &ret);
+ ret = ret - 1000;
+ break;
default:
ret = -KVM_ENOSYS;
break;
diff -Napur vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c linux-3.1.5//drivers/staging/zcache/zcache-main.c
--- vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c 2011-12-09 22:27:05.000000000 +0530
+++ linux-3.1.5//drivers/staging/zcache/zcache-main.c 2012-03-05 14:10:31.264006031 +0530
@@ -30,6 +30,7 @@
#include <linux/atomic.h>
#include <linux/math64.h>
#include "tmem.h"
+#include "kvm-tmem.h"
#include "../zram/xvmalloc.h" /* if built in drivers/staging */
@@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
int ret;
- BUG_ON(!irqs_disabled());
BUG_ON(chunks >= NCHUNKS);
ret = xv_malloc(xvpool, alloc_size,
&page, &offset, ZCACHE_GFP_MASK);
@@ -1313,7 +1313,6 @@ static int zcache_compress(struct page *
unsigned char *wmem = __get_cpu_var(zcache_workmem);
char *from_va;
- BUG_ON(!irqs_disabled());
if (unlikely(dmem == NULL || wmem == NULL))
goto out; /* no buffer, so can't compress */
from_va = kmap_atomic(from, KM_USER0);
@@ -1533,7 +1532,6 @@ static int zcache_put_page(int cli_id, i
struct tmem_pool *pool;
int ret = -1;
- BUG_ON(!irqs_disabled());
pool = zcache_get_pool_by_id(cli_id, pool_id);
if (unlikely(pool == NULL))
goto out;
@@ -1898,6 +1896,67 @@ struct frontswap_ops zcache_frontswap_re
#endif
/*
+ * tmem op to support tmem in kvm guests
+ */
+
+int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret)
+{
+ struct tmem_ops op;
+ struct tmem_oid oid;
+ uint64_t pfn;
+ struct page *page;
+ int r;
+
+ r = kvm_read_guest(vcpu->kvm, addr, &op, sizeof(op));
+ if (r < 0)
+ return r;
+
+ switch (op.cmd) {
+ case TMEM_NEW_POOL:
+ *ret = zcache_new_pool(op.u.new.cli_id, op.u.new.flags);
+ break;
+ case TMEM_DESTROY_POOL:
+ *ret = zcache_destroy_pool(op.u.gen.cli_id, op.pool_id);
+ break;
+ case TMEM_NEW_PAGE:
+ break;
+ case TMEM_PUT_PAGE:
+ pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
+ page = pfn_to_page(pfn);
+ oid.oid[0] = op.u.gen.oid[0];
+ oid.oid[1] = op.u.gen.oid[1];
+ oid.oid[2] = op.u.gen.oid[2];
+ VM_BUG_ON(!PageLocked(page));
+ *ret = zcache_put_page(op.u.gen.cli_id, op.pool_id,
+ &oid, op.u.gen.index, page);
+ break;
+ case TMEM_GET_PAGE:
+ pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
+ page = pfn_to_page(pfn);
+ oid.oid[0] = op.u.gen.oid[0];
+ oid.oid[1] = op.u.gen.oid[1];
+ oid.oid[2] = op.u.gen.oid[2];
+ *ret = zcache_get_page(TMEM_CLI, op.pool_id,
+ &oid, op.u.gen.index, page);
+ break;
+ case TMEM_FLUSH_PAGE:
+ oid.oid[0] = op.u.gen.oid[0];
+ oid.oid[1] = op.u.gen.oid[1];
+ oid.oid[2] = op.u.gen.oid[2];
+ *ret = zcache_flush_page(op.u.gen.cli_id, op.pool_id,
+ &oid, op.u.gen.index);
+ break;
+ case TMEM_FLUSH_OBJECT:
+ oid.oid[0] = op.u.gen.oid[0];
+ oid.oid[1] = op.u.gen.oid[1];
+ oid.oid[2] = op.u.gen.oid[2];
+ *ret = zcache_flush_object(op.u.gen.cli_id, op.pool_id, &oid);
+ break;
+ }
+ return 0;
+}
+
+/*
* zcache initialization
* NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
* NOTHING HAPPENS!
@@ -1934,10 +1993,19 @@ static int __init no_frontswap(char *s)
__setup("nofrontswap", no_frontswap);
+static int kvm_tmem_enabled = 0;
+
+static int __init enable_kvm_tmem(char *s)
+{
+ kvm_tmem_enabled = 1;
+ return 1;
+}
+
+__setup("kvmtmem", enable_kvm_tmem);
+
static int __init zcache_init(void)
{
int ret = 0;
-
#ifdef CONFIG_SYSFS
ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
if (ret) {
@@ -1946,7 +2014,7 @@ static int __init zcache_init(void)
}
#endif /* CONFIG_SYSFS */
#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
- if (zcache_enabled) {
+ if (zcache_enabled || kvm_tmem_enabled) {
unsigned int cpu;
tmem_register_hostops(&zcache_hostops);
@@ -1966,11 +2034,25 @@ static int __init zcache_init(void)
sizeof(struct tmem_objnode), 0, 0, NULL);
zcache_obj_cache = kmem_cache_create("zcache_obj",
sizeof(struct tmem_obj), 0, 0, NULL);
- ret = zcache_new_client(LOCAL_CLIENT);
- if (ret) {
- pr_err("zcache: can't create client\n");
+ if(kvm_tmem_enabled) {
+ ret = zcache_new_client(TMEM_CLI);
+ if(ret) {
+ pr_err("zcache: can't create client\n");
+ goto out;
+ }
+ zbud_init();
+ register_shrinker(&zcache_shrinker);
+ pr_info("zcache: transcendent memory enabled using kernel "
+ "for kvm guests\n");
goto out;
}
+ else {
+ ret = zcache_new_client(LOCAL_CLIENT);
+ if (ret) {
+ pr_err("zcache: can't create client\n");
+ goto out;
+ }
+ }
#endif
#ifdef CONFIG_CLEANCACHE
if (zcache_enabled && use_cleancache) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-08 16:54 [RFC 1/2] kvm: host-side changes for tmem on KVM Akshay Karle
@ 2012-03-08 17:37 ` Bobby Powers
2012-03-15 16:54 ` Konrad Rzeszutek Wilk
1 sibling, 0 replies; 8+ messages in thread
From: Bobby Powers @ 2012-03-08 17:37 UTC (permalink / raw)
To: Akshay Karle
Cc: linux-kernel, Dan Magenheimer, konrad.wilk, kvm, ashu tripathi,
nishant gulhane, Shreyas Mahure, amarmore2006, mahesh mohan
On Thu, Mar 8, 2012 at 11:54 AM, Akshay Karle <akshay.a.karle@gmail.com> wrote:
> From: Akshay Karle <akshay.a.karle@gmail.com>
> Subject: [RFC 1/2] kvm: host-side changes for tmem on KVM
>
> Working at host:
> Once the guest exits to the kvm host, the host determines that the guest exited
> to perform some tmem operation(done at kvm_emulate_hypercall)and then
> we use zcache to implement this required operations(performed by kvm_pv_tmem_op).
>
> ---
> Diffstat for host patch:
> arch/x86/include/asm/kvm_host.h | 1
> arch/x86/kvm/x86.c | 4 +
> drivers/staging/zcache/zcache-main.c | 98 ++++++++++++++++++++++++++++++++---
> 3 files changed, 95 insertions(+), 8 deletions(-)
>
> diff -Napur vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h linux-3.1.5//arch/x86/include/asm/kvm_host.h
> --- vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//arch/x86/include/asm/kvm_host.h 2012-03-05 14:09:41.648006153 +0530
> @@ -668,6 +668,7 @@ int emulator_write_phys(struct kvm_vcpu
> const void *val, int bytes);
> int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
> gpa_t addr, unsigned long *ret);
> +int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret);
> u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
>
> extern bool tdp_enabled;
> diff -Napur vanilla/linux-3.1.5/arch/x86/kvm/x86.c linux-3.1.5//arch/x86/kvm/x86.c
> --- vanilla/linux-3.1.5/arch/x86/kvm/x86.c 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//arch/x86/kvm/x86.c 2012-03-05 14:09:41.652006083 +0530
> @@ -5267,6 +5267,10 @@ int kvm_emulate_hypercall(struct kvm_vcp
> case KVM_HC_MMU_OP:
> r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
> break;
> + case KVM_HC_TMEM:
> + r = kvm_pv_tmem_op(vcpu, a0, &ret);
> + ret = ret - 1000;
> + break;
> default:
> ret = -KVM_ENOSYS;
> break;
> diff -Napur vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c linux-3.1.5//drivers/staging/zcache/zcache-main.c
> --- vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//drivers/staging/zcache/zcache-main.c 2012-03-05 14:10:31.264006031 +0530
> @@ -30,6 +30,7 @@
> #include <linux/atomic.h>
> #include <linux/math64.h>
> #include "tmem.h"
> +#include "kvm-tmem.h"
This header should be introduced in this patch, not the next.
>
> #include "../zram/xvmalloc.h" /* if built in drivers/staging */
>
> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
> int ret;
>
> - BUG_ON(!irqs_disabled());
> BUG_ON(chunks >= NCHUNKS);
> ret = xv_malloc(xvpool, alloc_size,
> &page, &offset, ZCACHE_GFP_MASK);
> @@ -1313,7 +1313,6 @@ static int zcache_compress(struct page *
> unsigned char *wmem = __get_cpu_var(zcache_workmem);
> char *from_va;
>
> - BUG_ON(!irqs_disabled());
> if (unlikely(dmem == NULL || wmem == NULL))
> goto out; /* no buffer, so can't compress */
> from_va = kmap_atomic(from, KM_USER0);
> @@ -1533,7 +1532,6 @@ static int zcache_put_page(int cli_id, i
> struct tmem_pool *pool;
> int ret = -1;
>
> - BUG_ON(!irqs_disabled());
> pool = zcache_get_pool_by_id(cli_id, pool_id);
> if (unlikely(pool == NULL))
> goto out;
> @@ -1898,6 +1896,67 @@ struct frontswap_ops zcache_frontswap_re
> #endif
>
> /*
> + * tmem op to support tmem in kvm guests
> + */
> +
> +int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret)
> +{
> + struct tmem_ops op;
> + struct tmem_oid oid;
> + uint64_t pfn;
> + struct page *page;
> + int r;
> +
> + r = kvm_read_guest(vcpu->kvm, addr, &op, sizeof(op));
> + if (r < 0)
> + return r;
> +
> + switch (op.cmd) {
> + case TMEM_NEW_POOL:
> + *ret = zcache_new_pool(op.u.new.cli_id, op.u.new.flags);
> + break;
> + case TMEM_DESTROY_POOL:
> + *ret = zcache_destroy_pool(op.u.gen.cli_id, op.pool_id);
> + break;
> + case TMEM_NEW_PAGE:
> + break;
> + case TMEM_PUT_PAGE:
> + pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
> + page = pfn_to_page(pfn);
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + VM_BUG_ON(!PageLocked(page));
> + *ret = zcache_put_page(op.u.gen.cli_id, op.pool_id,
> + &oid, op.u.gen.index, page);
> + break;
> + case TMEM_GET_PAGE:
> + pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
> + page = pfn_to_page(pfn);
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_get_page(TMEM_CLI, op.pool_id,
> + &oid, op.u.gen.index, page);
> + break;
> + case TMEM_FLUSH_PAGE:
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_flush_page(op.u.gen.cli_id, op.pool_id,
> + &oid, op.u.gen.index);
> + break;
> + case TMEM_FLUSH_OBJECT:
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_flush_object(op.u.gen.cli_id, op.pool_id, &oid);
> + break;
> + }
> + return 0;
> +}
> +
> +/*
> * zcache initialization
> * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
> * NOTHING HAPPENS!
> @@ -1934,10 +1993,19 @@ static int __init no_frontswap(char *s)
>
> __setup("nofrontswap", no_frontswap);
>
> +static int kvm_tmem_enabled = 0;
> +
> +static int __init enable_kvm_tmem(char *s)
> +{
> + kvm_tmem_enabled = 1;
> + return 1;
> +}
> +
> +__setup("kvmtmem", enable_kvm_tmem);
> +
> static int __init zcache_init(void)
> {
> int ret = 0;
> -
> #ifdef CONFIG_SYSFS
> ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
> if (ret) {
> @@ -1946,7 +2014,7 @@ static int __init zcache_init(void)
> }
> #endif /* CONFIG_SYSFS */
> #if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
> - if (zcache_enabled) {
> + if (zcache_enabled || kvm_tmem_enabled) {
> unsigned int cpu;
>
> tmem_register_hostops(&zcache_hostops);
> @@ -1966,11 +2034,25 @@ static int __init zcache_init(void)
> sizeof(struct tmem_objnode), 0, 0, NULL);
> zcache_obj_cache = kmem_cache_create("zcache_obj",
> sizeof(struct tmem_obj), 0, 0, NULL);
> - ret = zcache_new_client(LOCAL_CLIENT);
> - if (ret) {
> - pr_err("zcache: can't create client\n");
> + if(kvm_tmem_enabled) {
> + ret = zcache_new_client(TMEM_CLI);
> + if(ret) {
> + pr_err("zcache: can't create client\n");
> + goto out;
> + }
> + zbud_init();
> + register_shrinker(&zcache_shrinker);
> + pr_info("zcache: transcendent memory enabled using kernel "
> + "for kvm guests\n");
> goto out;
> }
> + else {
> + ret = zcache_new_client(LOCAL_CLIENT);
> + if (ret) {
> + pr_err("zcache: can't create client\n");
> + goto out;
> + }
> + }
> #endif
> #ifdef CONFIG_CLEANCACHE
> if (zcache_enabled && use_cleancache) {
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-08 16:54 [RFC 1/2] kvm: host-side changes for tmem on KVM Akshay Karle
2012-03-08 17:37 ` Bobby Powers
@ 2012-03-15 16:54 ` Konrad Rzeszutek Wilk
2012-03-15 18:41 ` Akshay Karle
1 sibling, 1 reply; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-15 16:54 UTC (permalink / raw)
To: Akshay Karle
Cc: linux-kernel, Dan Magenheimer, kvm, ashu tripathi,
nishant gulhane, Shreyas Mahure, amarmore2006, mahesh mohan
On Thu, Mar 08, 2012 at 10:24:08PM +0530, Akshay Karle wrote:
> From: Akshay Karle <akshay.a.karle@gmail.com>
> Subject: [RFC 1/2] kvm: host-side changes for tmem on KVM
>
> Working at host:
> Once the guest exits to the kvm host, the host determines that the guest exited
> to perform some tmem operation(done at kvm_emulate_hypercall)and then
> we use zcache to implement this required operations(performed by kvm_pv_tmem_op).
Do you need any modifications to the Kconfig file to reflect the KVM dependency?
>
> ---
> Diffstat for host patch:
> arch/x86/include/asm/kvm_host.h | 1
> arch/x86/kvm/x86.c | 4 +
> drivers/staging/zcache/zcache-main.c | 98 ++++++++++++++++++++++++++++++++---
> 3 files changed, 95 insertions(+), 8 deletions(-)
>
> diff -Napur vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h linux-3.1.5//arch/x86/include/asm/kvm_host.h
> --- vanilla/linux-3.1.5/arch/x86/include/asm/kvm_host.h 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//arch/x86/include/asm/kvm_host.h 2012-03-05 14:09:41.648006153 +0530
> @@ -668,6 +668,7 @@ int emulator_write_phys(struct kvm_vcpu
> const void *val, int bytes);
> int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
> gpa_t addr, unsigned long *ret);
> +int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret);
> u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
>
> extern bool tdp_enabled;
> diff -Napur vanilla/linux-3.1.5/arch/x86/kvm/x86.c linux-3.1.5//arch/x86/kvm/x86.c
> --- vanilla/linux-3.1.5/arch/x86/kvm/x86.c 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//arch/x86/kvm/x86.c 2012-03-05 14:09:41.652006083 +0530
> @@ -5267,6 +5267,10 @@ int kvm_emulate_hypercall(struct kvm_vcp
> case KVM_HC_MMU_OP:
> r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
> break;
> + case KVM_HC_TMEM:
> + r = kvm_pv_tmem_op(vcpu, a0, &ret);
> + ret = ret - 1000;
That is rather odd. Why the subtraction of 1000?
> + break;
> default:
> ret = -KVM_ENOSYS;
> break;
> diff -Napur vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c linux-3.1.5//drivers/staging/zcache/zcache-main.c
> --- vanilla/linux-3.1.5/drivers/staging/zcache/zcache-main.c 2011-12-09 22:27:05.000000000 +0530
> +++ linux-3.1.5//drivers/staging/zcache/zcache-main.c 2012-03-05 14:10:31.264006031 +0530
> @@ -30,6 +30,7 @@
> #include <linux/atomic.h>
> #include <linux/math64.h>
> #include "tmem.h"
> +#include "kvm-tmem.h"
>
> #include "../zram/xvmalloc.h" /* if built in drivers/staging */
>
> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
> int ret;
>
> - BUG_ON(!irqs_disabled());
Can you explain why?
> BUG_ON(chunks >= NCHUNKS);
> ret = xv_malloc(xvpool, alloc_size,
> &page, &offset, ZCACHE_GFP_MASK);
> @@ -1313,7 +1313,6 @@ static int zcache_compress(struct page *
> unsigned char *wmem = __get_cpu_var(zcache_workmem);
> char *from_va;
>
> - BUG_ON(!irqs_disabled());
> if (unlikely(dmem == NULL || wmem == NULL))
> goto out; /* no buffer, so can't compress */
> from_va = kmap_atomic(from, KM_USER0);
> @@ -1533,7 +1532,6 @@ static int zcache_put_page(int cli_id, i
> struct tmem_pool *pool;
> int ret = -1;
>
> - BUG_ON(!irqs_disabled());
> pool = zcache_get_pool_by_id(cli_id, pool_id);
> if (unlikely(pool == NULL))
> goto out;
> @@ -1898,6 +1896,67 @@ struct frontswap_ops zcache_frontswap_re
> #endif
>
> /*
> + * tmem op to support tmem in kvm guests
> + */
> +
> +int kvm_pv_tmem_op(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long *ret)
> +{
> + struct tmem_ops op;
> + struct tmem_oid oid;
> + uint64_t pfn;
> + struct page *page;
> + int r;
> +
> + r = kvm_read_guest(vcpu->kvm, addr, &op, sizeof(op));
> + if (r < 0)
> + return r;
> +
> + switch (op.cmd) {
> + case TMEM_NEW_POOL:
> + *ret = zcache_new_pool(op.u.new.cli_id, op.u.new.flags);
> + break;
> + case TMEM_DESTROY_POOL:
> + *ret = zcache_destroy_pool(op.u.gen.cli_id, op.pool_id);
> + break;
> + case TMEM_NEW_PAGE:
> + break;
> + case TMEM_PUT_PAGE:
> + pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
> + page = pfn_to_page(pfn);
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + VM_BUG_ON(!PageLocked(page));
> + *ret = zcache_put_page(op.u.gen.cli_id, op.pool_id,
> + &oid, op.u.gen.index, page);
> + break;
> + case TMEM_GET_PAGE:
> + pfn = gfn_to_pfn(vcpu->kvm, op.u.gen.pfn);
> + page = pfn_to_page(pfn);
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_get_page(TMEM_CLI, op.pool_id,
> + &oid, op.u.gen.index, page);
> + break;
> + case TMEM_FLUSH_PAGE:
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_flush_page(op.u.gen.cli_id, op.pool_id,
> + &oid, op.u.gen.index);
> + break;
> + case TMEM_FLUSH_OBJECT:
> + oid.oid[0] = op.u.gen.oid[0];
> + oid.oid[1] = op.u.gen.oid[1];
> + oid.oid[2] = op.u.gen.oid[2];
> + *ret = zcache_flush_object(op.u.gen.cli_id, op.pool_id, &oid);
> + break;
> + }
> + return 0;
> +}
> +
> +/*
> * zcache initialization
> * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
> * NOTHING HAPPENS!
> @@ -1934,10 +1993,19 @@ static int __init no_frontswap(char *s)
>
> __setup("nofrontswap", no_frontswap);
>
> +static int kvm_tmem_enabled = 0;
No need to declare it zero. Don't we want to have it running by default?
So the function below would be 'disable_kvm' instead of enabling it?
> +
> +static int __init enable_kvm_tmem(char *s)
> +{
> + kvm_tmem_enabled = 1;
> + return 1;
> +}
> +
> +__setup("kvmtmem", enable_kvm_tmem);
> +
> static int __init zcache_init(void)
> {
> int ret = 0;
> -
> #ifdef CONFIG_SYSFS
> ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
> if (ret) {
> @@ -1946,7 +2014,7 @@ static int __init zcache_init(void)
> }
> #endif /* CONFIG_SYSFS */
> #if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
> - if (zcache_enabled) {
> + if (zcache_enabled || kvm_tmem_enabled) {
> unsigned int cpu;
>
> tmem_register_hostops(&zcache_hostops);
> @@ -1966,11 +2034,25 @@ static int __init zcache_init(void)
> sizeof(struct tmem_objnode), 0, 0, NULL);
> zcache_obj_cache = kmem_cache_create("zcache_obj",
> sizeof(struct tmem_obj), 0, 0, NULL);
> - ret = zcache_new_client(LOCAL_CLIENT);
> - if (ret) {
> - pr_err("zcache: can't create client\n");
> + if(kvm_tmem_enabled) {
Space..
> + ret = zcache_new_client(TMEM_CLI);
> + if(ret) {
> + pr_err("zcache: can't create client\n");
> + goto out;
> + }
> + zbud_init();
> + register_shrinker(&zcache_shrinker);
> + pr_info("zcache: transcendent memory enabled using kernel "
> + "for kvm guests\n");
> goto out;
> }
> + else {
> + ret = zcache_new_client(LOCAL_CLIENT);
> + if (ret) {
> + pr_err("zcache: can't create client\n");
> + goto out;
> + }
> + }
> #endif
> #ifdef CONFIG_CLEANCACHE
> if (zcache_enabled && use_cleancache) {
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-15 16:54 ` Konrad Rzeszutek Wilk
@ 2012-03-15 18:41 ` Akshay Karle
2012-03-15 19:44 ` Dan Magenheimer
2012-03-15 19:51 ` Dan Magenheimer
0 siblings, 2 replies; 8+ messages in thread
From: Akshay Karle @ 2012-03-15 18:41 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: linux-kernel, Dan Magenheimer, kvm, ashu tripathi,
nishant gulhane, Shreyas Mahure, amarmore2006, mahesh mohan
>> Working at host:
>> Once the guest exits to the kvm host, the host determines that the guest exited
>> to perform some tmem operation(done at kvm_emulate_hypercall)and then
>> we use zcache to implement this required operations(performed by kvm_pv_tmem_op).
>
> Do you need any modifications to the Kconfig file to reflect the KVM dependency?
Yeah, the Kconfig of zcache now needs to be modified to reflect the KVM and the KVM_INTEL or
KVM_AMD dependency. The Kconfig can also be modified to support tmem for the guest kernels,
as the zcache code is not at all used in the guest, only the kvm-tmem, cleancache and
frontswap(if present) code is used in the guest. But, in the host its necessary to enable zcache.
So the Kconfig can be modified to have the following new field:
"Transcendent memory support for kvm guests".... Which would compile just the kvm-tmem code.
And we can add Cleancache and Frontswap as dependencies(similar to zcache).
But it should be made sure that the host kernel has zcache enabled..
>> diff -Napur vanilla/linux-3.1.5/arch/x86/kvm/x86.c linux-3.1.5//arch/x86/kvm/x86.c
>> --- vanilla/linux-3.1.5/arch/x86/kvm/x86.c 2011-12-09 22:27:05.000000000 +0530
>> +++ linux-3.1.5//arch/x86/kvm/x86.c 2012-03-05 14:09:41.652006083 +0530
>> @@ -5267,6 +5267,10 @@ int kvm_emulate_hypercall(struct kvm_vcp
>> case KVM_HC_MMU_OP:
>> r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
>> break;
>> + case KVM_HC_TMEM:
>> + r = kvm_pv_tmem_op(vcpu, a0, &ret);
>> + ret = ret - 1000;
>
> That is rather odd. Why the subtraction of 1000?
The reason for the subtraction is that the kvm hypercalls were able to return only
negative values; any return of a non-negative value resulted in a guest kernel panic.
We couldn't figure out why — suggestions on how to remove this workaround would be
welcome. We could have simply returned -ret, but that breaks down when tmem_op
returns 0, hence the ret - 1000.
>> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
>> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
>> int ret;
>>
>> - BUG_ON(!irqs_disabled());
>
> Can you explain why?
Zcache is by default used in the non-virtualized environment for page compression. Whenever
a page is to be evicted from the page cache the spin_lock_irq is held on the page mapping.
To ensure that this is done, the BUG_ON(!irqs_disabled()) was used.
But now the situation is different, we are using zcache functions for kvm VM's.
So if any page of the guest is to be evicted the irqs should be disabled in just that
guest and not the host, so we removed the BUG_ON(!irqs_disabled()); line.
>> @@ -1934,10 +1993,19 @@ static int __init no_frontswap(char *s)
>>
>> __setup("nofrontswap", no_frontswap);
>>
>> +static int kvm_tmem_enabled = 0;
>
> No need to declare it zero. Don't we want to have it running by default?
> So the function below would be 'disable_kvm' instead of enabling it?
This can be enabled by default only if the Kconfig was modified as described above.
As the user may not want tmem enabled by default we chose to keep it zero.
>> +
>> +static int __init enable_kvm_tmem(char *s)
>> +{
>> + kvm_tmem_enabled = 1;
>> + return 1;
>> +}
>> +
>> +__setup("kvmtmem", enable_kvm_tmem);
>> +
>> static int __init zcache_init(void)
>> {
>> int ret = 0;
>> -
>> #ifdef CONFIG_SYSFS
>> ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
>> if (ret) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-15 18:41 ` Akshay Karle
@ 2012-03-15 19:44 ` Dan Magenheimer
2012-03-15 19:51 ` Dan Magenheimer
1 sibling, 0 replies; 8+ messages in thread
From: Dan Magenheimer @ 2012-03-15 19:44 UTC (permalink / raw)
To: Akshay Karle, Konrad Wilk
Cc: linux-kernel, kvm, ashu tripathi, nishant gulhane, Shreyas Mahure,
amarmore2006, mahesh mohan
> From: Akshay Karle [mailto:akshay.a.karle@gmail.com]
> Subject: Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
>
> >> Working at host:
> >> Once the guest exits to the kvm host, the host determines that the guest exited
> >> to perform some tmem operation(done at kvm_emulate_hypercall)and then
> >> we use zcache to implement this required operations(performed by kvm_pv_tmem_op).
> >
> > Do you need any modifications to the Kconfig file to reflect the KVM dependency?
>
> Yeah, the Kconfig of zcache now needs to be modified to reflect the KVM and the KVM_INTEL or
> KVM_AMD dependency. The Kconfig can also be modified to support tmem for the guest kernels,
> as the zcache code is not at all used in the guest, only the kvm-tmem, cleancache and
> frontswap(if present) code is used in the guest. But, in the host its necessary to enable zcache.
> So the Kconfig can be modified to have the following new field:
> "Transcendent memory support for kvm guests".... Which would compile just the kvm-tmem code.
> And we can add Cleancache and Frontswap as dependencies(similar to zcache).
>
> But it should be made sure that the host kernel has zcache enabled..
If the host kernel does not have zcache enabled, it may return
the value for "unimplemented hypercall". The kvm tmem code in
the guest should be prepared for that as it may even be running
on an older kvm host.
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-15 18:41 ` Akshay Karle
2012-03-15 19:44 ` Dan Magenheimer
@ 2012-03-15 19:51 ` Dan Magenheimer
2012-03-17 18:02 ` Akshay Karle
1 sibling, 1 reply; 8+ messages in thread
From: Dan Magenheimer @ 2012-03-15 19:51 UTC (permalink / raw)
To: Akshay Karle, Konrad Wilk
Cc: linux-kernel, kvm, ashu tripathi, nishant gulhane, Shreyas Mahure,
amarmore2006, mahesh mohan
> From: Akshay Karle [mailto:akshay.a.karle@gmail.com]
> Subject: Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
>
> >> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
> >> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
> >> int ret;
> >>
> >> - BUG_ON(!irqs_disabled());
> >
> > Can you explain why?
>
> Zcache is by default used in the non-virtualized environment for page compression. Whenever
> a page is to be evicted from the page cache the spin_lock_irq is held on the page mapping.
> To ensure that this is done, the BUG_ON(!irqs_disabled()) was used.
> But now the situation is different, we are using zcache functions for kvm VM's.
> So if any page of the guest is to be evicted the irqs should be disabled in just that
> guest and not the host, so we removed the BUG_ON(!irqs_disabled()); line.
I think irqs may still need to be disabled (in your code by the caller)
since the tmem code (in tmem.c) takes spinlocks with this assumption.
I'm not sure since I don't know what can occur with scheduling a
kvm guest during an interrupt... can a different vcpu of the same guest
be scheduled on this same host pcpu?
Dan
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-15 19:51 ` Dan Magenheimer
@ 2012-03-17 18:02 ` Akshay Karle
2012-03-18 19:52 ` Dan Magenheimer
0 siblings, 1 reply; 8+ messages in thread
From: Akshay Karle @ 2012-03-17 18:02 UTC (permalink / raw)
To: Dan Magenheimer
Cc: Konrad Wilk, linux-kernel, kvm, ashu tripathi, nishant gulhane,
Shreyas Mahure, amarmore2006, mahesh mohan
> > From: Akshay Karle [mailto:akshay.a.karle@gmail.com]
> > Subject: Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
> >
> > >> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
> > >> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
> > >> int ret;
> > >>
> > >> - BUG_ON(!irqs_disabled());
> > >
> > > Can you explain why?
> >
> > Zcache is by default used in the non-virtualized environment for page compression. Whenever
> > a page is to be evicted from the page cache the spin_lock_irq is held on the page mapping.
> > To ensure that this is done, the BUG_ON(!irqs_disabled()) was used.
> > But now the situation is different, we are using zcache functions for kvm VM's.
> > So if any page of the guest is to be evicted the irqs should be disabled in just that
> > guest and not the host, so we removed the BUG_ON(!irqs_disabled()); line.
>
> I think irqs may still need to be disabled (in your code by the caller)
> since the tmem code (in tmem.c) takes spinlocks with this assumption.
> I'm not sure since I don't know what can occur with scheduling a
> kvm guest during an interrupt... can a different vcpu of the same guest
> be scheduled on this same host pcpu?
>
> Dan
The irqs are disabled but only in the guest kernel not in the host. We
tried adding the spin_lock_irq code into the host but that was resulting
in host panic as the lock is being taken on the entire mapping. If the
irqs are disabled in the guest, is there a need to disable them on the
host as well? Because the mappings maybe different in the host and the
guest.
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [RFC 1/2] kvm: host-side changes for tmem on KVM
2012-03-17 18:02 ` Akshay Karle
@ 2012-03-18 19:52 ` Dan Magenheimer
0 siblings, 0 replies; 8+ messages in thread
From: Dan Magenheimer @ 2012-03-18 19:52 UTC (permalink / raw)
To: Akshay Karle
Cc: Konrad Wilk, linux-kernel, kvm, ashu tripathi, nishant gulhane,
Shreyas Mahure, amarmore2006, mahesh mohan
> From: Akshay Karle [mailto:akshay.a.karle@gmail.com]
> Subject: RE: [RFC 1/2] kvm: host-side changes for tmem on KVM
>
> > > From: Akshay Karle [mailto:akshay.a.karle@gmail.com]
> > > Subject: Re: [RFC 1/2] kvm: host-side changes for tmem on KVM
> > >
> > > >> @@ -669,7 +670,6 @@ static struct zv_hdr *zv_create(struct x
> > > >> int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
> > > >> int ret;
> > > >>
> > > >> - BUG_ON(!irqs_disabled());
> > > >
> > > > Can you explain why?
> > >
> > > Zcache is by default used in the non-virtualized environment for page compression. Whenever
> > > a page is to be evicted from the page cache the spin_lock_irq is held on the page mapping.
> > > To ensure that this is done, the BUG_ON(!irqs_disabled()) was used.
> > > But now the situation is different, we are using zcache functions for kvm VM's.
> > > So if any page of the guest is to be evicted the irqs should be disabled in just that
> > > guest and not the host, so we removed the BUG_ON(!irqs_disabled()); line.
> >
> > I think irqs may still need to be disabled (in your code by the caller)
> > since the tmem code (in tmem.c) takes spinlocks with this assumption.
> > I'm not sure since I don't know what can occur with scheduling a
> > kvm guest during an interrupt... can a different vcpu of the same guest
> > be scheduled on this same host pcpu?
>
> The irqs are disabled but only in the guest kernel not in the host. We
> tried adding the spin_lock_irq code into the host but that was resulting
> in host panic as the lock is being taken on the entire mapping. If the
> irqs are disabled in the guest, is there a need to disable them on the
> host as well? Because the mappings maybe different in the host and the
> guest.
The issue is that interrupts MUST be disabled in code that is
called by zcache_put_page() and by zv_create(), because the
called code (tmem_put and xv_malloc) takes locks. This may
be difficult to reproduce, but if an interrupt occurs during
a critical region, a deadlock is possible.
You don't need to do a spin_lock_irq. You just need to do a local_irq_save
and restore in zcache_put_page if kvm_tmem_enabled. Look at zcache_get_page
as an example... the code in zcache_put_page would be something like:
{
if (kvm_tmem_enabled)
local_irq_save(flags);
:
:
out:
if (kvm_tmem_enabled)
local_irq_restore(flags);
return ret;
}
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-03-18 19:53 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-08 16:54 [RFC 1/2] kvm: host-side changes for tmem on KVM Akshay Karle
2012-03-08 17:37 ` Bobby Powers
2012-03-15 16:54 ` Konrad Rzeszutek Wilk
2012-03-15 18:41 ` Akshay Karle
2012-03-15 19:44 ` Dan Magenheimer
2012-03-15 19:51 ` Dan Magenheimer
2012-03-17 18:02 ` Akshay Karle
2012-03-18 19:52 ` Dan Magenheimer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox