* [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-18 13:11 [PATCH 0/2] kvm: provide kvm stat per process Christian Borntraeger
@ 2015-11-18 13:11 ` Christian Borntraeger
From: Christian Borntraeger @ 2015-11-18 13:11 UTC
To: Paolo Bonzini
Cc: KVM, Cornelia Huck, Jens Freimann, Janosch Frank, linux-s390,
Christian Borntraeger
From: Janosch Frank <frankja@linux.vnet.ibm.com>
KVM statistics for VMs (no. of exits, halts and other special
instructions) are currently only available in a summarized manner for
all VMs. They are exported to userland through files in the kvm
debugfs directory and used for performance monitoring, as well as VM
problem detection with helper tools like kvm_stat. If a VM has
problems and therefore creates a large number of exits, one cannot
easily find out which one it is, as there is no VM-specific data.
This patch adds a kvm debugfs subdirectory for each VM on
kvm_create_vm(). The subdirectories are named by the VM pid and
contain the same type of exported statistics that are already in the
kvm debugfs directory, but the exported data is now VM specific.
Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
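(Illustration only, not part of the patch: a minimal userspace reader
for the per-VM files this patch exports. It assumes debugfs is mounted
at /sys/kernel/debug and that a stat file named "exits" exists on the
target architecture; the actual file names come from each arch's
debugfs_entries table, so both are assumptions.)

#include <dirent.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	/* Stat name to read from every per-VM directory; "exits" is an
	 * assumption, pass a name as argv[1] to override it. */
	const char *stat = argc > 1 ? argv[1] : "exits";
	const char *base = "/sys/kernel/debug/kvm";
	struct dirent *de;
	DIR *d = opendir(base);

	if (!d) {
		perror("opendir");
		return 1;
	}
	while ((de = readdir(d)) != NULL) {
		char path[512];
		unsigned long long val;
		FILE *f;

		/* Per-VM subdirectories are named after the creating PID. */
		if (de->d_name[0] < '0' || de->d_name[0] > '9')
			continue;
		snprintf(path, sizeof(path), "%s/%s/%s", base, de->d_name, stat);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fscanf(f, "%llu", &val) == 1)
			printf("VM %s: %s = %llu\n", de->d_name, stat, val);
		fclose(f);
	}
	closedir(d);
	return 0;
}
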
include/linux/kvm_host.h | 7 ++
virt/kvm/kvm_main.c | 188 +++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 187 insertions(+), 8 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6981bc6..23071bc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -425,6 +425,8 @@ struct kvm {
#endif
long tlbs_dirty;
struct list_head devices;
+ struct dentry *debugfs_dentry;
+ struct kvm_stat_data **debugfs_data;
};
#define kvm_err(fmt, ...) \
@@ -1005,6 +1007,11 @@ enum kvm_stat_kind {
KVM_STAT_VCPU,
};
+struct kvm_stat_data {
+ int offset;
+ struct kvm *kvm;
+};
+
struct kvm_stats_debugfs_item {
const char *name;
int offset;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 726bb51..1ee2f73 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
+/* Worst case buffer size needed for holding an integer. */
+#define ITOA_MAX_LEN 12
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops;
struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
+static u64 kvm_debugfs_num_entries;
+static const struct file_operations *stat_fops_per_vm[];
+
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#ifdef CONFIG_KVM_COMPAT
@@ -539,6 +545,71 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
kvfree(slots);
}
+static int kvm_destroy_vm_debugfs(struct kvm *kvm)
+{
+ u64 i;
+ struct kvm_stat_data **stat_data = kvm->debugfs_data;
+
+ for (i = 0; i < kvm_debugfs_num_entries; i++)
+ kfree(stat_data[i]);
+
+ kfree(kvm->debugfs_data);
+
+ return 0;
+}
+
+static int kvm_create_vm_debugfs(struct kvm *kvm)
+{
+ int r = 0, i = 0;
+ char dir_name[ITOA_MAX_LEN];
+ struct kvm_stat_data *stat_data;
+ struct kvm_stats_debugfs_item *p;
+
+ if (!kvm)
+ return -EINVAL;
+
+ snprintf(dir_name, sizeof(dir_name), "%d", current->pid);
+ kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ if (!kvm->debugfs_dentry)
+ goto out_err;
+
+ kvm->debugfs_data = kmalloc(sizeof(*kvm->debugfs_data) *
+ kvm_debugfs_num_entries, GFP_KERNEL);
+ if (!kvm->debugfs_data)
+ return -ENOMEM;
+
+ for (p = debugfs_entries; p->name; p++, i++) {
+ stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
+ if (!stat_data) {
+ r = -ENOMEM;
+ goto out_err_clean;
+ }
+
+ stat_data->offset = p->offset;
+ stat_data->kvm = kvm;
+ if (!debugfs_create_file(p->name, 0444,
+ kvm->debugfs_dentry,
+ stat_data,
+ stat_fops_per_vm[p->kind])) {
+ r = -EEXIST;
+ goto out_err_clean;
+ }
+ kvm->debugfs_data[i] = stat_data;
+ }
+
+ return r;
+
+out_err_clean:
+ debugfs_remove_recursive(kvm->debugfs_dentry);
+ kfree(stat_data);
+ for (i--; i >= 0; i--)
+ kfree(kvm->debugfs_data[i]);
+
+ kfree(kvm->debugfs_data);
+out_err:
+ return r;
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -597,6 +668,10 @@ static struct kvm *kvm_create_vm(unsigned long type)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
+ r = kvm_create_vm_debugfs(kvm);
+ if (r)
+ goto out_err;
+
preempt_notifier_inc();
return kvm;
@@ -646,6 +721,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
int i;
struct mm_struct *mm = kvm->mm;
+ kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
@@ -689,6 +765,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
{
struct kvm *kvm = filp->private_data;
+ debugfs_remove_recursive(kvm->debugfs_dentry);
kvm_irqfd_release(kvm);
kvm_put_kvm(kvm);
@@ -3398,15 +3475,107 @@ static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
};
+static int kvm_debugfs_open(struct inode *inode, struct file *file,
+ int (*get)(void *, u64 *), int (*set)(void *, u64),
+ const char *fmt)
+{
+ int err;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ err = simple_attr_open(inode, file, get, set, fmt);
+ if (err)
+ return err;
+
+ kvm_get_kvm(stat_data->kvm);
+
+ return 0;
+}
+
+static int kvm_debugfs_release(struct inode *inode, struct file *file)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ simple_attr_release(inode, file);
+ kvm_put_kvm(stat_data->kvm);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm(void *data, u64 *val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return simple_attr_open(inode, file, vm_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vm_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vm_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static int vcpu_stat_get_per_vm(void *data, u64 *val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ *val = 0;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *val += *(u32 *)((void *)vcpu + stat_data->offset);
+
+ return 0;
+}
+
+static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vcpu_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vcpu_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations *stat_fops_per_vm[] = {
+ [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
+ [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
+};
+
static int vm_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
- *val += *(u32 *)((void *)kvm + offset);
+ stat_tmp.kvm = kvm;
+ vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
spin_unlock(&kvm_lock);
return 0;
}
@@ -3417,16 +3586,18 @@ static int vcpu_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- int i;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- *val += *(u32 *)((void *)vcpu + offset);
-
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
+ }
spin_unlock(&kvm_lock);
+
return 0;
}
@@ -3446,7 +3617,8 @@ static int kvm_init_debug(void)
if (kvm_debugfs_dir == NULL)
goto out;
- for (p = debugfs_entries; p->name; ++p) {
+ kvm_debugfs_num_entries = 0;
+ for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
--
2.3.0
* [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-23 11:24 [PATCH v2 0/2] RFC: kvm stat enhancements Christian Borntraeger
@ 2015-11-23 11:24 ` Christian Borntraeger
From: Christian Borntraeger @ 2015-11-23 11:24 UTC
To: Paolo Bonzini; +Cc: KVM, Janosch Frank
From: Janosch Frank <frankja@linux.vnet.ibm.com>
KVM statistics for VMs (no. of exits, halts and other special
instructions) are currently only available in a summarized manner for
all VMs. They are exported to userland through files in the kvm
debugfs directory and used for performance monitoring, as well as VM
problem detection with helper tools like kvm_stat. If a VM has
problems and therefore creates a large number of exits, one cannot
easily find out which one it is, as there is no VM-specific data.
This patch adds a kvm debugfs subdirectory for each VM on
kvm_create_vm(). The subdirectories are named by the VM pid and
contain the same type of exported statistics that are already in the
kvm debugfs directory, but the exported data is now VM specific.
Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
[multiple fixes]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
include/linux/kvm_host.h | 7 ++
virt/kvm/kvm_main.c | 194 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 192 insertions(+), 9 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 445a8c7..b7c9e03 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -425,6 +425,8 @@ struct kvm {
#endif
long tlbs_dirty;
struct list_head devices;
+ struct dentry *debugfs_dentry;
+ struct kvm_stat_data **debugfs_data;
};
#define kvm_err(fmt, ...) \
@@ -1000,6 +1002,11 @@ enum kvm_stat_kind {
KVM_STAT_VCPU,
};
+struct kvm_stat_data {
+ int offset;
+ struct kvm *kvm;
+};
+
struct kvm_stats_debugfs_item {
const char *name;
int offset;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e1f6587..432267d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
+/* Worst case buffer size needed for holding an integer. */
+#define ITOA_MAX_LEN 12
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops;
struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
+static u64 kvm_debugfs_num_entries;
+static const struct file_operations *stat_fops_per_vm[];
+
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#ifdef CONFIG_KVM_COMPAT
@@ -539,6 +545,75 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
kvfree(slots);
}
+static int kvm_destroy_vm_debugfs(struct kvm *kvm)
+{
+ u64 i;
+ struct kvm_stat_data **stat_data = kvm->debugfs_data;
+
+ for (i = 0; i < kvm_debugfs_num_entries; i++)
+ kfree(stat_data[i]);
+
+ kfree(kvm->debugfs_data);
+
+ return 0;
+}
+
+static int kvm_create_vm_debugfs(struct kvm *kvm)
+{
+ int r = 0, i = 0;
+ char dir_name[ITOA_MAX_LEN];
+ struct kvm_stat_data *stat_data;
+ struct kvm_stats_debugfs_item *p;
+
+ if (!kvm)
+ return -EINVAL;
+
+ snprintf(dir_name, sizeof(dir_name), "%d", current->pid);
+ kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ if (!kvm->debugfs_dentry)
+ return -ENOMEM;
+
+ kvm->debugfs_data = kmalloc(sizeof(*kvm->debugfs_data) *
+ kvm_debugfs_num_entries, GFP_KERNEL);
+ if (!kvm->debugfs_data) {
+ r = -ENOMEM;
+ goto err_remove_dir;
+ }
+
+ for (p = debugfs_entries; p->name; p++, i++) {
+ stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
+ if (!stat_data) {
+ r = -ENOMEM;
+ goto out_err_clean;
+ }
+
+ stat_data->offset = p->offset;
+ stat_data->kvm = kvm;
+ if (!debugfs_create_file(p->name, 0444,
+ kvm->debugfs_dentry,
+ stat_data,
+ stat_fops_per_vm[p->kind])) {
+ r = -EEXIST;
+ goto out_err_clean;
+ }
+ kvm->debugfs_data[i] = stat_data;
+ }
+
+ return r;
+
+out_err_clean:
+ kfree(stat_data);
+ for (i--; i >= 0; i--)
+ kfree(kvm->debugfs_data[i]);
+
+ kfree(kvm->debugfs_data);
+
+err_remove_dir:
+ debugfs_remove_recursive(kvm->debugfs_dentry);
+
+ return r;
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -597,6 +672,10 @@ static struct kvm *kvm_create_vm(unsigned long type)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
+ r = kvm_create_vm_debugfs(kvm);
+ if (r)
+ goto out_err;
+
preempt_notifier_inc();
return kvm;
@@ -646,6 +725,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
int i;
struct mm_struct *mm = kvm->mm;
+ kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
@@ -689,6 +769,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
{
struct kvm *kvm = filp->private_data;
+ debugfs_remove_recursive(kvm->debugfs_dentry);
kvm_irqfd_release(kvm);
kvm_put_kvm(kvm);
@@ -3400,15 +3481,108 @@ static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
};
+static int kvm_debugfs_open(struct inode *inode, struct file *file,
+ int (*get)(void *, u64 *), int (*set)(void *, u64),
+ const char *fmt)
+{
+ int err;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ err = simple_attr_open(inode, file, get, set, fmt);
+ if (err)
+ return err;
+
+ kvm_get_kvm(stat_data->kvm);
+
+ return 0;
+}
+
+static int kvm_debugfs_release(struct inode *inode, struct file *file)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ simple_attr_release(inode, file);
+ kvm_put_kvm(stat_data->kvm);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm(void *data, u64 *val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return simple_attr_open(inode, file, vm_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vm_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vm_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static int vcpu_stat_get_per_vm(void *data, u64 *val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ *val = 0;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *val += *(u32 *)((void *)vcpu + stat_data->offset);
+
+ return 0;
+}
+
+static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vcpu_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vcpu_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations *stat_fops_per_vm[] = {
+ [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
+ [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
+};
+
static int vm_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- *val += *(u32 *)((void *)kvm + offset);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
+ }
spin_unlock(&kvm_lock);
return 0;
}
@@ -3419,15 +3593,16 @@ static int vcpu_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- int i;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- *val += *(u32 *)((void *)vcpu + offset);
-
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
+ }
spin_unlock(&kvm_lock);
return 0;
}
@@ -3448,7 +3623,8 @@ static int kvm_init_debug(void)
if (kvm_debugfs_dir == NULL)
goto out;
- for (p = debugfs_entries; p->name; ++p) {
+ kvm_debugfs_num_entries = 0;
+ for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
--
2.3.0
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
@ 2015-11-26 16:17 Tyler Baker
2015-11-26 20:47 ` Christian Borntraeger
From: Tyler Baker @ 2015-11-26 16:17 UTC
To: borntraeger
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
Hi Christian,
The kernelci.org bot has recently been reporting kvm guest boot
failures[1] on various arm64 platforms in next-20151126. The bot
bisected[2] the failures to the commit in -next titled "KVM: Create
debugfs dir and stat files for each VM". I confirmed that reverting this
commit on top of next-20151126 resolves the boot issue.
In this test case the host and guest are booted with the same kernel.
The host is booted over nfs, installs qemu (qemu-system arm64 2.4.0),
and launches a guest. The host is booting fine, but when the guest is
launched it errors with "Failed to retrieve host CPU features!". I
checked the host logs, and found an "Unable to handle kernel paging
request" splat[3] which occurs when the guest is attempting to start.
I scanned the patch in question but nothing obvious jumped out at me,
any thoughts?
Cheers,
Tyler
[1] http://kernelci.org/boot/all/job/next/kernel/next-20151126/
[2] http://hastebin.com/fuhicugate.vhdl
[3] http://hastebin.com/yicefetuho.sm
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-26 16:17 [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM Tyler Baker
@ 2015-11-26 20:47 ` Christian Borntraeger
2015-11-27 8:54 ` Christian Borntraeger
From: Christian Borntraeger @ 2015-11-26 20:47 UTC
To: Tyler Baker
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 11/26/2015 05:17 PM, Tyler Baker wrote:
> Hi Christian,
>
> The kernelci.org bot recently has been reporting kvm guest boot
> failures[1] on various arm64 platforms in next-20151126. The bot
> bisected[2] the failures to the commit in -next titled "KVM: Create
> debugfs dir and stat files for each VM". I confirmed by reverting this
> commit on top of next-20151126 it resolves the boot issue.
>
> In this test case the host and guest are booted with the same kernel.
> The host is booted over nfs, installs qemu (qemu-system arm64 2.4.0),
> and launches a guest. The host is booting fine, but when the guest is
> launched it errors with "Failed to retrieve host CPU features!". I
> checked the host logs, and found an "Unable to handle kernel paging
> request" splat[3] which occurs when the guest is attempting to start.
>
> I scanned the patch in question but nothing obvious jumped out at me,
> any thoughts?
Not really.
Do you have processes running that read the files in /sys/kernel/debug/kvm/*?
If I read the arm oops message correctly it oopsed inside
__srcu_read_lock. There is actually nothing in there that can oops,
except the access to the preempt count. I am just guessing right now,
but maybe the preempt variable is no longer available (as the process
is gone). As long as a debugfs file is open, we hold a reference to
the kvm, which holds a reference to the mm, so the mm might be killed
after the process. But this is supposed to work, so maybe it's something
different. An objdump of __srcu_read_lock might help.
I will drop it from my tree until we understand the problem
Christian
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-26 20:47 ` Christian Borntraeger
@ 2015-11-27 8:54 ` Christian Borntraeger
2015-11-27 17:08 ` Tyler Baker
From: Christian Borntraeger @ 2015-11-27 8:54 UTC
To: Tyler Baker
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 11/26/2015 09:47 PM, Christian Borntraeger wrote:
> On 11/26/2015 05:17 PM, Tyler Baker wrote:
>> <snip>
>
> Not really.
> Do you have processes running that read the files in /sys/kernel/debug/kvm/*?
>
> If I read the arm oops message correctly it oopsed inside
> __srcu_read_lock. there is actually nothing in there that can oops,
> except the access to the preempt count. I am just guessing right now,
> but maybe the preempt variable is no longer available (as the process
> is gone). As long as a debugfs file is open, we hold a reference to
> the kvm, which holds a reference to the mm, so the mm might be killed
> after the process. But this is supposed to work, so maybe its something
> different. An objdump of __srcu_read_lock might help.
Hmm, the preempt thing is done in srcu_read_lock, but the crash is in
__srcu_read_lock. This function gets the srcu struct from mmu_notifier.c,
which must be present and is initialized during boot.
int __srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	idx = READ_ONCE(sp->completed) & 0x1;
	__this_cpu_inc(sp->per_cpu_ref->c[idx]);
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
	return idx;
}
Looking at the code, I have no clue why the patch makes a difference.
Can you try to get an objdump -S for __srcu_read_lock?
>
> I will drop it from my tree until we understand the problem
>
> Christian
>
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-27 8:54 ` Christian Borntraeger
@ 2015-11-27 17:08 ` Tyler Baker
2015-11-27 18:53 ` Tyler Baker
From: Tyler Baker @ 2015-11-27 17:08 UTC
To: Christian Borntraeger
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 27 November 2015 at 00:54, Christian Borntraeger
<borntraeger@de.ibm.com> wrote:
> <snip>
> Looking at the code, I have no clue why the patch makes a difference.
> Can you try to get an objdump -S for __srcu_read_lock?
Using next-20151126 as the base, here is the objdump[1] I came up with
for __srcu_read_lock.
Cheers,
Tyler
[1] http://hastebin.com/bifiqobola.pl
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-27 17:08 ` Tyler Baker
@ 2015-11-27 18:53 ` Tyler Baker
2015-11-27 20:42 ` Tyler Baker
From: Tyler Baker @ 2015-11-27 18:53 UTC
To: Christian Borntraeger
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 27 November 2015 at 09:08, Tyler Baker <tyler.baker@linaro.org> wrote:
> <snip>
Some other interesting finding below...
On the host, I do _not_ have any nodes under /sys/kernel/debug/kvm/
Running strace on the qemu command I use to launch the guest yields
the following.
[pid 5963] 1448649724.405537 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f6652a000
[pid 5963] 1448649724.405586 read(13, "MemTotal: 16414616 kB\nMemF"..., 1024) = 1024
[pid 5963] 1448649724.405699 close(13) = 0
[pid 5963] 1448649724.405755 munmap(0x7f6652a000, 65536) = 0
[pid 5963] 1448649724.405947 brk(0x2552f000) = 0x2552f000
[pid 5963] 1448649724.406148 openat(AT_FDCWD, "/dev/kvm", O_RDWR|O_CLOEXEC) = 13
[pid 5963] 1448649724.406209 ioctl(13, KVM_CREATE_VM, 0) = -1 ENOMEM (Cannot allocate memory)
[pid 5963] 1448649724.406382 close(13) = 0
[pid 5963] 1448649724.406435 write(2, "Failed to retrieve host CPU feat"..., 38Failed to retrieve host CPU features!
) = 38
Tyler
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-27 18:53 ` Tyler Baker
@ 2015-11-27 20:42 ` Tyler Baker
2015-11-30 8:37 ` Janosch Frank
2015-11-30 8:38 ` Christian Borntraeger
From: Tyler Baker @ 2015-11-27 20:42 UTC
To: Christian Borntraeger
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 27 November 2015 at 10:53, Tyler Baker <tyler.baker@linaro.org> wrote:
> <snip>
> Some other interesting finding below...
>
> On the host, I do _not_ have any nodes under /sys/kernel/debug/kvm/
>
> Running strace on the qemu command I use to launch the guest yields
> the following.
>
> [pid 5963] 1448649724.405537 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f6652a000
> [pid 5963] 1448649724.405586 read(13, "MemTotal: 16414616 kB\nMemF"..., 1024) = 1024
> [pid 5963] 1448649724.405699 close(13) = 0
> [pid 5963] 1448649724.405755 munmap(0x7f6652a000, 65536) = 0
> [pid 5963] 1448649724.405947 brk(0x2552f000) = 0x2552f000
> [pid 5963] 1448649724.406148 openat(AT_FDCWD, "/dev/kvm", O_RDWR|O_CLOEXEC) = 13
> [pid 5963] 1448649724.406209 ioctl(13, KVM_CREATE_VM, 0) = -1 ENOMEM (Cannot allocate memory)
If I comment out the call to kvm_create_vm_debugfs(kvm), the guest boots
fine. I put some printk's in the kvm_create_vm_debugfs() function and
it's returning -ENOMEM after it evaluates !kvm->debugfs_dentry. I was
chatting with some folks from the Linaro virtualization team and they
mentioned that ARM is a bit special, as the same PID creates two VMs in
quick succession: the first one is a scratch VM and the other is the
'real' VM. With that bit of info, I suspect we may be trying to create
the debugfs directory twice, and the second time it's failing because
it already exists.
Cheers,
Tyler
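
(Illustration only, not from the thread: a hypothetical minimal
reproducer for the double-create pattern described above. QEMU's ARM
probing effectively issues two KVM_CREATE_VM calls from one process;
with the per-VM debugfs patch applied, the second call is expected to
fail because /sys/kernel/debug/kvm/<pid> already exists.)

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int vm1, vm2;

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	vm1 = ioctl(kvm, KVM_CREATE_VM, 0);	/* "scratch" VM */
	vm2 = ioctl(kvm, KVM_CREATE_VM, 0);	/* "real" VM, same PID */
	printf("first create: %d, second create: %d\n", vm1, vm2);
	if (vm2 < 0)
		perror("second KVM_CREATE_VM");
	if (vm1 >= 0)
		close(vm1);
	if (vm2 >= 0)
		close(vm2);
	close(kvm);
	return 0;
}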
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-27 20:42 ` Tyler Baker
@ 2015-11-30 8:37 ` Janosch Frank
2015-11-30 15:10 ` Alex Bennée
2015-11-30 8:38 ` Christian Borntraeger
From: Janosch Frank @ 2015-11-30 8:37 UTC
To: Tyler Baker, Christian Borntraeger
Cc: kvm, pbonzini, pmorel, dan.carpenter, Kevin's boot bot
On 11/27/2015 09:42 PM, Tyler Baker wrote:
> <snip>
> If I comment out the call to kvm_create_vm_debugfs(kvm), the guest boots
> fine. I put some printk's in the kvm_create_vm_debugfs() function and
> it's returning -ENOMEM after it evaluates !kvm->debugfs_dentry. I was
> chatting with some folks from the Linaro virtualization team and they
> mentioned that ARM is a bit special, as the same PID creates two VMs in
> quick succession: the first one is a scratch VM and the other is the
> 'real' VM. With that bit of info, I suspect we may be trying to create
> the debugfs directory twice, and the second time it's failing because
> it already exists.
>
> Cheers,
>
> Tyler
>
After a quick look into qemu I guess I've found the problem:
kvm_init creates a VM, does checking and self-initialization, and
then calls kvm_arch_init. The arch initialization indirectly
calls kvm_arm_create_scratch_host_vcpu, and that's where the
trouble begins, as it also creates a VM.
My assumption was that nobody would create multiple VMs under
the same PID. Christian and I are working on a solution on the
kernel side.
Cheers
Janosch
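
(Illustration only: a sketch of one conceivable kernel-side fix,
assuming a per-VM value such as the VM's anon-inode fd were passed
down to kvm_create_vm_debugfs(); how the fd would be threaded through
is left open here, and this is not necessarily the solution that was
merged. Note the name buffer would also need to grow beyond
ITOA_MAX_LEN.)

static void kvm_debugfs_dir_name(char *buf, size_t len, int fd)
{
	/*
	 * "pid-fd", e.g. "1234-11" vs. "1234-13": two VMs created by
	 * the same process get distinct debugfs directory names.
	 */
	snprintf(buf, len, "%d-%d", task_pid_nr(current), fd);
}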
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-27 20:42 ` Tyler Baker
2015-11-30 8:37 ` Janosch Frank
@ 2015-11-30 8:38 ` Christian Borntraeger
2015-11-30 17:00 ` Tyler Baker
2016-02-02 17:15 ` Paolo Bonzini
From: Christian Borntraeger @ 2015-11-30 8:38 UTC
To: Tyler Baker
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 11/27/2015 09:42 PM, Tyler Baker wrote:
> <snip>
> If I comment out the call to kvm_create_vm_debugfs(kvm), the guest boots
> fine. I put some printk's in the kvm_create_vm_debugfs() function and
> it's returning -ENOMEM after it evaluates !kvm->debugfs_dentry. I was
> chatting with some folks from the Linaro virtualization team and they
> mentioned that ARM is a bit special, as the same PID creates two VMs in
> quick succession: the first one is a scratch VM and the other is the
> 'real' VM. With that bit of info, I suspect we may be trying to create
> the debugfs directory twice, and the second time it's failing because
> it already exists.
Hmmm, with a patched QEMU that calls KVM_CREATE_VM twice it errors out on s390
with -ENOMEM (which it should not), but it errors out gracefully.
Does the attached patch avoid the crash? (The guest will not start, but QEMU
should error out gracefully with -ENOMEM.)
[-- Attachment #2: test.patch --]
[-- Type: text/x-patch, Size: 555 bytes --]
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f7d6c8f..b26472a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -671,12 +671,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_create_vm_debugfs(kvm);
if (r)
- goto out_err;
+ goto out_mmu;
preempt_notifier_inc();
return kvm;
+out_mmu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#endif
out_err:
cleanup_srcu_struct(&kvm->irq_srcu);
out_err_no_irq_srcu:
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-30 8:37 ` Janosch Frank
@ 2015-11-30 15:10 ` Alex Bennée
From: Alex Bennée @ 2015-11-30 15:10 UTC
To: Janosch Frank
Cc: Tyler Baker, Christian Borntraeger, kvm, pbonzini, pmorel,
dan.carpenter, Kevin's boot bot
Janosch Frank <frankja@linux.vnet.ibm.com> writes:
> On 11/27/2015 09:42 PM, Tyler Baker wrote:
>> <snip>
> After a quick look into qemu I guess I've found the problem:
> kvm_init creates a VM, does checking and self-initialization, and
> then calls kvm_arch_init. The arch initialization indirectly
> calls kvm_arm_create_scratch_host_vcpu, and that's where the
> trouble begins, as it also creates a VM.
>
> My assumption was that nobody would create multiple VMs under
> the same PID. Christian and I are working on a solution on the
> kernel side.
Yeah, ARM is a little weird in that respect, as the scratch VM is used to
probe capabilities. There is nothing in the API that says you can't have
multiple VMs per PID, so I guess a better unique identifier is needed.
--
Alex Bennée
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-30 8:38 ` Christian Borntraeger
@ 2015-11-30 17:00 ` Tyler Baker
2016-02-02 17:15 ` Paolo Bonzini
From: Tyler Baker @ 2015-11-30 17:00 UTC
To: Christian Borntraeger
Cc: frankja, kvm, pbonzini, pmorel, dan.carpenter,
Kevin's boot bot
On 30 November 2015 at 00:38, Christian Borntraeger
<borntraeger@de.ibm.com> wrote:
> <snip>
> Hmmm, with a patched QEMU that calls KVM_CREATE_VM twice it errors out on s390
> with -ENOMEM (which it should not), but it errors out gracefully.
>
> Does the attached patch avoid the crash? (The guest will not start, but QEMU
> should error out gracefully with -ENOMEM.)
Yeah. I patched my host kernel and now the qemu guest launch fails
gracefully[1].
Cheers,
Tyler
[1] http://hastebin.com/rotiropayo.mel
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2015-11-30 8:38 ` Christian Borntraeger
2015-11-30 17:00 ` Tyler Baker
@ 2016-02-02 17:15 ` Paolo Bonzini
2016-02-02 19:37 ` Christian Borntraeger
From: Paolo Bonzini @ 2016-02-02 17:15 UTC
To: Christian Borntraeger, Tyler Baker
Cc: frankja, kvm, pmorel, dan.carpenter, Kevin's boot bot
On 30/11/2015 09:38, Christian Borntraeger wrote:
> <snip>
Looking at old unread email in my inbox... can you resubmit this to
kvm@vger.kernel.org as a proper patch (Signed-off-by, commit message, etc.)?
Thanks,
Paolo
* Re: [PATCH 2/2] KVM: Create debugfs dir and stat files for each VM
2016-02-02 17:15 ` Paolo Bonzini
@ 2016-02-02 19:37 ` Christian Borntraeger
From: Christian Borntraeger @ 2016-02-02 19:37 UTC
To: Paolo Bonzini, Tyler Baker
Cc: frankja, kvm, pmorel, dan.carpenter, Kevin's boot bot
On 02/02/2016 06:15 PM, Paolo Bonzini wrote:
>
>
> On 30/11/2015 09:38, Christian Borntraeger wrote:
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index f7d6c8f..b26472a 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -671,12 +671,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
>>
>> r = kvm_create_vm_debugfs(kvm);
>> if (r)
>> - goto out_err;
>> + goto out_mmu;
>>
>> preempt_notifier_inc();
>>
>> return kvm;
>>
>> +out_mmu:
>> +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>> + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
>> +#endif
>> out_err:
>> cleanup_srcu_struct(&kvm->irq_srcu);
>> out_err_no_irq_srcu:
>
> Looking at old unread email in my inbox... can you resubmit this to
> kvm@vger.kernel.org as a proper patch (Signed-off-by, commit message, etc.)?
>
This patch was only necessary to fix up an earlier version of Janosch's patch;
upstream is fine.