* [RFC 1/3] kvm tools: use mutex abstraction instead of pthread mutex
@ 2012-10-24 17:00 Sasha Levin
From: Sasha Levin @ 2012-10-24 17:00 UTC (permalink / raw)
To: penberg, mingo; +Cc: peterz, asias.hejun, tglx, gorcunov, kvm, Sasha Levin
We already have mutex_[init,lock,unlock] wrappers around the pthread mutex
calls. This patch creates a new struct mutex abstraction and converts
everything to use it.
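After the conversion, call sites look as follows. This is a minimal sketch;
example_lock and example() are hypothetical names, not code from the patch:

	#include "kvm/mutex.h"

	/* Statically initialized lock, replacing PTHREAD_MUTEX_INITIALIZER. */
	static DEFINE_MUTEX(example_lock);

	static void example(void)
	{
		mutex_lock(&example_lock);
		/* ... critical section ... */
		mutex_unlock(&example_lock);
	}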
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
---
tools/kvm/hw/serial.c | 10 +++++-----
tools/kvm/include/kvm/mutex.h | 22 ++++++++++++++--------
tools/kvm/include/kvm/qcow.h | 2 +-
tools/kvm/include/kvm/threadpool.h | 4 ++--
tools/kvm/include/kvm/uip.h | 10 +++++-----
tools/kvm/net/uip/buf.c | 4 ++--
tools/kvm/net/uip/core.c | 6 +++---
tools/kvm/net/uip/tcp.c | 6 +++---
tools/kvm/net/uip/udp.c | 2 +-
tools/kvm/util/threadpool.c | 8 ++++----
tools/kvm/virtio/blk.c | 4 ++--
tools/kvm/virtio/console.c | 4 ++--
tools/kvm/virtio/net.c | 14 +++++++-------
13 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/tools/kvm/hw/serial.c b/tools/kvm/hw/serial.c
index a177a7f..53b684a 100644
--- a/tools/kvm/hw/serial.c
+++ b/tools/kvm/hw/serial.c
@@ -22,7 +22,7 @@
#define UART_IIR_TYPE_BITS 0xc0
struct serial8250_device {
- pthread_mutex_t mutex;
+ struct mutex mutex;
u8 id;
u16 iobase;
@@ -55,7 +55,7 @@ struct serial8250_device {
static struct serial8250_device devices[] = {
/* ttyS0 */
[0] = {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.id = 0,
.iobase = 0x3f8,
@@ -65,7 +65,7 @@ static struct serial8250_device devices[] = {
},
/* ttyS1 */
[1] = {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.id = 1,
.iobase = 0x2f8,
@@ -75,7 +75,7 @@ static struct serial8250_device devices[] = {
},
/* ttyS2 */
[2] = {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.id = 2,
.iobase = 0x3e8,
@@ -85,7 +85,7 @@ static struct serial8250_device devices[] = {
},
/* ttyS3 */
[3] = {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.id = 3,
.iobase = 0x2e8,
diff --git a/tools/kvm/include/kvm/mutex.h b/tools/kvm/include/kvm/mutex.h
index 3286cea..4f31025 100644
--- a/tools/kvm/include/kvm/mutex.h
+++ b/tools/kvm/include/kvm/mutex.h
@@ -10,23 +10,29 @@
* to write user-space code! :-)
*/
-#define DEFINE_MUTEX(mutex) pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER
-
-static inline void mutex_init(pthread_mutex_t *mutex)
+struct mutex {
+ pthread_mutex_t mutex;
+};
+#define MUTEX_INITIALIZER (struct mutex) { .mutex = PTHREAD_MUTEX_INITIALIZER }
+
+#define DEFINE_MUTEX(mtx) struct mutex mtx = MUTEX_INITIALIZER
+
+static inline void mutex_init(struct mutex *lock)
{
- if (pthread_mutex_init(mutex, NULL) != 0)
+ if (pthread_mutex_init(&lock->mutex, NULL) != 0)
die("unexpected pthread_mutex_init() failure!");
}
-static inline void mutex_lock(pthread_mutex_t *mutex)
+static inline void mutex_lock(struct mutex *lock)
{
- if (pthread_mutex_lock(mutex) != 0)
+ if (pthread_mutex_lock(&lock->mutex) != 0)
die("unexpected pthread_mutex_lock() failure!");
+
}
-static inline void mutex_unlock(pthread_mutex_t *mutex)
+static inline void mutex_unlock(struct mutex *lock)
{
- if (pthread_mutex_unlock(mutex) != 0)
+ if (pthread_mutex_unlock(&lock->mutex) != 0)
die("unexpected pthread_mutex_unlock() failure!");
}
diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h
index e032a1e..f849246 100644
--- a/tools/kvm/include/kvm/qcow.h
+++ b/tools/kvm/include/kvm/qcow.h
@@ -74,7 +74,7 @@ struct qcow_header {
};
struct qcow {
- pthread_mutex_t mutex;
+ struct mutex mutex;
struct qcow_header *header;
struct qcow_l1_table table;
struct qcow_refcount_table refcount_table;
diff --git a/tools/kvm/include/kvm/threadpool.h b/tools/kvm/include/kvm/threadpool.h
index abe46ea..bacb243 100644
--- a/tools/kvm/include/kvm/threadpool.h
+++ b/tools/kvm/include/kvm/threadpool.h
@@ -15,7 +15,7 @@ struct thread_pool__job {
void *data;
int signalcount;
- pthread_mutex_t mutex;
+ struct mutex mutex;
struct list_head queue;
};
@@ -26,7 +26,7 @@ static inline void thread_pool__init_job(struct thread_pool__job *job, struct kv
.kvm = kvm,
.callback = callback,
.data = data,
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
};
}
diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
index 9af0110..ac248d2 100644
--- a/tools/kvm/include/kvm/uip.h
+++ b/tools/kvm/include/kvm/uip.h
@@ -187,14 +187,14 @@ struct uip_dhcp {
struct uip_info {
struct list_head udp_socket_head;
struct list_head tcp_socket_head;
- pthread_mutex_t udp_socket_lock;
- pthread_mutex_t tcp_socket_lock;
+ struct mutex udp_socket_lock;
+ struct mutex tcp_socket_lock;
struct uip_eth_addr guest_mac;
struct uip_eth_addr host_mac;
pthread_cond_t buf_free_cond;
pthread_cond_t buf_used_cond;
struct list_head buf_head;
- pthread_mutex_t buf_lock;
+ struct mutex buf_lock;
pthread_t udp_thread;
int udp_epollfd;
int buf_free_nr;
@@ -221,7 +221,7 @@ struct uip_buf {
struct uip_udp_socket {
struct sockaddr_in addr;
struct list_head list;
- pthread_mutex_t *lock;
+ struct mutex *lock;
u32 dport, sport;
u32 dip, sip;
int fd;
@@ -232,7 +232,7 @@ struct uip_tcp_socket {
struct list_head list;
struct uip_info *info;
pthread_cond_t cond;
- pthread_mutex_t *lock;
+ struct mutex *lock;
pthread_t thread;
u32 dport, sport;
u32 guest_acked;
diff --git a/tools/kvm/net/uip/buf.c b/tools/kvm/net/uip/buf.c
index 5e564a9..f29ad41 100644
--- a/tools/kvm/net/uip/buf.c
+++ b/tools/kvm/net/uip/buf.c
@@ -11,7 +11,7 @@ struct uip_buf *uip_buf_get_used(struct uip_info *info)
mutex_lock(&info->buf_lock);
while (!(info->buf_used_nr > 0))
- pthread_cond_wait(&info->buf_used_cond, &info->buf_lock);
+ pthread_cond_wait(&info->buf_used_cond, &info->buf_lock.mutex);
list_for_each_entry(buf, &info->buf_head, list) {
if (buf->status == UIP_BUF_STATUS_USED) {
@@ -39,7 +39,7 @@ struct uip_buf *uip_buf_get_free(struct uip_info *info)
mutex_lock(&info->buf_lock);
while (!(info->buf_free_nr > 0))
- pthread_cond_wait(&info->buf_free_cond, &info->buf_lock);
+ pthread_cond_wait(&info->buf_free_cond, &info->buf_lock.mutex);
list_for_each_entry(buf, &info->buf_head, list) {
if (buf->status == UIP_BUF_STATUS_FREE) {
diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c
index 2e7603c..4e5bb82 100644
--- a/tools/kvm/net/uip/core.c
+++ b/tools/kvm/net/uip/core.c
@@ -153,9 +153,9 @@ int uip_init(struct uip_info *info)
INIT_LIST_HEAD(tcp_socket_head);
INIT_LIST_HEAD(buf_head);
- pthread_mutex_init(&info->udp_socket_lock, NULL);
- pthread_mutex_init(&info->tcp_socket_lock, NULL);
- pthread_mutex_init(&info->buf_lock, NULL);
+ mutex_init(&info->udp_socket_lock);
+ mutex_init(&info->tcp_socket_lock);
+ mutex_init(&info->buf_lock);
pthread_cond_init(&info->buf_used_cond, NULL);
pthread_cond_init(&info->buf_free_cond, NULL);
diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
index 830aa3f..9044f40 100644
--- a/tools/kvm/net/uip/tcp.c
+++ b/tools/kvm/net/uip/tcp.c
@@ -27,7 +27,7 @@ static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how)
static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
struct list_head *sk_head;
- pthread_mutex_t *sk_lock;
+ struct mutex *sk_lock;
struct uip_tcp_socket *sk;
sk_head = &arg->info->tcp_socket_head;
@@ -49,7 +49,7 @@ static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 s
{
struct list_head *sk_head;
struct uip_tcp_socket *sk;
- pthread_mutex_t *sk_lock;
+ struct mutex *sk_lock;
struct uip_tcp *tcp;
struct uip_ip *ip;
int ret;
@@ -198,7 +198,7 @@ static void *uip_tcp_socket_thread(void *p)
while (left > 0) {
mutex_lock(sk->lock);
while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
- pthread_cond_wait(&sk->cond, sk->lock);
+ pthread_cond_wait(&sk->cond, &sk->lock->mutex);
mutex_unlock(sk->lock);
sk->payload = pos;
diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c
index 5b6ec1c..31c417c 100644
--- a/tools/kvm/net/uip/udp.c
+++ b/tools/kvm/net/uip/udp.c
@@ -14,7 +14,7 @@ static struct uip_udp_socket *uip_udp_socket_find(struct uip_tx_arg *arg, u32 si
{
struct list_head *sk_head;
struct uip_udp_socket *sk;
- pthread_mutex_t *sk_lock;
+ struct mutex *sk_lock;
struct epoll_event ev;
int flags;
int ret;
diff --git a/tools/kvm/util/threadpool.c b/tools/kvm/util/threadpool.c
index a363831..e64aa26 100644
--- a/tools/kvm/util/threadpool.c
+++ b/tools/kvm/util/threadpool.c
@@ -7,9 +7,9 @@
#include <pthread.h>
#include <stdbool.h>
-static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t thread_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t job_cond = PTHREAD_COND_INITIALIZER;
+static DEFINE_MUTEX(job_mutex);
+static DEFINE_MUTEX(thread_mutex);
+static pthread_cond_t job_cond = PTHREAD_COND_INITIALIZER;
static LIST_HEAD(head);
@@ -85,7 +85,7 @@ static void *thread_pool__threadfunc(void *param)
mutex_lock(&job_mutex);
while (running && (curjob = thread_pool__job_pop_locked()) == NULL)
- pthread_cond_wait(&job_cond, &job_mutex);
+ pthread_cond_wait(&job_cond, &job_mutex.mutex);
mutex_unlock(&job_mutex);
if (running)
diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c
index f76342c..356a240 100644
--- a/tools/kvm/virtio/blk.c
+++ b/tools/kvm/virtio/blk.c
@@ -37,7 +37,7 @@ struct blk_dev_req {
};
struct blk_dev {
- pthread_mutex_t mutex;
+ struct mutex mutex;
struct list_head list;
@@ -248,7 +248,7 @@ static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
return -ENOMEM;
*bdev = (struct blk_dev) {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.disk = disk,
.blk_config = (struct virtio_blk_config) {
.capacity = disk->size / SECTOR_SIZE,
diff --git a/tools/kvm/virtio/console.c b/tools/kvm/virtio/console.c
index 88b1106..1df6cb0 100644
--- a/tools/kvm/virtio/console.c
+++ b/tools/kvm/virtio/console.c
@@ -29,7 +29,7 @@
#define VIRTIO_CONSOLE_TX_QUEUE 1
struct con_dev {
- pthread_mutex_t mutex;
+ struct mutex mutex;
struct virtio_device vdev;
struct virt_queue vqs[VIRTIO_CONSOLE_NUM_QUEUES];
@@ -40,7 +40,7 @@ struct con_dev {
};
static struct con_dev cdev = {
- .mutex = PTHREAD_MUTEX_INITIALIZER,
+ .mutex = MUTEX_INITIALIZER,
.config = {
.cols = 80,
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index ac429cc..db77ab8 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -39,7 +39,7 @@ struct net_dev_operations {
};
struct net_dev {
- pthread_mutex_t mutex;
+ struct mutex mutex;
struct virtio_device vdev;
struct list_head list;
@@ -48,11 +48,11 @@ struct net_dev {
u32 features;
pthread_t io_rx_thread;
- pthread_mutex_t io_rx_lock;
+ struct mutex io_rx_lock;
pthread_cond_t io_rx_cond;
pthread_t io_tx_thread;
- pthread_mutex_t io_tx_lock;
+ struct mutex io_tx_lock;
pthread_cond_t io_tx_cond;
int vhost_fd;
@@ -87,7 +87,7 @@ static void *virtio_net_rx_thread(void *p)
while (1) {
mutex_lock(&ndev->io_rx_lock);
if (!virt_queue__available(vq))
- pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
+ pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock.mutex);
mutex_unlock(&ndev->io_rx_lock);
while (virt_queue__available(vq)) {
@@ -125,7 +125,7 @@ static void *virtio_net_tx_thread(void *p)
while (1) {
mutex_lock(&ndev->io_tx_lock);
if (!virt_queue__available(vq))
- pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
+ pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock.mutex);
mutex_unlock(&ndev->io_tx_lock);
while (virt_queue__available(vq)) {
@@ -252,8 +252,8 @@ fail:
static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
{
- pthread_mutex_init(&ndev->io_tx_lock, NULL);
- pthread_mutex_init(&ndev->io_rx_lock, NULL);
+ mutex_init(&ndev->io_tx_lock);
+ mutex_init(&ndev->io_rx_lock);
pthread_cond_init(&ndev->io_tx_cond, NULL);
pthread_cond_init(&ndev->io_rx_cond, NULL);
--
1.7.12.4
* [RFC 2/3] lockdep: be nice about compiling from userspace
@ 2012-10-24 17:00 ` Sasha Levin
From: Sasha Levin @ 2012-10-24 17:00 UTC (permalink / raw)
To: penberg, mingo; +Cc: peterz, asias.hejun, tglx, gorcunov, kvm, Sasha Levin
We can rather easily make lockdep work from userspace, although three issues
remain which I'm not sure about:
- Kernel naming - we can just wrap init_utsname() to return a kvmtool-related
utsname, but is that what we want?
- static_obj() - I don't have a better idea than calling mprobe(), which sounds
wrong as well.
- debug_show_all_locks() - we don't actually call it from userspace yet, but I
think we might want to, and I'm not sure how to make it pretty using existing
kernel code.
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
---
kernel/lockdep.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 7981e5b..fdd3670 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -567,10 +567,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
static void print_kernel_ident(void)
{
+#ifdef __KERNEL__
printk("%s %.*s %s\n", init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version,
print_tainted());
+#endif
}
static int very_verbose(struct lock_class *class)
@@ -586,6 +588,7 @@ static int very_verbose(struct lock_class *class)
*/
static int static_obj(void *obj)
{
+#ifdef __KERNEL__
unsigned long start = (unsigned long) &_stext,
end = (unsigned long) &_end,
addr = (unsigned long) obj;
@@ -609,6 +612,8 @@ static int static_obj(void *obj)
* module static or percpu var?
*/
return is_module_address(addr) || is_module_percpu_address(addr);
+#endif
+ return 1;
}
/*
@@ -4108,7 +4113,7 @@ void debug_check_no_locks_held(struct task_struct *task)
if (unlikely(task->lockdep_depth > 0))
print_held_locks_bug(task);
}
-
+#ifdef __KERNEL__
void debug_show_all_locks(void)
{
struct task_struct *g, *p;
@@ -4166,7 +4171,7 @@ retry:
read_unlock(&tasklist_lock);
}
EXPORT_SYMBOL_GPL(debug_show_all_locks);
-
+#endif
/*
* Careful: only use this function if you are sure that
* the task cannot run in parallel!
--
1.7.12.4
* [RFC 3/3] kvm tools: use lockdep to detect locking issues
@ 2012-10-24 17:00 ` Sasha Levin
From: Sasha Levin @ 2012-10-24 17:00 UTC (permalink / raw)
To: penberg, mingo; +Cc: peterz, asias.hejun, tglx, gorcunov, kvm, Sasha Levin
The kernel isn't the only thing that can use lockdep to detect locking issues -
so can we! Just hook into lockdep and let it do exactly what it does in the
kernel.
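Concretely, "hooking into lockdep" just means bracketing the pthread calls
with the usual lockdep annotations; the mutex wrappers end up looking like
this (taken straight from the mutex.h hunk in this patch, nothing new here):

	static inline void mutex_lock(struct mutex *lock)
	{
		lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_);
		if (pthread_mutex_lock(&lock->mutex) != 0)
			die("unexpected pthread_mutex_lock() failure!");
		lock_acquired(&lock->dep_map, _THIS_IP_);
	}

	static inline void mutex_unlock(struct mutex *lock)
	{
		lock_release(&lock->dep_map, 0, _THIS_IP_);
		if (pthread_mutex_unlock(&lock->mutex) != 0)
			die("unexpected pthread_mutex_unlock() failure!");
	}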
For example, this stupid code:
mutex_lock(&mutex_A);
mutex_lock(&mutex_A);
Would result in a spew from lockdep:
=============================================
[ INFO: possible recursive locking detected ]
---------------------------------------------
kvm-main/3481 is trying to acquire lock:
(&mutex_A){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426eba]
but task is already holding lock:
(&mutex_A){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426eba]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&mutex_A);
lock(&mutex_A);
*** DEADLOCK ***
May be due to missing lock nesting notation
1 lock held by kvm-main/3481:
#0: (&mutex_A){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426eba]
stack backtrace:
./vm[0x40afe4]
./vm[0x40c4bc]
./vm(lock_acquire+0x3e)[0x426fce]
./vm[0x40e988]
./vm(kvm_cmd_run+0x5a)[0x426eba]
./vm(handle_command+0x41)[0x4206f1]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f73139e56c5]
./vm[0x40985d]
Or things like releasing a lock twice:
mutex_lock(&mutex_A);
mutex_unlock(&mutex_A);
mutex_unlock(&mutex_A);
Would result in a spew:
=====================================
[ BUG: bad unlock balance detected! ]
-------------------------------------
kvm-main/3643 is trying to release lock (&mutex_A) at:
./vm(kvm_cmd_run+0x5a) [0x426e1a]
but there are no more locks to release!
other info that might help us debug this:
no locks held by kvm-main/3643.
stack backtrace:
./vm[0x40afe4]
./vm[0x412688]
./vm(lock_release+0xb0)[0x413f70]
./vm[0x40e8ee]
./vm(kvm_cmd_run+0x5a)[0x426e1a]
./vm(handle_command+0x41)[0x420651]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2ed58446c5]
./vm[0x40985d]
And even a more complex ABBA issue such as:
mutex_lock(&mutex_A);
mutex_lock(&mutex_B);
mutex_unlock(&mutex_B);
mutex_unlock(&mutex_A);
mutex_lock(&mutex_B);
mutex_lock(&mutex_A);
mutex_unlock(&mutex_A);
mutex_unlock(&mutex_B);
Would result in a spew from lockdep:
======================================================
[ INFO: possible circular locking dependency detected ]
-------------------------------------------------------
kvm-main/2159 is trying to acquire lock:
(&mutex_A){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426dda]
but task is already holding lock:
(&mutex_B){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426dda]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&mutex_B){......}:
./vm[0x415aa7]
./vm[0x40c6b6]
./vm(lock_acquire+0x3e)[0x426eee]
./vm[0x4120e0]
./vm(kvm_cmd_run+0x5a)[0x426dda]
./vm(handle_command+0x41)[0x420611]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7fd640ab76c5]
./vm[0x40985d]
-> #0 (&mutex_A){......}:
./vm[0x415aa7]
./vm[0x40c46a]
./vm(lock_acquire+0x3e)[0x426eee]
./vm[0x41219e]
./vm(kvm_cmd_run+0x5a)[0x426dda]
./vm(handle_command+0x41)[0x420611]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7fd640ab76c5]
./vm[0x40985d]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&mutex_B);
lock(&mutex_A);
lock(&mutex_B);
lock(&mutex_A);
*** DEADLOCK ***
1 lock held by kvm-main/2159:
#0: (&mutex_B){......}, at: ./vm(kvm_cmd_run+0x5a) [0x426dda]
stack backtrace:
./vm[0x40afe4]
./vm[0x40c411]
./vm(lock_acquire+0x3e)[0x426eee]
./vm[0x41219e]
./vm(kvm_cmd_run+0x5a)[0x426dda]
./vm(handle_command+0x41)[0x420611]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7fd640ab76c5]
./vm[0x40985d]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
---
tools/kvm/Makefile | 14 ++-
tools/kvm/builtin-run.c | 18 ++-
tools/kvm/include/kvm/kvm.h | 2 +
tools/kvm/include/kvm/mutex.h | 19 +++-
tools/kvm/include/linux/bitops.h | 1 -
tools/kvm/include/linux/debug_locks.h | 7 ++
tools/kvm/include/linux/ftrace.h | 132 +++++++++++++++++++++
tools/kvm/include/linux/gfp.h | 144 +++++++++++++++++++++++
tools/kvm/include/linux/hardirq.h | 209 ++++++++++++++++++++++++++++++++++
tools/kvm/include/linux/hash.h | 7 ++
tools/kvm/include/linux/irqflags.h | 50 ++++++++
tools/kvm/include/linux/kallsyms.h | 129 +++++++++++++++++++++
tools/kvm/include/linux/kernel.h | 20 +++-
tools/kvm/include/linux/lockdep.h | 56 +++++++++
tools/kvm/include/linux/module.h | 2 +-
tools/kvm/include/linux/moduleparam.h | 6 +
tools/kvm/include/linux/rcu.h | 19 ++++
tools/kvm/include/linux/spinlock.h | 33 ++++++
tools/kvm/include/linux/stacktrace.h | 31 +++++
tools/kvm/include/linux/types.h | 8 ++
tools/kvm/virtio/pci.c | 2 +-
21 files changed, 901 insertions(+), 8 deletions(-)
create mode 100644 tools/kvm/include/asm/sections.h
create mode 100644 tools/kvm/include/linux/debug_locks.h
create mode 100644 tools/kvm/include/linux/delay.h
create mode 100644 tools/kvm/include/linux/ftrace.h
create mode 100644 tools/kvm/include/linux/gfp.h
create mode 100644 tools/kvm/include/linux/hardirq.h
create mode 100644 tools/kvm/include/linux/hash.h
create mode 100644 tools/kvm/include/linux/interrupt.h
create mode 100644 tools/kvm/include/linux/irqflags.h
create mode 100644 tools/kvm/include/linux/kallsyms.h
create mode 100644 tools/kvm/include/linux/linkage.h
create mode 100644 tools/kvm/include/linux/lockdep.h
create mode 100644 tools/kvm/include/linux/mm_types.h
create mode 100644 tools/kvm/include/linux/moduleparam.h
create mode 100644 tools/kvm/include/linux/mutex.h
create mode 100644 tools/kvm/include/linux/proc_fs.h
create mode 100644 tools/kvm/include/linux/rcu.h
create mode 100644 tools/kvm/include/linux/seq_file.h
create mode 100644 tools/kvm/include/linux/spinlock.h
create mode 100644 tools/kvm/include/linux/stacktrace.h
create mode 100755 tools/kvm/include/linux/system.h
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 903001e..5c40ba5 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -86,6 +86,7 @@ OBJS += net/uip/dhcp.o
OBJS += kvm-cmd.o
OBJS += util/init.o
OBJS += util/rbtree.o
+OBJS += util/lockdep.o
OBJS += util/threadpool.o
OBJS += util/parse-options.o
OBJS += util/rbtree-interval.o
@@ -244,7 +245,7 @@ DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"'
DEFINES += -DBUILD_ARCH='"$(ARCH)"'
KVM_INCLUDE := include
-CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I$(KINCL_PATH)/include/uapi -I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -O2 -fno-strict-aliasing -g -flto
+CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I$(KINCL_PATH)/include/uapi -I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -O2 -fno-strict-aliasing -g -flto -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -rdynamic
WARNINGS += -Wall
WARNINGS += -Wcast-align
@@ -311,6 +312,9 @@ $(DEPS):
util/rbtree.d: ../../lib/rbtree.c
$(Q) $(CC) -M -MT util/rbtree.o $(CFLAGS) $< -o $@
+util/lockdep.d: ../../kernel/lockdep.c
+ $(Q) $(CC) -M -MT util/lockdep.o $(CFLAGS) $< -o $@
+
%.d: %.c
$(Q) $(CC) -M -MT $(patsubst %.d,%.o,$@) $(CFLAGS) $< -o $@
@@ -337,6 +341,14 @@ endif
$(E) " CC " $@
$(Q) $(CC) -c $(CFLAGS) $< -o $@
+util/lockdep.static.o util/lockdep.o: ../../kernel/lockdep.c
+ifeq ($(C),1)
+ $(E) " CHECK " $@
+ $(Q) $(CHECK) -c $(CFLAGS) $< -o $@
+endif
+ $(E) " CC " $@
+ $(Q) $(CC) -c $(CFLAGS) $< -o $@
+
%.static.o: %.c
ifeq ($(C),1)
$(E) " CHECK " $@
diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 50495a3..9656da2 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -54,6 +54,7 @@
#define GB_SHIFT (30)
__thread struct kvm_cpu *current_kvm_cpu;
+__thread struct task_struct current_obj;
static int kvm_run_wrapper;
@@ -171,7 +172,12 @@ static void handle_sigalrm(int sig, siginfo_t *si, void *uc)
static void *kvm_cpu_thread(void *arg)
{
- current_kvm_cpu = arg;
+ char name[16];
+
+ current_kvm_cpu = arg;
+
+ sprintf(name, "kvm-vcpu-%lu", current_kvm_cpu->cpu_id);
+ kvm__set_thread_name(name);
if (kvm_cpu__start(current_kvm_cpu))
goto panic_kvm;
@@ -482,6 +488,12 @@ static struct kvm *kvm_cmd_run_init(int argc, const char **argv)
struct sigaction sa;
struct kvm *kvm = kvm__new();
+ /*
+ * This one is special, we need to init lockdep before we init any
+ * locks, so it can't go into the regular init code.
+ */
+ lockdep_init();
+
if (IS_ERR(kvm))
return kvm;
@@ -499,7 +511,7 @@ static struct kvm *kvm_cmd_run_init(int argc, const char **argv)
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_KEEP_DASHDASH);
if (argc != 0) {
- /* Cusrom options, should have been handled elsewhere */
+ /* Custom options, should have been handled elsewhere */
if (strcmp(argv[0], "--") == 0) {
if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
kvm->cfg.sandbox = DEFAULT_SANDBOX_FILENAME;
@@ -686,6 +698,8 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
int ret = -EFAULT;
struct kvm *kvm;
+ kvm__set_thread_name("kvm-main");
+
kvm = kvm_cmd_run_init(argc, argv);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
diff --git a/tools/kvm/include/asm/sections.h b/tools/kvm/include/asm/sections.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index 1c7fab7..049932e 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -6,6 +6,7 @@
#include "kvm/util-init.h"
#include "kvm/kvm.h"
+#include <linux/kernel.h>
#include <stdbool.h>
#include <linux/types.h>
#include <time.h>
@@ -122,6 +123,7 @@ bool kvm__supports_extension(struct kvm *kvm, unsigned int extension);
static inline void kvm__set_thread_name(const char *name)
{
prctl(PR_SET_NAME, name);
+ lockdep_set_thread();
}
#endif /* KVM__KVM_H */
diff --git a/tools/kvm/include/kvm/mutex.h b/tools/kvm/include/kvm/mutex.h
index 4f31025..7917d3b 100644
--- a/tools/kvm/include/kvm/mutex.h
+++ b/tools/kvm/include/kvm/mutex.h
@@ -1,6 +1,10 @@
#ifndef KVM__MUTEX_H
#define KVM__MUTEX_H
+#include <linux/kernel.h>
+
+#include <linux/kallsyms.h>
+#include <linux/lockdep.h>
#include <pthread.h>
#include "kvm/util.h"
@@ -12,26 +16,39 @@
struct mutex {
pthread_mutex_t mutex;
+ struct lockdep_map dep_map;
};
#define MUTEX_INITIALIZER (struct mutex) { .mutex = PTHREAD_MUTEX_INITIALIZER }
#define DEFINE_MUTEX(mtx) struct mutex mtx = MUTEX_INITIALIZER
-static inline void mutex_init(struct mutex *lock)
+static inline void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
if (pthread_mutex_init(&lock->mutex, NULL) != 0)
die("unexpected pthread_mutex_init() failure!");
+
+ lockdep_init_map(&lock->dep_map, name, key, 0);
}
+# define mutex_init(mutex) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __mutex_init((mutex), #mutex, &__key); \
+ } while (0)
+
static inline void mutex_lock(struct mutex *lock)
{
+ lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_);
if (pthread_mutex_lock(&lock->mutex) != 0)
die("unexpected pthread_mutex_lock() failure!");
+ lock_acquired(&lock->dep_map, _THIS_IP_);
}
static inline void mutex_unlock(struct mutex *lock)
{
+ lock_release(&lock->dep_map, 0, _THIS_IP_);
if (pthread_mutex_unlock(&lock->mutex) != 0)
die("unexpected pthread_mutex_unlock() failure!");
}
diff --git a/tools/kvm/include/linux/bitops.h b/tools/kvm/include/linux/bitops.h
index 56448b7..c5bec42 100644
--- a/tools/kvm/include/linux/bitops.h
+++ b/tools/kvm/include/linux/bitops.h
@@ -1,7 +1,6 @@
#ifndef _KVM_LINUX_BITOPS_H_
#define _KVM_LINUX_BITOPS_H_
-#include <linux/kernel.h>
#include <linux/compiler.h>
#include <asm/hweight.h>
diff --git a/tools/kvm/include/linux/debug_locks.h b/tools/kvm/include/linux/debug_locks.h
new file mode 100644
index 0000000..b9be86a
--- /dev/null
+++ b/tools/kvm/include/linux/debug_locks.h
@@ -0,0 +1,7 @@
+#ifndef KVM__DEBUG_LOCKS_H_
+#define KVM__DEBUG_LOCKS_H_
+
+static __used bool debug_locks = true;
+static __used bool debug_locks_silent = false;
+
+#endif
diff --git a/tools/kvm/include/linux/delay.h b/tools/kvm/include/linux/delay.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/ftrace.h b/tools/kvm/include/linux/ftrace.h
new file mode 100644
index 0000000..b7f002c
--- /dev/null
+++ b/tools/kvm/include/linux/ftrace.h
@@ -0,0 +1,132 @@
+/*
+ * Ftrace header. For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
+#ifndef _LINUX_FTRACE_H
+#define _LINUX_FTRACE_H
+
+#include <asm/errno.h>
+
+/*
+ * If the arch supports passing the variable contents of
+ * function_trace_op as the third parameter back from the
+ * mcount call, then the arch should define this as 1.
+ */
+#ifndef ARCH_SUPPORTS_FTRACE_OPS
+#define ARCH_SUPPORTS_FTRACE_OPS 0
+#endif
+
+/*
+ * If the arch's mcount caller does not support all of ftrace's
+ * features, then it must call an indirect function that
+ * does. Or at least does enough to prevent any unwelcomed side effects.
+ */
+#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \
+ !ARCH_SUPPORTS_FTRACE_OPS
+# define FTRACE_FORCE_LIST_FUNC 1
+#else
+# define FTRACE_FORCE_LIST_FUNC 0
+#endif
+
+
+struct module;
+struct ftrace_hash;
+
+/*
+ * (un)register_ftrace_function must be a macro since the ops parameter
+ * must not be evaluated.
+ */
+#define register_ftrace_function(ops) ({ 0; })
+#define unregister_ftrace_function(ops) ({ 0; })
+static inline int ftrace_nr_registered_ops(void)
+{
+ return 0;
+}
+static inline void clear_ftrace_function(void) { }
+static inline void ftrace_kill(void) { }
+static inline void ftrace_stop(void) { }
+static inline void ftrace_start(void) { }
+static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; }
+static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
+static inline int skip_trace(unsigned long ip) { return 0; }
+static inline int ftrace_force_update(void) { return 0; }
+static inline void ftrace_disable_daemon(void) { }
+static inline void ftrace_enable_daemon(void) { }
+static inline void ftrace_release_mod(struct module *mod) {}
+static inline int unregister_ftrace_command(char *cmd_name)
+{
+ return -EINVAL;
+}
+static inline int ftrace_text_reserved(void *start, void *end)
+{
+ return 0;
+}
+static inline unsigned long ftrace_location(unsigned long ip)
+{
+ return 0;
+}
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; })
+#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_free_filter(ops) do { } while (0)
+
+static inline void tracer_disable(void)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ ftrace_enabled = 0;
+#endif
+}
+
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
+ * must be used to prevent ftrace_enabled to be changed between
+ * disable/restore.
+ */
+static inline int __ftrace_enabled_save(void)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ int saved_ftrace_enabled = ftrace_enabled;
+ ftrace_enabled = 0;
+ return saved_ftrace_enabled;
+#else
+ return 0;
+#endif
+}
+
+static inline void __ftrace_enabled_restore(int enabled)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ ftrace_enabled = enabled;
+#endif
+}
+
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 0UL
+# define CALLER_ADDR2 0UL
+# define CALLER_ADDR3 0UL
+# define CALLER_ADDR4 0UL
+# define CALLER_ADDR5 0UL
+# define CALLER_ADDR6 0UL
+
+ static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
+ static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
+
+/*
+ * Use defines instead of static inlines because some arches will make code out
+ * of the CALLER_ADDR, when we really want these to be a real nop.
+ */
+# define trace_preempt_on(a0, a1) do { } while (0)
+# define trace_preempt_off(a0, a1) do { } while (0)
+
+static inline void ftrace_init(void) { }
+
+#endif /* _LINUX_FTRACE_H */
diff --git a/tools/kvm/include/linux/gfp.h b/tools/kvm/include/linux/gfp.h
new file mode 100644
index 0000000..a3ecae5
--- /dev/null
+++ b/tools/kvm/include/linux/gfp.h
@@ -0,0 +1,144 @@
+#ifndef __LINUX_GFP_H
+#define __LINUX_GFP_H
+
+struct vm_area_struct;
+
+/* Plain integer GFP bitmasks. Do not use this directly. */
+#define ___GFP_DMA 0x01u
+#define ___GFP_HIGHMEM 0x02u
+#define ___GFP_DMA32 0x04u
+#define ___GFP_MOVABLE 0x08u
+#define ___GFP_WAIT 0x10u
+#define ___GFP_HIGH 0x20u
+#define ___GFP_IO 0x40u
+#define ___GFP_FS 0x80u
+#define ___GFP_COLD 0x100u
+#define ___GFP_NOWARN 0x200u
+#define ___GFP_REPEAT 0x400u
+#define ___GFP_NOFAIL 0x800u
+#define ___GFP_NORETRY 0x1000u
+#define ___GFP_MEMALLOC 0x2000u
+#define ___GFP_COMP 0x4000u
+#define ___GFP_ZERO 0x8000u
+#define ___GFP_NOMEMALLOC 0x10000u
+#define ___GFP_HARDWALL 0x20000u
+#define ___GFP_THISNODE 0x40000u
+#define ___GFP_RECLAIMABLE 0x80000u
+#define ___GFP_NOTRACK 0x200000u
+#define ___GFP_OTHER_NODE 0x800000u
+#define ___GFP_WRITE 0x1000000u
+
+/*
+ * GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
+ */
+#define __GFP_DMA ((__force gfp_t)___GFP_DMA)
+#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
+#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
+#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+/*
+ * Action modifiers - doesn't change the zoning
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ * _might_ fail. This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. This modifier is deprecated and no new
+ * users should be added.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ *
+ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
+ * mechanism or reclaimed
+ */
+#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */
+#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */
+#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */
+#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */
+#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */
+#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */
+#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */
+#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */
+#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */
+#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
+#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */
+#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
+ * This takes precedence over the
+ * __GFP_MEMALLOC flag if both are
+ * set
+ */
+#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
+#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
+
+#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
+
+/*
+ * This may seem redundant, but it's a way of annotating false positives vs.
+ * allocations that simply cannot be supported (e.g. page tables).
+ */
+#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
+
+#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+/* This equals 0, but use constants in case they ever change */
+#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
+/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
+#define GFP_ATOMIC (__GFP_HIGH)
+#define GFP_NOIO (__GFP_WAIT)
+#define GFP_NOFS (__GFP_WAIT | __GFP_IO)
+#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ __GFP_RECLAIMABLE)
+#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
+ __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ __GFP_HARDWALL | __GFP_HIGHMEM | \
+ __GFP_MOVABLE)
+#define GFP_IOFS (__GFP_IO | __GFP_FS)
+#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+
+#ifdef CONFIG_NUMA
+#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+#else
+#define GFP_THISNODE ((__force gfp_t)0)
+#endif
+
+/* This mask makes up all the page movable related flags */
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+
+/* Control page allocator reclaim behavior */
+#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
+ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
+
+/* Control slab gfp mask during early boot */
+#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
+
+/* Control allocation constraints */
+#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
+
+/* Do not use these with a slab allocator */
+#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
+
+/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
+ platforms, used as appropriate on others */
+
+#define GFP_DMA __GFP_DMA
+
+/* 4GB DMA on some platforms */
+#define GFP_DMA32 __GFP_DMA32
+
+#endif /* __LINUX_GFP_H */
diff --git a/tools/kvm/include/linux/hardirq.h b/tools/kvm/include/linux/hardirq.h
new file mode 100644
index 0000000..c6538df
--- /dev/null
+++ b/tools/kvm/include/linux/hardirq.h
@@ -0,0 +1,209 @@
+#ifndef LINUX_HARDIRQ_H
+#define LINUX_HARDIRQ_H
+
+#include <linux/lockdep.h>
+
+#define preempt_count() 0
+
+/*
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ *
+ * The hardirq count can in theory reach the same as NR_IRQS.
+ * In reality, the number of nested IRQS is limited to the stack
+ * size as well. For archs with over 1000 IRQS it is not practical
+ * to expect that they will all nest. We give a max of 10 bits for
+ * hardirq nesting. An arch may choose to give less than 10 bits.
+ * m68k expects it to be 8.
+ *
+ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+ * - bit 26 is the NMI_MASK
+ * - bit 27 is the PREEMPT_ACTIVE flag
+ *
+ * PREEMPT_MASK: 0x000000ff
+ * SOFTIRQ_MASK: 0x0000ff00
+ * HARDIRQ_MASK: 0x03ff0000
+ * NMI_MASK: 0x04000000
+ */
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define NMI_BITS 1
+
+#define MAX_HARDIRQ_BITS 10
+
+#ifndef HARDIRQ_BITS
+# define HARDIRQ_BITS MAX_HARDIRQ_BITS
+#endif
+
+#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+#error HARDIRQ_BITS too high!
+#endif
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET (1UL << NMI_SHIFT)
+
+#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
+
+#ifndef PREEMPT_ACTIVE
+#define PREEMPT_ACTIVE_BITS 1
+#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+#endif
+
+#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
+#error PREEMPT_ACTIVE is too low!
+#endif
+
+#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
+#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+ | NMI_MASK))
+
+/*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
+ */
+#define in_irq() (hardirq_count())
+#define in_softirq() (softirq_count())
+#define in_interrupt() (irq_count())
+#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi() (preempt_count() & NMI_MASK)
+
+#if defined(CONFIG_PREEMPT_COUNT)
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
+/*
+ * Are we running in atomic context? WARNING: this macro cannot
+ * always detect atomic context; in particular, it cannot know about
+ * held spinlocks in non-preemptible kernels. Thus it should not be
+ * used in the general case to determine whether sleeping is possible.
+ * Do not use in_atomic() in driver code.
+ */
+#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler, *after* releasing the kernel lock)
+ */
+#define in_atomic_preempt_off() \
+ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+
+#ifdef CONFIG_PREEMPT_COUNT
+# define preemptible() (preempt_count() == 0 && !irqs_disabled())
+# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
+#else
+# define preemptible() 0
+# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
+extern void synchronize_irq(unsigned int irq);
+#else
+# define synchronize_irq(irq) barrier()
+#endif
+
+struct task_struct;
+
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
+static inline void vtime_account(struct task_struct *tsk)
+{
+}
+#else
+extern void vtime_account(struct task_struct *tsk);
+#endif
+
+#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
+
+static inline void rcu_nmi_enter(void)
+{
+}
+
+static inline void rcu_nmi_exit(void)
+{
+}
+
+#else
+extern void rcu_nmi_enter(void);
+extern void rcu_nmi_exit(void);
+#endif
+
+/*
+ * It is safe to do non-atomic ops on ->hardirq_context,
+ * because NMI handlers may not preempt and the ops are
+ * always balanced, so the interrupted value of ->hardirq_context
+ * will always be restored.
+ */
+#define __irq_enter() \
+ do { \
+ vtime_account(current); \
+ add_preempt_count(HARDIRQ_OFFSET); \
+ trace_hardirq_enter(); \
+ } while (0)
+
+/*
+ * Enter irq context (on NO_HZ, update jiffies):
+ */
+extern void irq_enter(void);
+
+/*
+ * Exit irq context without processing softirqs:
+ */
+#define __irq_exit() \
+ do { \
+ trace_hardirq_exit(); \
+ vtime_account(current); \
+ sub_preempt_count(HARDIRQ_OFFSET); \
+ } while (0)
+
+/*
+ * Exit irq context and process softirqs if needed:
+ */
+extern void irq_exit(void);
+
+#define nmi_enter() \
+ do { \
+ ftrace_nmi_enter(); \
+ BUG_ON(in_nmi()); \
+ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ lockdep_off(); \
+ rcu_nmi_enter(); \
+ trace_hardirq_enter(); \
+ } while (0)
+
+#define nmi_exit() \
+ do { \
+ trace_hardirq_exit(); \
+ rcu_nmi_exit(); \
+ lockdep_on(); \
+ BUG_ON(!in_nmi()); \
+ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ ftrace_nmi_exit(); \
+ } while (0)
+
+#endif /* LINUX_HARDIRQ_H */
diff --git a/tools/kvm/include/linux/hash.h b/tools/kvm/include/linux/hash.h
new file mode 100644
index 0000000..51dc018
--- /dev/null
+++ b/tools/kvm/include/linux/hash.h
@@ -0,0 +1,7 @@
+#ifndef KVM__HASH_H_
+#define KVM__HASH_H_
+
+#include <linux/bitops.h>
+#include <../../../include/linux/hash.h>
+
+#endif
diff --git a/tools/kvm/include/linux/interrupt.h b/tools/kvm/include/linux/interrupt.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/irqflags.h b/tools/kvm/include/linux/irqflags.h
new file mode 100644
index 0000000..1d9980e
--- /dev/null
+++ b/tools/kvm/include/linux/irqflags.h
@@ -0,0 +1,50 @@
+/*
+ * include/linux/irqflags.h
+ *
+ * IRQ flags tracing: follow the state of the hardirq and softirq flags and
+ * provide callbacks for transitions between ON and OFF states.
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _LINUX_TRACE_IRQFLAGS_H
+#define _LINUX_TRACE_IRQFLAGS_H
+
+# define trace_hardirq_context(p) 0
+# define trace_softirq_context(p) 0
+# define trace_hardirqs_enabled(p) 0
+# define trace_softirqs_enabled(p) 0
+# define trace_hardirq_enter() do { } while (0)
+# define trace_hardirq_exit() do { } while (0)
+# define lockdep_softirq_enter() do { } while (0)
+# define lockdep_softirq_exit() do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+
+# define stop_critical_timings() do { } while (0)
+# define start_critical_timings() do { } while (0)
+
+/*
+ * Wrap the arch provided IRQ routines to provide appropriate checks.
+ */
+#define raw_local_irq_disable() do { } while (0)
+#define raw_local_irq_enable() do { } while (0)
+#define raw_local_irq_save(flags) do { flags = 0; } while (0)
+#define raw_local_irq_restore(flags) do { } while (0)
+#define raw_local_save_flags(flags) do { flags = 0; } while (0)
+#define raw_irqs_disabled_flags(flags) do { } while (0)
+#define raw_irqs_disabled() 0
+#define raw_safe_halt()
+
+#define local_irq_enable() do { } while (0)
+#define local_irq_disable() do { } while (0)
+#define local_irq_save(flags) do { flags = 0 ;} while (0)
+#define local_irq_restore(flags) do { } while (0)
+#define local_save_flags(flags) do { flags = 0; } while (0)
+#define irqs_disabled() (1)
+#define irqs_disabled_flags(flags) (0)
+#define safe_halt() do { } while (0)
+
+#define trace_lock_release(x, y)
+#define trace_lock_acquire(a, b, c, d, e, f, g);
+#endif
diff --git a/tools/kvm/include/linux/kallsyms.h b/tools/kvm/include/linux/kallsyms.h
new file mode 100644
index 0000000..6f85c98
--- /dev/null
+++ b/tools/kvm/include/linux/kallsyms.h
@@ -0,0 +1,129 @@
+/* Rewritten and vastly simplified by Rusty Russell for in-kernel
+ * module loader:
+ * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ */
+#ifndef _LINUX_KALLSYMS_H
+#define _LINUX_KALLSYMS_H
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stddef.h>
+
+#include <stdio.h>
+
+#define KSYM_NAME_LEN 128
+
+struct module;
+
+#ifdef CONFIG_KALLSYMS
+/* Lookup the address for a symbol. Returns 0 if not found. */
+unsigned long kallsyms_lookup_name(const char *name);
+
+/* Call a function on each kallsyms symbol in the core kernel */
+int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
+ unsigned long),
+ void *data);
+
+extern int kallsyms_lookup_size_offset(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset);
+
+/* Lookup an address. modname is set to NULL if it's in the kernel. */
+const char *kallsyms_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, char *namebuf);
+
+/* Look up a kernel symbol and return it in a text buffer. */
+extern int sprint_symbol(char *buffer, unsigned long address);
+extern int sprint_symbol_no_offset(char *buffer, unsigned long address);
+extern int sprint_backtrace(char *buffer, unsigned long address);
+
+/* Look up a kernel symbol and print it to the kernel messages. */
+extern void __print_symbol(const char *fmt, unsigned long address);
+
+int lookup_symbol_name(unsigned long addr, char *symname);
+int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
+
+#else /* !CONFIG_KALLSYMS */
+
+static inline unsigned long kallsyms_lookup_name(const char *name)
+{
+ return 0;
+}
+
+static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+ struct module *,
+ unsigned long),
+ void *data)
+{
+ return 0;
+}
+
+static inline int kallsyms_lookup_size_offset(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset)
+{
+ return 0;
+}
+
+static inline const char *kallsyms_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, char *namebuf)
+{
+ return NULL;
+}
+
+static inline int sprint_symbol(char *buffer, unsigned long addr)
+{
+ *buffer = '\0';
+ return 0;
+}
+
+static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr)
+{
+ *buffer = '\0';
+ return 0;
+}
+
+static inline int sprint_backtrace(char *buffer, unsigned long addr)
+{
+ *buffer = '\0';
+ return 0;
+}
+
+static inline int lookup_symbol_name(unsigned long addr, char *symname)
+{
+ return -1;
+}
+
+static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name)
+{
+ return -1;
+}
+
+/* Stupid that this does nothing, but I didn't create this mess. */
+#define __print_symbol(fmt, addr)
+#endif /*CONFIG_KALLSYMS*/
+
+static inline void print_symbol(const char *fmt, unsigned long addr)
+{
+ __print_symbol(fmt, (unsigned long)
+ __builtin_extract_return_addr((void *)addr));
+}
+
+#include <execinfo.h>
+#include <stdlib.h>
+static inline void print_ip_sym(unsigned long ip)
+{
+ char **name;
+
+ name = backtrace_symbols((void **)&ip, 1);
+
+ printf("%s\n", *name);
+
+ free(name);
+}
+
+#endif /*_LINUX_KALLSYMS_H*/
diff --git a/tools/kvm/include/linux/kernel.h b/tools/kvm/include/linux/kernel.h
index 1e9abe9..91696d5 100644
--- a/tools/kvm/include/linux/kernel.h
+++ b/tools/kvm/include/linux/kernel.h
@@ -1,7 +1,15 @@
-
#ifndef KVM__LINUX_KERNEL_H_
#define KVM__LINUX_KERNEL_H_
+#include <linux/export.h>
+#include <linux/rcu.h>
+#include <linux/irqflags.h>
+#include <linux/kmemcheck.h>
+#include <linux/gfp.h>
+#include <linux/ftrace.h>
+#include <linux/hardirq.h>
+#include <linux/utsname.h>
+
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
@@ -37,5 +45,15 @@
_max1 > _max2 ? _max1 : _max2; })
#define true 1
+#define DEBUG_LOCKS_WARN_ON(x) (x)
+#define WARN_ON_ONCE(x) (x)
+#define likely(x) (x)
+#define WARN(x,y,z)
+#define uninitialized_var(x) x
+
+#define _THIS_IP_ CALLER_ADDR0
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+
+bool early_boot_irqs_disabled;
#endif
diff --git a/tools/kvm/include/linux/linkage.h b/tools/kvm/include/linux/linkage.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/lockdep.h b/tools/kvm/include/linux/lockdep.h
new file mode 100644
index 0000000..d41e9cb
--- /dev/null
+++ b/tools/kvm/include/linux/lockdep.h
@@ -0,0 +1,56 @@
+#ifndef KVM__LOCKDEP_H_
+#define KVM__LOCKDEP_H_
+
+#include <sys/prctl.h>
+#include <asm/unistd.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/utsname.h>
+
+#define __init
+#define MAX_LOCK_DEPTH 200
+#define list_add_tail_rcu list_add_tail
+#define noinline
+
+#include <../../../include/linux/lockdep.h>
+
+struct task_struct {
+ u64 curr_chain_key;
+ int lockdep_depth;
+ unsigned int lockdep_recursion;
+ struct held_lock held_locks[MAX_LOCK_DEPTH];
+ gfp_t lockdep_reclaim_gfp;
+ int pid;
+ char comm[17];
+};
+
+extern __thread struct task_struct current_obj;
+#define current (&current_obj)
+
+static inline void lockdep_set_thread(void)
+{
+ prctl(PR_GET_NAME, current->comm);
+ current->pid = syscall(__NR_gettid);
+}
+
+static inline int debug_locks_off(void)
+{
+ return 1;
+}
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+ return tsk->pid;
+}
+
+#define KSYM_NAME_LEN 128
+#define printk printf
+
+#define KERN_ERR
+#define KERN_CONT
+
+#define list_del_rcu list_del
+
+#define atomic_t unsigned long
+#define atomic_inc(x) ((*(x))++)
+#endif
diff --git a/tools/kvm/include/linux/mm_types.h b/tools/kvm/include/linux/mm_types.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/module.h b/tools/kvm/include/linux/module.h
index 0e4c6a3..75e8e18 100644
--- a/tools/kvm/include/linux/module.h
+++ b/tools/kvm/include/linux/module.h
@@ -1,6 +1,6 @@
#ifndef KVM__LINUX_MODULE_H
#define KVM__LINUX_MODULE_H
-#define EXPORT_SYMBOL(name)
+#include <linux/moduleparam.h>
#endif
diff --git a/tools/kvm/include/linux/moduleparam.h b/tools/kvm/include/linux/moduleparam.h
new file mode 100644
index 0000000..dff8755
--- /dev/null
+++ b/tools/kvm/include/linux/moduleparam.h
@@ -0,0 +1,6 @@
+#ifndef KVM__MODULEPARAM_H_
+#define KVM__MODULEPARAM_H_
+
+#define module_param(name, type, perm)
+
+#endif
diff --git a/tools/kvm/include/linux/mutex.h b/tools/kvm/include/linux/mutex.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/proc_fs.h b/tools/kvm/include/linux/proc_fs.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/rcu.h b/tools/kvm/include/linux/rcu.h
new file mode 100644
index 0000000..5de07a1
--- /dev/null
+++ b/tools/kvm/include/linux/rcu.h
@@ -0,0 +1,19 @@
+#ifndef KVM__RCU_H_
+#define KVM__RCU_H_
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+int rcu_scheduler_active;
+
+static inline int rcu_lockdep_current_cpu_online(void)
+{
+ return 1;
+}
+
+static inline int rcu_is_cpu_idle(void)
+{
+ return 1;
+}
+
+#endif
diff --git a/tools/kvm/include/linux/seq_file.h b/tools/kvm/include/linux/seq_file.h
new file mode 100644
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/spinlock.h b/tools/kvm/include/linux/spinlock.h
new file mode 100644
index 0000000..556934b
--- /dev/null
+++ b/tools/kvm/include/linux/spinlock.h
@@ -0,0 +1,33 @@
+#ifndef KVM__SPINLOCK_H
+#define KVM__SPINLOCK_H
+
+#include <pthread.h>
+
+#include "kvm/util.h"
+
+/*
+ * Kernel-alike mutex API - to make it easier for kernel developers
+ * to write user-space code! :-)
+ */
+
+#define arch_spinlock_t pthread_mutex_t
+#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+static inline void arch_spin_lock(arch_spinlock_t *mutex)
+{
+ if (pthread_mutex_lock(mutex) != 0)
+ die("unexpected pthread_mutex_lock() failure!");
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *mutex)
+{
+ if (pthread_mutex_unlock(mutex) != 0)
+ die("unexpected pthread_mutex_unlock() failure!");
+}
+
+static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
+{
+ return true;
+}
+
+#endif /* KVM__SPINLOCK_H */
diff --git a/tools/kvm/include/linux/stacktrace.h b/tools/kvm/include/linux/stacktrace.h
new file mode 100644
index 0000000..ad59b96
--- /dev/null
+++ b/tools/kvm/include/linux/stacktrace.h
@@ -0,0 +1,31 @@
+#ifndef __LINUX_STACKTRACE_H
+#define __LINUX_STACKTRACE_H
+
+#include <execinfo.h>
+
+struct stack_trace {
+ unsigned int nr_entries, max_entries;
+ unsigned long *entries;
+ int skip; /* input argument: How many entries to skip */
+};
+
+static inline void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+ backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1);
+}
+
+#define save_stack_trace(trace) \
+ trace->nr_entries = backtrace((void **)trace->entries, trace->max_entries);
+
+static inline int dump_stack(void)
+{
+ void *array[64];
+ size_t size;
+
+ size = backtrace(array, 64);
+ backtrace_symbols_fd(array, size, 1);
+
+ return 0;
+}
+
+#endif
diff --git a/tools/kvm/include/linux/system.h b/tools/kvm/include/linux/system.h
new file mode 100755
index 0000000..e69de29
diff --git a/tools/kvm/include/linux/types.h b/tools/kvm/include/linux/types.h
index 5e20f10..2183897 100644
--- a/tools/kvm/include/linux/types.h
+++ b/tools/kvm/include/linux/types.h
@@ -1,10 +1,18 @@
#ifndef LINUX_TYPES_H
#define LINUX_TYPES_H
+#include <stdbool.h>
+#include <stddef.h>
+
#include <kvm/compiler.h>
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
+struct page;
+struct kmem_cache;
+
+typedef unsigned gfp_t;
+
typedef __u64 u64;
typedef __s64 s64;
diff --git a/tools/kvm/virtio/pci.c b/tools/kvm/virtio/pci.c
index b6ac571..3acaa3a 100644
--- a/tools/kvm/virtio/pci.c
+++ b/tools/kvm/virtio/pci.c
@@ -286,7 +286,7 @@ int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev)
}
if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI)
- virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vpci->config_vector]);
+ virtio_pci__signal_msi(kvm, vpci, vpci->config_vector);
else
kvm__irq_trigger(kvm, vpci->config_gsi);
} else {
--
1.7.12.4
* Re: [RFC 1/3] kvm tools: use mutex abstraction instead of pthread mutex
@ 2012-10-25 6:50 ` Pekka Enberg
2 siblings, 0 replies; 8+ messages in thread
From: Pekka Enberg @ 2012-10-25 6:50 UTC (permalink / raw)
To: Sasha Levin; +Cc: mingo, peterz, asias.hejun, tglx, gorcunov, kvm
On Wed, 24 Oct 2012, Sasha Levin wrote:
> We already have something to wrap pthread with mutex_[init,lock,unlock]
> calls. This patch creates a new struct mutex abstraction and moves
> everything to work with it.
>
> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
I applied this patch from the RFC series, thanks Sasha!
* Re: [RFC 2/3] lockdep: be nice about compiling from userspace
2012-10-24 17:00 ` [RFC 2/3] lockdep: be nice about compiling from userspace Sasha Levin
@ 2012-10-25 8:05 ` Ingo Molnar
2012-10-25 16:58 ` Sasha Levin
0 siblings, 1 reply; 8+ messages in thread
From: Ingo Molnar @ 2012-10-25 8:05 UTC (permalink / raw)
To: Sasha Levin; +Cc: penberg, mingo, peterz, asias.hejun, tglx, gorcunov, kvm
* Sasha Levin <sasha.levin@oracle.com> wrote:
> We can rather easily make lockdep work from userspace, although 3 issues
> remain which I'm not sure about:
>
> - Kernel naming - we can just wrap init_utsname() to return kvmtool related
> utsname, is that what we want though?
>
> - static_obj() - I don't have a better idea than calling mprobe(), which sounds
> wrong as well.
>
> - debug_show_all_locks() - we don't actually call it from userspace yet, but I think
> we might want to, so I'm not sure how to make it pretty using existing kernel code.
>
> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> ---
> kernel/lockdep.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/lockdep.c b/kernel/lockdep.c
> index 7981e5b..fdd3670 100644
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -567,10 +567,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
>
> static void print_kernel_ident(void)
> {
> +#ifdef __KERNEL__
> printk("%s %.*s %s\n", init_utsname()->release,
> (int)strcspn(init_utsname()->version, " "),
> init_utsname()->version,
> print_tainted());
> +#endif
I guess wrapping init_utsname() is not worth it. Although
kvmtool could provide the host system's utsname - kernel
identity is useful for debugging info.
You could generate a Git hash version string like tools/perf/
does (see PERF_VERSION and tools/perf/util/PERF-VERSION-GEN),
and put that into the ->version field.
->release could be the kvmtool version, and print_tainted()
could return an empty string.
That way you could provide init_utsname() and could remove this
#ifdef.
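Something like this minimal sketch could work, assuming kvmtool carries
a userspace copy of struct new_utsname - KVMTOOL_RELEASE and
KVMTOOL_VERSION are made-up macro names here, standing in for whatever
version strings the build already generates:

	static inline struct new_utsname *init_utsname(void)
	{
		/* Assumes both macros expand to string literals. */
		static struct new_utsname uts = {
			.release = KVMTOOL_RELEASE,	/* kvmtool version */
			.version = KVMTOOL_VERSION,	/* git hash, PERF-VERSION-GEN style */
		};

		return &uts;
	}

	#define print_tainted() ""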
> }
>
> static int very_verbose(struct lock_class *class)
> @@ -586,6 +588,7 @@ static int very_verbose(struct lock_class *class)
> */
> static int static_obj(void *obj)
> {
> +#ifdef __KERNEL__
> unsigned long start = (unsigned long) &_stext,
> end = (unsigned long) &_end,
> addr = (unsigned long) obj;
> @@ -609,6 +612,8 @@ static int static_obj(void *obj)
> * module static or percpu var?
> */
> return is_module_address(addr) || is_module_percpu_address(addr);
> +#endif
> + return 1;
Could you put an:
#ifndef static_obj
around it? Then kvmtool could define its own trivial version of
static_obj():
#define static_obj(x) 1U
or so.
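I.e. lockdep.c would keep the kernel implementation but allow it to be
overridden - a sketch only, with the function body unchanged from what's
quoted above:

	#ifndef static_obj
	static int static_obj(void *obj)
	{
		unsigned long start = (unsigned long) &_stext,
			      end = (unsigned long) &_end,
			      addr = (unsigned long) obj;

		/* ... the existing checks stay as they are ... */

		return is_module_address(addr) || is_module_percpu_address(addr);
	}
	#endif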
> @@ -4108,7 +4113,7 @@ void debug_check_no_locks_held(struct task_struct *task)
> if (unlikely(task->lockdep_depth > 0))
> print_held_locks_bug(task);
> }
> -
> +#ifdef __KERNEL__
> void debug_show_all_locks(void)
> {
> struct task_struct *g, *p;
I guess a show-all-locks functionality would be useful to
kvmtool as well?
Thanks,
Ingo
* Re: [RFC 2/3] lockdep: be nice about compiling from userspace
2012-10-25 8:05 ` Ingo Molnar
@ 2012-10-25 16:58 ` Sasha Levin
2012-10-25 17:06 ` Ingo Molnar
0 siblings, 1 reply; 8+ messages in thread
From: Sasha Levin @ 2012-10-25 16:58 UTC (permalink / raw)
To: Ingo Molnar; +Cc: penberg, mingo, peterz, asias.hejun, tglx, gorcunov, kvm
On 10/25/2012 04:05 AM, Ingo Molnar wrote:
>
> * Sasha Levin <sasha.levin@oracle.com> wrote:
>
>> We can rather easily make lockdep work from userspace, although 3 issues
>> remain which I'm not sure about:
>>
>> - Kernel naming - we can just wrap init_utsname() to return kvmtool related
>> utsname, is that what we want though?
>>
>> - static_obj() - I don't have a better idea than calling mprobe(), which sounds
>> wrong as well.
>>
>> - debug_show_all_locks() - we don't actually call it from userspace yet, but I think
>> we might want to, so I'm not sure how to make it pretty using existing kernel code.
>>
>> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
>> ---
>> kernel/lockdep.c | 9 +++++++--
>> 1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/lockdep.c b/kernel/lockdep.c
>> index 7981e5b..fdd3670 100644
>> --- a/kernel/lockdep.c
>> +++ b/kernel/lockdep.c
>> @@ -567,10 +567,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
>>
>> static void print_kernel_ident(void)
>> {
>> +#ifdef __KERNEL__
>> printk("%s %.*s %s\n", init_utsname()->release,
>> (int)strcspn(init_utsname()->version, " "),
>> init_utsname()->version,
>> print_tainted());
>> +#endif
>
> I guess wrapping init_utsname() is not worth it. Although
> kvmtool could provide the host system's utsname - kernel
> identity is useful for debugging info.
>
> You could generate a Git hash version string like tools/perf/
> does (see PERF_VERSION and tools/perf/util/PERF-VERSION-GEN),
> and put that into the ->version field.
>
> ->release could be the kvmtool version, and print_tainted()
> could return an empty string.
>
> That way you could provide init_utsname() and could remove this
> #ifdef.
Yeah, we already generate the version string for
'lkvm version' anyway, so I guess I'll just add init_utsname().
>> }
>>
>> static int very_verbose(struct lock_class *class)
>> @@ -586,6 +588,7 @@ static int very_verbose(struct lock_class *class)
>> */
>> static int static_obj(void *obj)
>> {
>> +#ifdef __KERNEL__
>> unsigned long start = (unsigned long) &_stext,
>> end = (unsigned long) &_end,
>> addr = (unsigned long) obj;
>> @@ -609,6 +612,8 @@ static int static_obj(void *obj)
>> * module static or percpu var?
>> */
>> return is_module_address(addr) || is_module_percpu_address(addr);
>> +#endif
>> + return 1;
>
> Could you put an:
>
> #ifndef static_obj
>
> around it? Then kvmtool could define its own trivial version of
> static_obj():
>
> #define static_obj(x) 1U
>
> or so.
>
>> @@ -4108,7 +4113,7 @@ void debug_check_no_locks_held(struct task_struct *task)
>> if (unlikely(task->lockdep_depth > 0))
>> print_held_locks_bug(task);
>> }
>> -
>> +#ifdef __KERNEL__
>> void debug_show_all_locks(void)
>> {
>> struct task_struct *g, *p;
>
> I guess a show-all-locks functionality would be useful to
> kvmtool as well?
Regarding the above two,
Yes, we can wrap both static_obj() and debug_show_all_locks() with #ifndefs
and let kvmtool provide its own version of those two.
The question here is more of a "would the lockdep maintainers be OK with
adding that, considering there's no in-kernel justification for those?"
Thanks,
Sasha
* Re: [RFC 2/3] lockdep: be nice about compiling from userspace
2012-10-25 16:58 ` Sasha Levin
@ 2012-10-25 17:06 ` Ingo Molnar
2012-10-25 19:17 ` Sasha Levin
0 siblings, 1 reply; 8+ messages in thread
From: Ingo Molnar @ 2012-10-25 17:06 UTC (permalink / raw)
To: Sasha Levin; +Cc: penberg, mingo, peterz, asias.hejun, tglx, gorcunov, kvm
* Sasha Levin <sasha.levin@oracle.com> wrote:
> On 10/25/2012 04:05 AM, Ingo Molnar wrote:
> >
> > * Sasha Levin <sasha.levin@oracle.com> wrote:
> >
> >> We can rather easily make lockdep work from userspace, although 3 issues
> >> remain which I'm not sure about:
> >>
> >> - Kernel naming - we can just wrap init_utsname() to return kvmtool related
> >> utsname, is that what we want though?
> >>
> >> - static_obj() - I don't have a better idea than calling mprobe(), which sounds
> >> wrong as well.
> >>
> >> - debug_show_all_locks() - we don't actually call it from userspace yet, but I think
> >> we might want to, so I'm not sure how to make it pretty using existing kernel code.
> >>
> >> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
> >> ---
> >> kernel/lockdep.c | 9 +++++++--
> >> 1 file changed, 7 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/kernel/lockdep.c b/kernel/lockdep.c
> >> index 7981e5b..fdd3670 100644
> >> --- a/kernel/lockdep.c
> >> +++ b/kernel/lockdep.c
> >> @@ -567,10 +567,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
> >>
> >> static void print_kernel_ident(void)
> >> {
> >> +#ifdef __KERNEL__
> >> printk("%s %.*s %s\n", init_utsname()->release,
> >> (int)strcspn(init_utsname()->version, " "),
> >> init_utsname()->version,
> >> print_tainted());
> >> +#endif
> >
> > I guess wrapping init_utsname() is not worth it. Although
> > kvmtool could provide the host system's utsname - kernel
> > identity is useful for debugging info.
> >
> > You could generate a Git hash version string like tools/perf/
> > does (see PERF_VERSION and tools/perf/util/PERF-VERSION-GEN),
> > and put that into the ->version field.
> >
> > ->release could be the kvmtool version, and print_tainted()
> > could return an empty string.
> >
> > That way you could provide init_utsname() and could remove this
> > #ifdef.
>
> Yeah, we already generate the version string for
> 'lkvm version' anyway, so I guess I'll just add init_utsname().
>
>
> >> }
> >>
> >> static int very_verbose(struct lock_class *class)
> >> @@ -586,6 +588,7 @@ static int very_verbose(struct lock_class *class)
> >> */
> >> static int static_obj(void *obj)
> >> {
> >> +#ifdef __KERNEL__
> >> unsigned long start = (unsigned long) &_stext,
> >> end = (unsigned long) &_end,
> >> addr = (unsigned long) obj;
> >> @@ -609,6 +612,8 @@ static int static_obj(void *obj)
> >> * module static or percpu var?
> >> */
> >> return is_module_address(addr) || is_module_percpu_address(addr);
> >> +#endif
> >> + return 1;
> >
> > Could you put an:
> >
> > #ifndef static_obj
> >
> > around it? Then kvmtool could define its own trivial version of
> > static_obj():
> >
> > #define static_obj(x) 1U
> >
> > or so.
> >
> >> @@ -4108,7 +4113,7 @@ void debug_check_no_locks_held(struct task_struct *task)
> >> if (unlikely(task->lockdep_depth > 0))
> >> print_held_locks_bug(task);
> >> }
> >> -
> >> +#ifdef __KERNEL__
> >> void debug_show_all_locks(void)
> >> {
> >> struct task_struct *g, *p;
> >
> > I guess a show-all-locks functionality would be useful to
> > kvmtool as well?
>
> Regarding the above two,
>
> Yes, we can wrap both static_obj() and debug_show_all_locks()
> with #ifndefs and let kvmtool provide its own version of
> those two.
Only static_obj() - I see no immediate reason why you shouldn't
be able to utilize debug_show_all_locks(). 'vm debug -a' already
lists all backtraces on all vcpus - so 'vm debug lockdep -a'
could list all current locks and indicate which one is held and
by whom.
> The question here is more of a "would the lockdep maintainers be
> OK with adding that, considering there's no in-kernel
> justification for those?"
Yeah, such a simple patch would be acceptable to me, being nice
isn't against the law.
Thanks,
Ingo
* Re: [RFC 2/3] lockdep: be nice about compiling from userspace
2012-10-25 17:06 ` Ingo Molnar
@ 2012-10-25 19:17 ` Sasha Levin
0 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2012-10-25 19:17 UTC (permalink / raw)
To: Ingo Molnar; +Cc: penberg, mingo, peterz, asias.hejun, tglx, gorcunov, kvm
On 10/25/2012 01:06 PM, Ingo Molnar wrote:
> * Sasha Levin <sasha.levin@oracle.com> wrote:
>> Yes, we can wrap both static_obj() and debug_show_all_locks()
>> with #ifndefs and let kvmtool provide its own version of
>> those two.
>
> Only static_obj() - I see no immediate reason why you shouldn't
> be able to utilize debug_show_all_locks(). 'vm debug -a' already
> lists all backtraces on all vcpus - so 'vm debug lockdep -a'
> could list all current locks and indicate which one is held and
> by whom.
I'm not sure how we'd make debug_show_all_locks() work in userspace,
since it would require us to wrap do_each_thread() & friends to iterate
over all our task_structs.
I was thinking about writing a corresponding debug_show_all_locks() that
would simply iterate over a list of our dummy task_structs.
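A rough sketch of what I mean, using the kernel-style list helpers -
'tasks' is a hypothetical global list we'd add every dummy task_struct
to when a thread is created:

	static LIST_HEAD(tasks);

	void debug_show_all_locks(void)
	{
		struct task_struct *p;

		printk("\nShowing all locks held in the system:\n");

		/* 'tasks_node' is an assumed list_head member of our dummy task_struct. */
		list_for_each_entry(p, &tasks, tasks_node) {
			if (p->lockdep_depth)
				lockdep_print_held_locks(p);
		}

		printk("\n");
	}

A real version would also need to lock the list against threads coming
and going, but that's a detail.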
Thanks,
Sasha
Thread overview: 8+ messages
2012-10-24 17:00 [RFC 1/3] kvm tools: use mutex abstraction instead of pthread mutex Sasha Levin
2012-10-24 17:00 ` [RFC 2/3] lockdep: be nice about compiling from userspace Sasha Levin
2012-10-25 8:05 ` Ingo Molnar
2012-10-25 16:58 ` Sasha Levin
2012-10-25 17:06 ` Ingo Molnar
2012-10-25 19:17 ` Sasha Levin
2012-10-24 17:00 ` [RFC 3/3] kvm tools: use lockdep to detect locking issues Sasha Levin
2012-10-25 6:50 ` [RFC 1/3] kvm tools: use mutex abstraction instead of pthread mutex Pekka Enberg