/* * lttng-statedump.c * * Linux Trace Toolkit Next Generation Kernel State Dump * * Copyright 2005 Jean-Hugues Deschenes * Copyright 2006-2012 Mathieu Desnoyers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; only * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Changes: * Eric Clement: Add listing of network IP interface * 2006, 2007 Mathieu Desnoyers Fix kernel threads * Various updates */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include //add tarek syscall #include #include #include //end #include "lttng-events.h" #include "lttng-tracer.h" #include "wrapper/irqdesc.h" #include "wrapper/spinlock.h" #include "wrapper/fdtable.h" #include "wrapper/nsproxy.h" #include "wrapper/irq.h" #include "wrapper/tracepoint.h" #include "wrapper/genhd.h" #ifdef CONFIG_LTTNG_HAS_LIST_IRQ #include #endif //add tarek syscall #ifndef CONFIG_FTRACE_SYSCALLS #error "You need to set CONFIG_FTRACE_SYSCALLS=y" #endif #ifndef CONFIG_KALLSYMS_ALL #error "You need to set CONFIG_KALLSYMS_ALL=y" #endif static struct syscall_metadata **__start_syscalls_metadata; static struct syscall_metadata **__stop_syscalls_metadata; //end /* Define the tracepoints, but do not build the probes */ #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module #define TRACE_INCLUDE_FILE lttng-statedump #include "instrumentation/events/lttng-module/lttng-statedump.h" DEFINE_TRACE(lttng_statedump_block_device); DEFINE_TRACE(lttng_statedump_end); DEFINE_TRACE(lttng_statedump_interrupt); DEFINE_TRACE(lttng_statedump_file_descriptor); DEFINE_TRACE(lttng_statedump_start); DEFINE_TRACE(lttng_statedump_process_state); //Add Tarek DEFINE_TRACE(lttng_statedump_syscall_address_list); //end DEFINE_TRACE(lttng_statedump_network_interface); struct lttng_fd_ctx { char *page; struct lttng_session *session; struct task_struct *p; struct files_struct *files; }; /* * Protected by the trace lock. */ static struct delayed_work cpu_work[NR_CPUS]; static DECLARE_WAIT_QUEUE_HEAD(statedump_wq); static atomic_t kernel_threads_to_run; enum lttng_thread_type { LTTNG_USER_THREAD = 0, LTTNG_KERNEL_THREAD = 1, }; enum lttng_execution_mode { LTTNG_USER_MODE = 0, LTTNG_SYSCALL = 1, LTTNG_TRAP = 2, LTTNG_IRQ = 3, LTTNG_SOFTIRQ = 4, LTTNG_MODE_UNKNOWN = 5, }; enum lttng_execution_submode { LTTNG_NONE = 0, LTTNG_UNKNOWN = 1, }; enum lttng_process_status { LTTNG_UNNAMED = 0, LTTNG_WAIT_FORK = 1, LTTNG_WAIT_CPU = 2, LTTNG_EXIT = 3, LTTNG_ZOMBIE = 4, LTTNG_WAIT = 5, LTTNG_RUN = 6, LTTNG_DEAD = 7, }; static int lttng_enumerate_block_devices(struct lttng_session *session) { struct class *ptr_block_class; struct device_type *ptr_disk_type; struct class_dev_iter iter; struct device *dev; ptr_block_class = wrapper_get_block_class(); if (!ptr_block_class) return -ENOSYS; ptr_disk_type = wrapper_get_disk_type(); if (!ptr_disk_type) { return -ENOSYS; } class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type); while ((dev = class_dev_iter_next(&iter))) { struct disk_part_iter piter; struct gendisk *disk = dev_to_disk(dev); struct hd_struct *part; /* * Don't show empty devices or things that have been * suppressed */ if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) continue; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) { char name_buf[BDEVNAME_SIZE]; char *p; p = wrapper_disk_name(disk, part->partno, name_buf); if (!p) { disk_part_iter_exit(&piter); class_dev_iter_exit(&iter); return -ENOSYS; } trace_lttng_statedump_block_device(session, part_devt(part), name_buf); } disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); return 0; } #ifdef CONFIG_INET static void lttng_enumerate_device(struct lttng_session *session, struct net_device *dev) { struct in_device *in_dev; struct in_ifaddr *ifa; if (dev->flags & IFF_UP) { in_dev = in_dev_get(dev); if (in_dev) { for (ifa = in_dev->ifa_list; ifa != NULL; ifa = ifa->ifa_next) { trace_lttng_statedump_network_interface( session, dev, ifa); } in_dev_put(in_dev); } } else { trace_lttng_statedump_network_interface( session, dev, NULL); } } static int lttng_enumerate_network_ip_interface(struct lttng_session *session) { struct net_device *dev; read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) lttng_enumerate_device(session, dev); read_unlock(&dev_base_lock); return 0; } #else /* CONFIG_INET */ static inline int lttng_enumerate_network_ip_interface(struct lttng_session *session) { return 0; } #endif /* CONFIG_INET */ static int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) { const struct lttng_fd_ctx *ctx = p; const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE); unsigned int flags = file->f_flags; struct fdtable *fdt; /* * We don't expose kernel internal flags, only userspace-visible * flags. */ flags &= ~FMODE_NONOTIFY; fdt = files_fdtable(ctx->files); /* * We need to check here again whether fd is within the fdt * max_fds range, because we might be seeing a different * files_fdtable() than iterate_fd(), assuming only RCU is * protecting the read. In reality, iterate_fd() holds * file_lock, which should ensure the fdt does not change while * the lock is taken, but we are not aware whether this is * guaranteed or not, so play safe. */ if (fd < fdt->max_fds && test_bit(fd, fdt->close_on_exec)) flags |= O_CLOEXEC; if (IS_ERR(s)) { struct dentry *dentry = file->f_path.dentry; /* Make sure we give at least some info */ spin_lock(&dentry->d_lock); trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, dentry->d_name.name, flags, file->f_mode); spin_unlock(&dentry->d_lock); goto end; } trace_lttng_statedump_file_descriptor(ctx->session, ctx->p, fd, s, flags, file->f_mode); end: return 0; } static void lttng_enumerate_task_fd(struct lttng_session *session, struct task_struct *p, char *tmp) { struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .p = p }; struct files_struct *files; task_lock(p); files = p->files; if (!files) goto end; ctx.files = files; lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx); end: task_unlock(p); } static int lttng_enumerate_file_descriptors(struct lttng_session *session) { struct task_struct *p; char *tmp; tmp = (char *) __get_free_page(GFP_KERNEL); if (!tmp) return -ENOMEM; /* Enumerate active file descriptors */ rcu_read_lock(); for_each_process(p) lttng_enumerate_task_fd(session, p, tmp); rcu_read_unlock(); free_page((unsigned long) tmp); return 0; } #if 0 /* * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section * (scheduling in atomic). Normally, the tasklist lock protects this kind of * iteration, but it is not exported to modules. */ static void lttng_enumerate_task_vm_maps(struct lttng_session *session, struct task_struct *p) { struct mm_struct *mm; struct vm_area_struct *map; unsigned long ino; /* get_task_mm does a task_lock... */ mm = get_task_mm(p); if (!mm) return; map = mm->mmap; if (map) { down_read(&mm->mmap_sem); while (map) { if (map->vm_file) ino = map->vm_file->f_dentry->d_inode->i_ino; else ino = 0; trace_lttng_statedump_vm_map(session, p, map, ino); map = map->vm_next; } up_read(&mm->mmap_sem); } mmput(mm); } static int lttng_enumerate_vm_maps(struct lttng_session *session) { struct task_struct *p; rcu_read_lock(); for_each_process(p) lttng_enumerate_task_vm_maps(session, p); rcu_read_unlock(); return 0; } #endif #ifdef CONFIG_LTTNG_HAS_LIST_IRQ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)) #define irq_desc_get_chip(desc) get_irq_desc_chip(desc) #endif static int lttng_list_interrupts(struct lttng_session *session) { unsigned int irq; unsigned long flags = 0; struct irq_desc *desc; #define irq_to_desc wrapper_irq_to_desc /* needs irq_desc */ for_each_irq_desc(irq, desc) { struct irqaction *action; const char *irq_chip_name = irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; local_irq_save(flags); wrapper_desc_spin_lock(&desc->lock); for (action = desc->action; action; action = action->next) { trace_lttng_statedump_interrupt(session, irq, irq_chip_name, action); } wrapper_desc_spin_unlock(&desc->lock); local_irq_restore(flags); } return 0; #undef irq_to_desc } #else static inline int lttng_list_interrupts(struct lttng_session *session) { return 0; } #endif static void lttng_statedump_process_ns(struct lttng_session *session, struct task_struct *p, enum lttng_thread_type type, enum lttng_execution_mode mode, enum lttng_execution_submode submode, enum lttng_process_status status) { struct nsproxy *proxy; struct pid_namespace *pid_ns; rcu_read_lock(); proxy = task_nsproxy(p); if (proxy) { pid_ns = lttng_get_proxy_pid_ns(proxy); do { trace_lttng_statedump_process_state(session, p, type, mode, submode, status, pid_ns); pid_ns = pid_ns->parent; } while (pid_ns); } else { trace_lttng_statedump_process_state(session, p, type, mode, submode, status, NULL); } rcu_read_unlock(); } static int lttng_enumerate_process_states(struct lttng_session *session) { struct task_struct *g, *p; rcu_read_lock(); for_each_process(g) { p = g; do { enum lttng_execution_mode mode = LTTNG_MODE_UNKNOWN; enum lttng_execution_submode submode = LTTNG_UNKNOWN; enum lttng_process_status status; enum lttng_thread_type type; task_lock(p); if (p->exit_state == EXIT_ZOMBIE) status = LTTNG_ZOMBIE; else if (p->exit_state == EXIT_DEAD) status = LTTNG_DEAD; else if (p->state == TASK_RUNNING) { /* Is this a forked child that has not run yet? */ if (list_empty(&p->rt.run_list)) status = LTTNG_WAIT_FORK; else /* * All tasks are considered as wait_cpu; * the viewer will sort out if the task * was really running at this time. */ status = LTTNG_WAIT_CPU; } else if (p->state & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { /* Task is waiting for something to complete */ status = LTTNG_WAIT; } else status = LTTNG_UNNAMED; submode = LTTNG_NONE; /* * Verification of t->mm is to filter out kernel * threads; Viewer will further filter out if a * user-space thread was in syscall mode or not. */ if (p->mm) type = LTTNG_USER_THREAD; else type = LTTNG_KERNEL_THREAD; lttng_statedump_process_ns(session, p, type, mode, submode, status); task_unlock(p); } while_each_thread(g, p); } rcu_read_unlock(); return 0; } //BEGIN : Added by Tarek ----------------------------------------------------// static int lttng_dump_one_sycall_address(struct lttng_session *session, const char *syscall_name, unsigned int runtime_syscall_address, unsigned int symbol_syscall_address) { if (!syscall_name || !runtime_syscall_address || !symbol_syscall_address ) { return -1; } trace_lttng_statedump_syscall_address_list (session, syscall_name, runtime_syscall_address, symbol_syscall_address); return 0; } static __init struct syscall_metadata *find_syscall_meta(unsigned long syscall) { struct syscall_metadata **iter; for (iter = __start_syscalls_metadata; iter < __stop_syscalls_metadata; iter++) { if ((*iter)->syscall_nr == syscall) return (*iter); } return NULL; } static int lttng_enumerate_syscall_address_list(struct lttng_session *session) { // get syscall table address // hash table that contains all system_call names and numbers // iterate for every system call and get its address in the system call table struct syscall_metadata *meta; int i; unsigned long *sys_call_table = NULL; __start_syscalls_metadata = (void *) kallsyms_lookup_name("__start_syscalls_metadata"); __stop_syscalls_metadata = (void *) kallsyms_lookup_name("__stop_syscalls_metadata"); sys_call_table = (unsigned long *) kallsyms_lookup_name("sys_call_table"); for (i = 0; i < NR_syscalls; i++) { int j; meta = find_syscall_meta(i); if (!meta || !sys_call_table) continue; printk("syscall %s ; nr %d ; nbargs %d ; address in sys call table : %p ", meta->name, meta->syscall_nr, meta->nb_args, (void *) sys_call_table[meta->syscall_nr]); // TODO : retierve the symbol address of a syscall in order to inject it in the third argument of lttng_dump_one_sycall_address function lttng_dump_one_sycall_address(session,meta->name,sys_call_table[meta->syscall_nr], (unsigned int) NULL /* address of syscall symbol*/); } printk("SUCCESS\n"); return -1; } //END----------------------------------------------------// static void lttng_statedump_work_func(struct work_struct *work) { if (atomic_dec_and_test(&kernel_threads_to_run)) /* If we are the last thread, wake up do_lttng_statedump */ wake_up(&statedump_wq); } static int do_lttng_statedump(struct lttng_session *session) { int cpu, ret; trace_lttng_statedump_start(session); ret = lttng_enumerate_process_states(session); if (ret) return ret; //Added by Tarek ret = lttng_enumerate_syscall_address_list(session); if (ret) return ret; // ret = lttng_enumerate_file_descriptors(session); if (ret) return ret; /* * FIXME * ret = lttng_enumerate_vm_maps(session); * if (ret) * return ret; */ ret = lttng_list_interrupts(session); if (ret) return ret; ret = lttng_enumerate_network_ip_interface(session); if (ret) return ret; ret = lttng_enumerate_block_devices(session); switch (ret) { case -ENOSYS: printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n"); break; default: return ret; } /* TODO lttng_dump_idt_table(session); */ /* TODO lttng_dump_softirq_vec(session); */ /* TODO lttng_list_modules(session); */ /* TODO lttng_dump_swap_files(session); */ /* * Fire off a work queue on each CPU. Their sole purpose in life * is to guarantee that each CPU has been in a state where is was in * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). */ get_online_cpus(); atomic_set(&kernel_threads_to_run, num_online_cpus()); for_each_online_cpu(cpu) { INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func); schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); } /* Wait for all threads to run */ __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0)); put_online_cpus(); /* Our work is done */ trace_lttng_statedump_end(session); return 0; } /* * Called with session mutex held. */ int lttng_statedump_start(struct lttng_session *session) { return do_lttng_statedump(session); } EXPORT_SYMBOL_GPL(lttng_statedump_start); static int __init lttng_statedump_init(void) { /* * Allow module to load even if the fixup cannot be done. This * will allow seemless transition when the underlying issue fix * is merged into the Linux kernel, and when tracepoint.c * "tracepoint_module_notify" is turned into a static function. */ (void) wrapper_lttng_fixup_sig(THIS_MODULE); return 0; } module_init(lttng_statedump_init); static void __exit lttng_statedump_exit(void) { } module_exit(lttng_statedump_exit); MODULE_LICENSE("GPL and additional rights"); MODULE_AUTHOR("Jean-Hugues Deschenes"); MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Statedump"); MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "." __stringify(LTTNG_MODULES_MINOR_VERSION) "." __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION) LTTNG_MODULES_EXTRAVERSION);