使用 event trace(tracepoint)跟踪 sched_switch 调度切换的例子
1.背景
最近在 debug 时,需要跟踪 sched_switch(调度切换)发生时切换前后的两个进程,所以写了一个基于 event trace 的程序。
2.代码
/*
 * sched_switch memory monitor.
 *
 * Registers a probe on the sched_switch tracepoint. On every context switch
 * the probe reads one 64-bit word (at byte offset MONITOR_PAGE_OFFSET) from
 * each page of a physical range given on the kernel command line, and when
 * the top bits of that word match one of a few known patterns it logs the
 * word, the CPU, and the stacks of the outgoing and incoming tasks.
 *
 * The range is configured with:  sched_monitor_addr=<size>@<addr>
 */
#include <linux/tracepoint.h>
#include <trace/events/sched.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/io.h>
#include <asm/memory.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/kernel.h>

#define MAX_STACK_TRACE_DEPTH	64
/* Number of slots in skip_pfn[] / backup_stacktrace[]. */
#define MAX_CHECKED_PFNS	16
/* Byte offset, inside every monitored page, of the 64-bit word inspected. */
#define MONITOR_PAGE_OFFSET	0x520
/* The word is shifted right by this much before being compared. */
#define MONITOR_TAG_SHIFT	40
#define SKIP_CHECKED_PFN

static unsigned long memory_monitor_addr = 0;
static unsigned long memory_monitor_size = 0;

/*
 * NOTE(review): nothing in this file increments checked_cnt or stores into
 * skip_pfn[], so the skip list is currently always empty. Presumably the
 * bookkeeping was meant to be filled in after a hit -- confirm intent.
 */
static unsigned long checked_cnt = 0;
static unsigned long backup_stacktrace[MAX_CHECKED_PFNS][MAX_STACK_TRACE_DEPTH];
#ifdef SKIP_CHECKED_PFN
static unsigned long skip_pfn[MAX_CHECKED_PFNS];
#endif

/* Monitored PFN range [start_pfn, end_pfn); both zero means "disabled". */
static unsigned long start_pfn = 0;
static unsigned long end_pfn = 0;

/* Set once the probe has actually been attached, so exit can undo it. */
static bool sched_probe_registered;

/**
 * _kprint_stack() - print the saved stack trace of @task.
 * @task:  task whose stack is captured with save_stack_trace_tsk().
 * @saved: when non-zero, also copy the entries into
 *         backup_stacktrace[checked_cnt] (if that slot exists).
 *
 * The entry buffer lives on the stack rather than being allocated: this
 * function is called from the sched_switch probe, where a sleeping
 * GFP_KERNEL allocation must not be used.
 *
 * Return: always 0.
 */
static int _kprint_stack(struct task_struct *task, int saved)
{
	unsigned long entries[MAX_STACK_TRACE_DEPTH];
	struct stack_trace trace = {
		.nr_entries	= 0,
		.max_entries	= ARRAY_SIZE(entries),
		.entries	= entries,
		.skip		= 0,
	};
	unsigned int i;

	save_stack_trace_tsk(task, &trace);

	for (i = 0; i < trace.nr_entries; i++) {
		pr_cont("[<%px>] %pS\n", (void *)entries[i], (void *)entries[i]);
		/* Never index past the last backup slot. */
		if (saved && checked_cnt < ARRAY_SIZE(backup_stacktrace))
			backup_stacktrace[checked_cnt][i] = entries[i];
	}

	return 0;
}

#ifdef SKIP_CHECKED_PFN
/* Return true when @pfn is among the first checked_cnt entries of skip_pfn[]. */
static bool pfn_already_checked(unsigned long pfn)
{
	unsigned long used = min_t(unsigned long, checked_cnt,
				   ARRAY_SIZE(skip_pfn));
	unsigned long i;

	for (i = 0; i < used; i++) {
		if (skip_pfn[i] == pfn)
			return true;
	}
	return false;
}
#endif

/* Return true when the bits of @word above MONITOR_TAG_SHIFT match a pattern. */
static bool monitor_word_matches(uint64_t word)
{
	uint64_t tag = word >> MONITOR_TAG_SHIFT;

	return tag == 0xb0ffff || tag == 0xb0cccc || tag == 0xb00000;
}

/**
 * sched_switch_probe() - sched_switch tracepoint callback.
 * @data:    probe private data (registered as NULL, unused).
 * @preempt: unused.
 * @prev:    task being switched out.
 * @next:    task being switched in.
 *
 * Walks every PFN in [start_pfn, end_pfn) and inspects the 64-bit word at
 * MONITOR_PAGE_OFFSET within that page through the linear mapping (__va()).
 * On a match it logs the physical address, the word, the CPU, and the stacks
 * of @prev and @next, followed by dump_stack().
 */
static void sched_switch_probe(void *data, bool preempt,
			       struct task_struct *prev,
			       struct task_struct *next)
{
	unsigned long cur_pfn;

	if (start_pfn == 0 || end_pfn == 0)
		return;

	for (cur_pfn = start_pfn; cur_pfn < end_pfn; cur_pfn++) {
		phys_addr_t pa;
		uint64_t word;

#ifdef SKIP_CHECKED_PFN
		if (pfn_already_checked(cur_pfn))
			continue;
#endif
		pa = PFN_PHYS(cur_pfn) + MONITOR_PAGE_OFFSET;
		/* Read once so the value logged is the value that matched. */
		word = *(uint64_t *)__va(pa);
		if (!monitor_word_matches(word))
			continue;

		pr_emerg("address[%pa]=%llx, CPU=%d\n",
			 &pa, (unsigned long long)word, smp_processor_id());
		pr_emerg("----current process stack----\n");
		_kprint_stack(prev, 0);
		pr_emerg("----next process stack----\n");
		_kprint_stack(next, 0);
		dump_stack();
	}
}

/**
 * get_memory_monitor_range() - parse "sched_monitor_addr=<size>@<addr>".
 * @str: option value from the kernel command line.
 *
 * Both numbers are parsed with memparse(). The monitor stays disabled when
 * the '@' separator is missing or when either value is zero.
 *
 * Return: always 0.
 */
static int __init get_memory_monitor_range(char *str)
{
	unsigned long addr, size;

	pr_info("%s(%s)\n", __func__, str);
	if (!str)
		return 0;

	size = memparse(str, &str);
	if (*str != '@')
		return 0;

	addr = memparse(str + 1, &str);
	if (addr == 0 || size == 0) {
		pr_info("memory monitor is disable, addr=%#lx, size=%#lx\n",
			addr, size);
		return 0;
	}

	pr_info("memory monitor is enable, addr=%#lx, size=%#lx\n", addr, size);
	memory_monitor_addr = addr;
	memory_monitor_size = size;
	return 0;
}
early_param("sched_monitor_addr", get_memory_monitor_range);

/**
 * tracepoint_sched_switch_init() - attach the probe if a range was configured.
 *
 * Does nothing (and succeeds) when no range was given on the command line.
 *
 * Return: 0 on success or when disabled, otherwise the error returned by
 * register_trace_sched_switch().
 */
static int __init tracepoint_sched_switch_init(void)
{
	int ret;

	if (memory_monitor_addr == 0 || memory_monitor_size == 0)
		return 0;

	start_pfn = PHYS_PFN(memory_monitor_addr);
	end_pfn = PHYS_PFN(memory_monitor_addr + memory_monitor_size);
	pr_info("memory monitor: start_pfn=%#lx, end_pfn=%#lx\n",
		start_pfn, end_pfn);

	ret = register_trace_sched_switch(sched_switch_probe, NULL);
	if (ret) {
		pr_info("register trace_sched_switch failed.\n");
		return ret;
	}

	sched_probe_registered = true;
	pr_info("register trace_sched_switch success.\n");
	return 0;
}

/**
 * tracepoint_sched_switch_exit() - detach the probe if it was attached.
 *
 * Waits for in-flight probe callers with tracepoint_synchronize_unregister()
 * before returning, so the probe code is no longer executing afterwards.
 */
static void __exit tracepoint_sched_switch_exit(void)
{
	pr_info("%s:%d: exit.\n", __func__, __LINE__);

	if (!sched_probe_registered)
		return;

	unregister_trace_sched_switch(sched_switch_probe, NULL);
	tracepoint_synchronize_unregister();
	sched_probe_registered = false;
}

pure_initcall(tracepoint_sched_switch_init);
module_exit(tracepoint_sched_switch_exit);