tracer ftrace笔记(3)——宏展开和hook和注册——lengency
一、ftrace的宏
1. struct tracepoint 结构
使用 struct tracepoint 变量来描述一个 trace point。
//include/linux/tracepoint-defs.h struct tracepoint { const char *name; //trace point的名字,内核中通过hash表管理所有的trace point,找到对应的hash slot后,需要通过name来识别具体的trace point。 struct static_key key; //trace point状态,0表示disable,1表示enable,static_key_false(&key)判断的其实就只是key的真假。 struct static_call_key *static_call_key; void *static_call_tramp; void *iterator; int (*regfunc)(void); //添加桩函数的函数 void (*unregfunc)(void); //卸载桩函数的函数 struct tracepoint_func __rcu *funcs; //trace point中所有的桩函数链表. 是个数组 }; struct tracepoint_func { void *func; void *data; int prio; };
static key使用见:https://www.cnblogs.com/hellokitty2/p/15026568.html
2. DEFINE_TRACE_FN 展开后是
/* * include/linux/tracepoint.h * 就是定义一个名为 __tracepoint_##_name 的 struct tracepoint 结构, * 然后定义一个名为 __traceiter_##_name 的函数,它对 struct tracepoint::funcs[] 成员数组中的每个函数都进行调用,数组尾部要以NULL结尾。 */ #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key __SCK__tp_func_##_name; \ int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &__SCK__tp_func_##_name, \ .static_call_tramp = NULL, \ .iterator = &__traceiter_##_name, \ .regfunc = _reg, \ .unregfunc = _unreg, \ .funcs = NULL \ }; \ __TRACEPOINT_ENTRY(_name); \ int __nocfi __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ it_func_ptr = rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ it_func = (it_func_ptr)->func; \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ } \ return 0; \ } \ extern struct static_call_key __SCK__tp_func_##_name; \ extern typeof(__traceiter_##_name) __SCT__tp_func_##_name; \ struct static_call_key __SCK__tp_func_##_name = { \ .func = __traceiter_##_name, \ }
3. __DECLARE_TRACE 宏展开后就是:
/*
 * include/linux/tracepoint.h
 *
 * This macro defines a family of functions for one tracepoint. The commonly
 * used ones are register_trace_##name and trace_##name##_enabled. The rcuidle
 * case gets its own function, and probes can also be registered with a
 * priority.
 *
 * The do { ... } while (0) bodies below are the hand-expanded tracing
 * sequence; each one is terminated with ';' so that the statement following
 * it parses.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto); \
	extern struct static_call_key __SCK__tp_func_##name; \
	extern typeof(__traceiter_##name) __SCT__tp_func_##name; \
	extern struct tracepoint __tracepoint_##name; \
	static inline void __nocfi trace_##name(proto) \
	{ \
		/* probes run only while the tracepoint key is enabled */ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
				preempt_enable_notrace(); \
			} while (0); \
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
			rcu_read_lock_sched_notrace(); \
			rcu_dereference_sched(__tracepoint_##name.funcs); \
			rcu_read_unlock_sched_notrace(); \
		} \
	} \
	static inline void trace_##name##_rcuidle(proto) \
	{ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				\
				/* srcu can't be used from NMI */ \
				WARN_ON_ONCE(in_nmi()); \
				\
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				\
				/* \
				 * For rcuidle callers, use srcu since sched-rcu \
				 * doesn't work from the idle path. \
				 */ \
				__idx = srcu_read_lock_notrace(&tracepoint_srcu); \
				rcu_irq_enter_irqson(); \
				\
				it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
				\
				rcu_irq_exit_irqson(); \
				srcu_read_unlock_notrace(&tracepoint_srcu, __idx); \
				\
				preempt_enable_notrace(); \
			} while (0); \
	} \
	static inline int register_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_register(&__tracepoint_##name, (void *)probe, data); \
	} \
	static inline int register_trace_prio_##name(void (*probe)(data_proto), void *data, int prio) \
	{ \
		return tracepoint_probe_register_prio(&__tracepoint_##name, (void *)probe, data, prio); \
	} \
	static inline int unregister_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_unregister(&__tracepoint_##name, (void *)probe, data); \
	} \
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{ \
	} \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_##name.key); \
	}
trace_##name(proto) 中判断 __tracepoint_##name.key 的值为真才会调用执行各个钩子函数,在下面路径中会将这个key设置为真。
register_trace_##name() //具体tracepoint的define位置
    tracepoint_probe_register //tracepoint.c
        tracepoint_probe_register_prio //tracepoint.c
            tracepoint_add_func //tracepoint.c
                static_key_enable(&tp->key);
也就是说注册了 hook 才会真,否则为假。
4. 使用 DECLARE_TRACE 的宏
#define DEFINE_TRACE(name, proto, args) DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

// Expands to nothing.
#define TRACE_EVENT_FLAGS(event, flag)
// Expands to nothing.
#define TRACE_EVENT_PERF_PERM(event, expr...)

/*
 * include/linux/tracepoint-defs.h
 * Not recommended for direct use; this header is included at the very top.
 */
#define DECLARE_TRACEPOINT(tp) extern struct tracepoint __tracepoint_##tp

/*
 * Recommended. It does the same thing as trace_##name##_enabled(void) but is
 * safe to use in header files, whereas trace_##name##_enabled(void) is not
 * safe there -- presumably because a function cannot be defined twice.
 */
#define tracepoint_enabled(tp) static_key_false(&(__tracepoint_##tp).key)

/*
 * include/linux/tracepoint.h
 * Produces the function family shown for __DECLARE_TRACE, including
 * register_trace_##name, trace_##name##_enabled, and so on.
 */
#define DECLARE_TRACE(name, proto, args) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))

/*
 * The only difference from DECLARE_TRACE is that arg4 is additionally ANDed
 * with the cond parameter. It is used mainly by trace_##name and
 * trace_##name##_rcuidle, which return immediately when cond is false.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), PARAMS(void *__data, proto), PARAMS(__data, args))

/* include/linux/tracepoint.h */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT(template, name, proto, args) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
#define TRACE_EVENT(name, proto, args, struct, assign, print) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN(name, proto, args, struct, assign, print, reg, unreg) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, assign, print, reg, unreg) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, print) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_FLAGS(event, flag)
#define TRACE_EVENT_PERF_PERM(event, expr...)

/* No-op variants: trace_##name() does nothing and _enabled() is always false. */
#define DECLARE_EVENT_NOP(name, proto, args) \
	static inline void trace_##name(proto) \
	{ } \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return false; \
	}
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT_NOP(template, name, proto, args) DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
tracepoint.h 中的定义可能不是最终的,因为有的文件中会先执行 #undef XXX,然后重新进行 define。观察可以发现,这些宏主要使用的是 DECLARE_TRACE,对照展开后的函数,显然是不完整的,因为缺少 DEFINE_TRACE 相关的部分。因此每个trace应该还存在使用 DEFINE_TRACE 的那一部分。两者都存在,一个trace才完整。
5. 使用 DEFINE_TRACE 的部分
/*
 * include/trace/define_trace.h
 *
 * Each event macro is #undef'ed and redefined so that it expands to
 * DEFINE_TRACE / DEFINE_TRACE_FN; the *_NOP variants expand to nothing.
 */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_CONDITION
#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
	TRACE_EVENT(name, PARAMS(proto), PARAMS(args), PARAMS(tstruct), PARAMS(assign), PARAMS(print))

#undef TRACE_EVENT_FN
#define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg) \
	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_FN_COND
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, assign, print, reg, unreg) \
	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_NOP
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)

#undef DEFINE_EVENT_NOP
#define DEFINE_EVENT_NOP(template, name, proto, args)

#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_FN
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \
	DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
	DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_CONDITION
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))

#undef DECLARE_TRACE
#define DECLARE_TRACE(name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
6. EXPORT_TRACEPOINT_SYMBOL_GPL 和 EXPORT_TRACEPOINT_SYMBOL
导出这些trace符号后,才能在模块中使用。
/*
 * include/linux/tracepoint.h
 *
 * Shown after expansion: each macro exports the three per-tracepoint symbols
 * __tracepoint_##name, __traceiter_##name and __SCK__tp_func_##name.
 *
 * Neither macro ends with ';' -- the caller supplies it, as in
 * EXPORT_TRACEPOINT_SYMBOL_GPL(foo); -- so both variants expand the same way
 * and no empty declaration is left at file scope.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
	EXPORT_SYMBOL_GPL(__tracepoint_##name); \
	EXPORT_SYMBOL_GPL(__traceiter_##name); \
	EXPORT_SYMBOL_GPL(__SCK__tp_func_##name)

#define EXPORT_TRACEPOINT_SYMBOL(name) \
	EXPORT_SYMBOL(__tracepoint_##name); \
	EXPORT_SYMBOL(__traceiter_##name); \
	EXPORT_SYMBOL(__SCK__tp_func_##name)
7. 定义一个trace,TRACE_EVENT 各个成员使用的宏
/* include/linux/tracepoint.h */
/* These helpers simply pass their argument list through unchanged. */
#define PARAMS(args...) args
#define TP_PROTO(args...) args
#define TP_ARGS(args...) args
#define TP_CONDITION(args...) args

// include/trace/trace_events.h
#define TP_STRUCT__entry(args...) args
#define TP_fast_assign(args...) args
// Builds a quoted copy of fmt followed by the stringified argument list.
#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
include/trace/events/sched.h 文件中定义了大量的CPU调度相关的trace,但是它只include了 linux/tracepoint.h 文件,说明其使用的宏全部都是来自linux/tracepoint.h 文件的,但是 tracepoint.h 中又包含了其它头文件,不排除其它头文件中又包含了其它头文件,比如 include/trace/trace_events.h 。
8. 以 sched_migrate_task 为例来看 TRACE_EVENT
//include/trace/events/sched.h TRACE_EVENT(sched_migrate_task, TP_PROTO(struct task_struct *p, int dest_cpu), TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, orig_cpu ) __field( int, dest_cpu ) __field( int, running ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; __entry->running = (p->state == TASK_RUNNING); ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu, __entry->running) );
include/linux/tracepoint.h 中有注释:__field(pid_t, prev_pid) 等于 pid_t prev_pid; __array(char, prev_comm, TASK_COMM_LEN) 等于 char prev_comm[TASK_COMM_LEN];
声明的 'local variable' 叫做 '__entry',可以在 TP_fast_assign 中使用 __entry->XX 来引用。TP_STRUCT__entry 指定环形缓冲区中的存储格式,也是 /sys/kernel/debug/tracing/events/<*>/format 导出到用户空间的格式。
按照如下宏定义进行展开:
/* Declaration side: TRACE_EVENT -> DECLARE_TRACE -> __DECLARE_TRACE. */
#define TRACE_EVENT(name, proto, args, struct, assign, print) DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DECLARE_TRACE(name, proto, args) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))
// Which maps directly to:
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))

/* Definition side: TRACE_EVENT -> DEFINE_TRACE -> DEFINE_TRACE_FN. */
#define TRACE_EVENT(name, proto, args, struct, assign, print) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_TRACE(name, proto, args) DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
// Which maps directly to:
#define TRACE_EVENT(name, proto, args, struct, assign, print) DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
全部展开后为:
/*
 * Full expansion of TRACE_EVENT(sched_migrate_task, ...).
 *
 * NOTE(review): the bodies below already have name/proto/args substituted for
 * sched_migrate_task, so each #define line acts as a label for the macro that
 * produced the text rather than as a usable macro definition.
 *
 * Each do { ... } while (0) is terminated with ';' so that the statement
 * following it parses.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu); \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task; \
	extern struct tracepoint __tracepoint_sched_migrate_task; \
	static inline void __nocfi trace_sched_migrate_task(struct task_struct *p, int dest_cpu) \
	{ \
		if (static_key_false(&__tracepoint_sched_migrate_task.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!cpu_online(raw_smp_processor_id())) \
					return; \
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_sched_migrate_task(__data, p, dest_cpu); \
				} \
				preempt_enable_notrace(); \
			} while (0); \
		if (IS_ENABLED(CONFIG_LOCKDEP) && cpu_online(raw_smp_processor_id())) { \
			rcu_read_lock_sched_notrace(); \
			rcu_dereference_sched(__tracepoint_sched_migrate_task.funcs); \
			rcu_read_unlock_sched_notrace(); \
		} \
	} \
	static inline void trace_sched_migrate_task_rcuidle(struct task_struct *p, int dest_cpu) \
	{ \
		if (static_key_false(&__tracepoint_sched_migrate_task.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!cpu_online(raw_smp_processor_id())) \
					return; \
				\
				/* srcu can't be used from NMI */ \
				WARN_ON_ONCE(in_nmi()); \
				\
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				\
				/* \
				 * For rcuidle callers, use srcu since sched-rcu \
				 * doesn't work from the idle path. \
				 */ \
				__idx = srcu_read_lock_notrace(&tracepoint_srcu); \
				rcu_irq_enter_irqson(); \
				\
				it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_sched_migrate_task(__data, p, dest_cpu); \
				} \
				\
				rcu_irq_exit_irqson(); \
				srcu_read_unlock_notrace(&tracepoint_srcu, __idx); \
				\
				preempt_enable_notrace(); \
			} while (0); \
	} \
	static inline int register_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data) \
	{ \
		return tracepoint_probe_register(&__tracepoint_sched_migrate_task, (void *)probe, data); \
	} \
	static inline int register_trace_prio_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data, int prio) \
	{ \
		return tracepoint_probe_register_prio(&__tracepoint_sched_migrate_task, (void *)probe, data, prio); \
	} \
	static inline int unregister_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data) \
	{ \
		return tracepoint_probe_unregister(&__tracepoint_sched_migrate_task, (void *)probe, data); \
	} \
	static inline void check_trace_callback_type_sched_migrate_task(void (*cb)(void *__data, struct task_struct *p, int dest_cpu)) \
	{ \
	} \
	static inline bool trace_sched_migrate_task_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_sched_migrate_task.key); \
	}

#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \
	static const char __tpstrtab_sched_migrate_task[] \
	__section("__tracepoints_strings") = "sched_migrate_task"; \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu); \
	struct tracepoint __tracepoint_sched_migrate_task __used __section("__tracepoints") = { \
		.name = __tpstrtab_sched_migrate_task, \
		.key = STATIC_KEY_INIT_FALSE, \
		.static_call_key = &__SCK__tp_func_sched_migrate_task, \
		.static_call_tramp = NULL, \
		.iterator = &__traceiter_sched_migrate_task, \
		.regfunc = NULL, \
		.unregfunc = NULL, \
		.funcs = NULL \
	}; \
	__TRACEPOINT_ENTRY(sched_migrate_task); \
	int __nocfi __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu) \
	{ \
		struct tracepoint_func *it_func_ptr; \
		void *it_func; \
		it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
		if (it_func_ptr) { \
			/* call each probe with its own data until the NULL func terminator */ \
			do { \
				it_func = (it_func_ptr)->func; \
				__data = (it_func_ptr)->data; \
				((void(*)(void *, struct task_struct *p, int dest_cpu))(it_func))(__data, p, dest_cpu); \
			} while ((++it_func_ptr)->func); \
		} \
		return 0; \
	} \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task; \
	struct static_call_key __SCK__tp_func_sched_migrate_task = { \
		.func = __traceiter_sched_migrate_task, \
	}
TODO: 其它部分是怎么起作用的?
从展开后的内容可以看到,当调用 trace_sched_migrate_task() 进行trace的时候,会调用 __traceiter_sched_migrate_task() 来遍历 struct tracepoint::funcs 数组中的每一个函数进行trace,也就是说一个trace上可以注册多个hook函数。
若使用 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_migrate_task) 导出,上面展开结果中的 __tracepoint_sched_migrate_task、__traceiter_sched_migrate_task、__SCK__tp_func_sched_migrate_task 三个符号会被导出来。
9. 一个trace上注册多个hook
既然一个trace上可以注册多个hook,那么一定会涉及到这些hook函数的调用次序的问题,见 tracepoint_probe_register 实现可知,有一个默认优先级 TRACEPOINT_DEFAULT_PRIO=10,注册函数中会传递给 struct tracepoint_func::prio,在插入到 struct tracepoint::funcs 数组时会判断优先级,优先级数值越大,越插在靠前的位置,相同优先级的话,后注册的插在后面。 比如此例子中,注册默认优先级的使用函数 register_trace_sched_migrate_task,自己指定优先级使用函数 register_trace_prio_sched_migrate_task。
/*
 * Register @probe (with its private @data) on tracepoint @tp using the
 * default priority, TRACEPOINT_DEFAULT_PRIO. Returns whatever
 * tracepoint_probe_register_prio() returns.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
	const int default_prio = TRACEPOINT_DEFAULT_PRIO;

	return tracepoint_probe_register_prio(tp, probe, data, default_prio);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
二、Google搞的vendor hook
1. hook 的 DEFINE_HOOK_FN 解析后是
//include/trace/hooks/vendor_hooks.h #define DEFINE_HOOK_FN(_name, _reg, _unreg, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key __SCK__tp_func_##_name; \ int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &__SCK__tp_func_##_name, \ .static_call_tramp = NULL, \ .iterator = &__traceiter_##_name, \ .regfunc = _reg, \ .unregfunc = _unreg, \ .funcs = NULL }; \ __TRACEPOINT_ENTRY(_name); \ int __nocfi __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ \ it_func_ptr = (&__tracepoint_##_name)->funcs; //不同:这里是直接访问的,ftrace是rcu_dereference_raw \ it_func = (it_func_ptr)->func; //不同:这里是先获取一个,ftrace中的是先判断it_func_ptr \ do { \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ it_func = READ_ONCE((++it_func_ptr)->func); \ } while (it_func); \ return 0; \ } \ extern struct static_call_key __SCK__tp_func_##_name; \ extern typeof(__traceiter_##_name) __SCT__tp_func_##_name; \ struct static_call_key __SCK__tp_func_##_name = { \ .func = __traceiter_##_name, \ }
注意备注上的一些和ftrace之间的不同点。
2. hook 的 __DECLARE_HOOK 解析后是:
/*
 * include/trace/hooks/vendor_hooks.h
 *
 * Vendor-hook counterpart of __DECLARE_TRACE. It provides trace_##name(),
 * trace_##name##_enabled() and register_trace_##name(); the latter registers
 * through android_rvh_probe_register(). No unregister function is generated.
 *
 * The do { ... } while (0) is terminated with ';' so the closing brace of
 * trace_##name() parses, and the macro does not end in a line continuation,
 * so the text following it is not spliced into the definition.
 */
#define __DECLARE_HOOK(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto); \
	extern struct static_call_key __SCK__tp_func_##name; \
	extern typeof(__traceiter_##name) __SCT__tp_func_##name; \
	extern struct tracepoint __tracepoint_##name; \
	\
	static inline void __nocfi trace_##name(proto) \
	{ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				\
				/* funcs is read directly, without an RCU accessor */ \
				it_func_ptr = (&__tracepoint_##name)->funcs; \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
			} while (0); \
	} \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_##name.key); \
	} \
	static inline int register_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return android_rvh_probe_register(&__tracepoint_##name, (void *)probe, data); \
	} \
	/* vendor hooks cannot be unregistered */
相比于ftrace,hook的trace 删除了 trace_##name##_rcuidle()、register_trace_prio_##name()、unregister_trace_##name()、check_trace_callback_type_##name()
3. 其它宏
/*
 * Definition side: DECLARE_RESTRICTED_HOOK expands to DEFINE_HOOK_FN with no
 * reg/unreg callbacks (cond is not used).
 */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
	DEFINE_HOOK_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args))

/*
 * Declaration side: DECLARE_RESTRICTED_HOOK expands to __DECLARE_HOOK.
 * NOTE(review): presumably the two definitions live in different
 * preprocessor branches of vendor_hooks.h; as listed here the second one
 * replaces the first -- confirm against the header.
 */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
	__DECLARE_HOOK(name, PARAMS(proto), PARAMS(args), cond, PARAMS(void *__data, proto),PARAMS(__data, args))
4. 总结
Google的vendor hook在ftrace的基础上做了改动,由于Google的Hook宏删除了ftrace中的 register_trace_prio_##name(),因此不能注册带有优先级的钩子函数了。
三、实验
1. 对5.10内核中的 util_est_update 中的trace添加hook
// Excerpt from fair.c; "..." marks elided code.
static inline void util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) // fair.c
{
	...
	// Google's vendor hook; a non-zero ret makes the function return early.
	trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret);
	if (ret)
		return;
	...
	// Ordinary ftrace tracepoint.
	trace_sched_util_est_se_tp(&p->se);
}
这两个trace符号Google已经导出来了:
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_util_est_update); //vendor_hooks.c EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); //core.c
2. 实验代码
/* 1. Include the headers. */
#include <trace/events/sched.h>
/*
 * NOTE(review): register_trace_android_rvh_util_est_update() comes from the
 * vendor-hook declaration, presumably in trace/hooks/sched.h -- confirm the
 * header name against your kernel tree.
 */
#include <trace/hooks/sched.h>
/*
 * NOTE(review): struct rq, struct cfs_rq, cpu_rq() and entity_is_task() are
 * used below; presumably they need the scheduler's private header
 * (kernel/sched/sched.h) -- confirm how your module includes it.
 */

/*
 * 2. Implement the handler (probe) functions. A handler takes a leading
 *    void *data followed by the same arguments as trace_##name().
 */

/* Hooked from util_est_update() in fair.c; first handler registered. */
void android_rvh_util_est_update_handler(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
	struct util_est *se_ue = &p->se.avg.util_est;
	struct util_est *rq_ue = &cfs_rq->avg.util_est;

	/* %u: the util_est fields are unsigned */
	trace_printk("start: first_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
		     se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

	/* 0 lets util_est_update() continue past the hook */
	*ret_o = 0;
}

/* Same hook, second handler registered; only the log tag differs. */
void android_rvh_util_est_update_handler_second(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
	struct util_est *se_ue = &p->se.avg.util_est;
	struct util_est *rq_ue = &cfs_rq->avg.util_est;

	trace_printk("start: second_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
		     se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

	*ret_o = 0;
}

/*
 * Handler for the sched_util_est_se_tp tracepoint, registered with the
 * default priority (10). The two sched_util_est_se_tp handlers differ only
 * in the prio value they print for debugging.
 */
void sched_util_est_se_tp_handler(void *data, struct sched_entity *se)
{
	static int count = 0;
	int prio = 10;

	if (entity_is_task(se)) {
		struct task_struct *p = container_of(se, struct task_struct, se);
		struct rq *rq = cpu_rq(task_cpu(p));
		struct cfs_rq *cfs_rq = &rq->cfs;
		struct util_est *se_ue = &p->se.avg.util_est;
		struct util_est *rq_ue = &cfs_rq->avg.util_est;

		trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u\n",
			     count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
	} else {
		trace_printk("end: se is not task\n");
	}
}

/* Handler for the sched_util_est_se_tp tracepoint, registered with priority 12. */
void sched_util_est_se_tp_handler_prio_12(void *data, struct sched_entity *se)
{
	static int count = 0;
	int prio = 12;

	if (entity_is_task(se)) {
		struct task_struct *p = container_of(se, struct task_struct, se);
		struct rq *rq = cpu_rq(task_cpu(p));
		struct cfs_rq *cfs_rq = &rq->cfs;
		struct util_est *se_ue = &p->se.avg.util_est;
		struct util_est *rq_ue = &cfs_rq->avg.util_est;

		trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u\n",
			     count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
	} else {
		trace_printk("end: se is not task\n");
	}
}

/*
 * 3. Register the handlers (call this from the module's init path).
 *
 * The registration order is kept as in the experiment. Returns 0 on success
 * or the first registration error. The ordinary tracepoint handler is
 * unregistered again if a later step fails; vendor-hook handlers stay
 * registered because vendor hooks provide no unregister function.
 */
static int register_util_est_handlers(void)
{
	int ret;

	/* common register */
	ret = register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler, NULL);
	if (ret)
		return ret;

	ret = register_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL);
	if (ret)
		return ret;

	/* google vendor hook cannot use prio, because it is not defined. */
	ret = register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler_second, NULL);
	if (ret)
		goto err_unregister;

	/* ftrace register with prio. */
	ret = register_trace_prio_sched_util_est_se_tp(sched_util_est_se_tp_handler_prio_12, NULL, 12);
	if (ret)
		goto err_unregister;

	return 0;

err_unregister:
	unregister_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL);
	return ret;
}
3. 实验结果,打印的前后关系:
# echo 1 > tracing_on
# cat trace_pipe
<...>-338 [005] d..3 32.158404: sched_util_est_se_tp_handler_prio_12: end: count=28494, prio=12, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0
<...>-338 [005] d..3 32.158404: sched_util_est_se_tp_handler: end: count=28493, prio=10, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0
<...>-338 [005] d..2 32.158410: android_rvh_util_est_update_handler: start: first_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1
<...>-338 [005] d..2 32.158410: android_rvh_util_est_update_handler_second: start: second_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1
普通ftrace,注册时指定的优先级数值越大,越先调用。vendor hook 没有带有优先级注册的钩子函数,先注册的钩子函数调用在前,后注册的钩子函数调用在后。
看代码实现,就算是不执行 “echo 1 > tracing_on” 这些钩子函数应该也会被调用执行,只不过不会打印出来。
4. 另一种注册trace hook的方法
struct tracepoints_table { const char *name; void *func; struct tracepoint *tp; bool registered; }; static struct tracepoints_table g_tracepoints_table[] = { {.name = "android_rvh_util_est_update", .func = android_rvh_util_est_update_handler}, {.name = "sched_util_est_se_tp", .func = sched_util_est_se_tp_handler}, }; static void lookup_tracepoints(struct tracepoint *tp, void *ignore) { int i; for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) { if (!strcmp(g_tracepoints_table[i].name, tp->name)) g_tracepoints_table[i].tp = tp; } } static void register_tracepoints_table(void) { int i, ret; struct tracepoints_table *tt; for_each_kernel_tracepoint(lookup_tracepoints, NULL); //找到匹配的tracepoint结构 for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) { tt = &g_tracepoints_table[i]; if (tt->tp) { ret = tracepoint_probe_register(tt->tp, tt->func, NULL); if (ret) { pr_info("couldn't activate tracepoint %pf\n", tt->func); tracepoint_cleanup(i); } tt->registered = true; } } } void tracepoint_cleanup(int index) { int i; struct tracepoints_table *tt; for (i = 0; i < index; i++) { tracepoints_table *tt = &g_tracepoints_table[i]; if (tt->registered) { tracepoint_probe_unregister(tt->tp, tt->func, NULL); tt->registered = false; } } }
可见这种注册需要遍历 tracepoint 区域并对 name 进行对比,效率比较低,优点是涉及的文件比较少。
posted on 2021-11-07 23:15 Hello-World3 阅读(3171) 评论(0) 编辑 收藏 举报