tracer ftrace笔记(10)—— trace内容分析

基于Linux-5.10

一、trace打印格式

1. 在把 raw trace 数据格式化成用户可读的形式时,trace数据分成两部分:一部分是 common 信息,一部分是用户自定义的 trace 信息,这两部分分开格式化。

common信息: 这部分有系统根据当前是 “Normal format” 还是 “Latency trace format”,选择使用 trace_print_context() 或者是 trace_print_lat_context() 函数来进行格式化。

用户trace信息: 这部分根据 type 字段找到对应的 trace_event 结构,根据其 event->funcs->trace() 函数来进行格式化。遵循谁存储谁解析的原则,由 TRACE_EVENT 的 TP_printk 参数决定。

以 # cat /sys/kernel/tracing/trace 为例,

trace_create_file("trace", 0644, d_tracer, tr, &tracing_fops);

tracing_fops.read
    seq_read //读操作实际最后落在 tracer_seq_ops 的 s_next() 和 s_show() 上
        tracer_seq_ops.next
            s_next
                trace_find_next_entry_inc
                    __find_next_entry //取出下一个entry
                    trace_iterator_increment //增加iter中的读指针
        tracer_seq_ops.show
            s_show
                print_trace_line
                    print_trace_fmt
                        trace_print_context //打印每条trace的头部common信息

print_trace_fmt() 函数:

static enum print_line_t print_trace_fmt(struct trace_iterator *iter) //kernel/trace/trace.c
{
    struct trace_array *tr = iter->tr;
    struct trace_seq *s = &iter->seq;
    unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
    struct trace_entry *entry;
    struct trace_event *event;

    entry = iter->ent;

    test_cpu_buff_start(iter);

    event = ftrace_find_event(entry->type);

    /* 1. 打印头部common信息 */
    if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
            /* Latency trace format */
            trace_print_lat_context(iter);
        else
            /* Normal format */
            trace_print_context(iter);
    }

    if (trace_seq_has_overflowed(s))
        return TRACE_TYPE_PARTIAL_LINE;

    /* 2. 打印用户trace信息 */
    if (event)
        return event->funcs->trace(iter, sym_flags, event);

    trace_seq_printf(s, "Unknown type %d\n", entry->type);

    return trace_handle_return(s);
}

(1) 只看"Normal format"下头部信息的打印,打印comm、pid、tgid、cpu ts 信息:

int trace_print_context(struct trace_iterator *iter) //kernel/trace/trace_output.c
{
    struct trace_array *tr = iter->tr;
    struct trace_seq *s = &iter->seq;
    struct trace_entry *entry = iter->ent;
    unsigned long long t;
    unsigned long secs, usec_rem;
    char comm[TASK_COMM_LEN];

    /* 是从cmdline中找的task->comm信息 */
    trace_find_cmdline(entry->pid, comm);

    /* 打印trace记录时的 cur->comm 和 cur->pid */
    trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);

    if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
        unsigned int tgid = trace_find_tgid(entry->pid);

        if (!tgid)
            trace_seq_printf(s, "(-------) ");
        else
            /* 打印tgid */
            trace_seq_printf(s, "(%7d) ", tgid);
    }

    /* 打印trace抓取时所在的cpu */
    trace_seq_printf(s, "[%03d] ", iter->cpu);

    if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
        /* 打印关中断关抢占状态 */
        trace_print_lat_fmt(s, entry);

    if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
        t = ns2usecs(iter->ts);
        usec_rem = do_div(t, USEC_PER_SEC);
        secs = (unsigned long)t;
        /* 打印时间戳 */
        trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem);
    } else
        trace_seq_printf(s, " %12llu: ", iter->ts);

    return !trace_seq_has_overflowed(s);
}

(2) struct trace_event 类型的 event->funcs->trace() 最后调用到的是 “include/trace/trace_event.h” 中的 trace_raw_output_##call() 函数。以 sched_blocked_reason 为例就是 trace_raw_output_sched_blocked_reason(),在它中打印用户自定义信息。

static notrace enum print_line_t trace_raw_output_sched_blocked_reason(struct trace_iterator *iter,
        int flags, struct trace_event *trace_event)
{
    /* iter->seq:为output buffer */
    struct trace_seq *s = &iter->seq;
    /*
     * iter->tmp_seq:为临时buffer,供 __get_bitmask()、__print_flags()、
     * __print_symbolic()等一系列函数先把值转换成字符串
     */
    struct trace_seq __maybe_unused *p = &iter->tmp_seq;
    /* 使用trace_event自定义的结构指向这片区域,来进行引用 */
    struct trace_event_raw_sched_blocked_reason *field;
    int ret;

    /* iter->ent:为ringbuffer原始数据的存放buffer */
    field = (typeof(field))iter->ent;
    /* 在output buffer的最前头,打印出trace_event的name */
    ret = trace_raw_output_prep(iter, trace_event);
    if (ret != TRACE_TYPE_HANDLED)
        return ret;
    /* 把ringbuffer中的原始数据根据格式打印到output buffer当中*/

    trace_seq_printf(s, "pid=%d iowait=%d caller=%pS" "\n", field->pid, field->io_wait, field->caller);

    return trace_handle_return(s);
}

trace_iterator 只是一个中间临时变量,主要是利用 iter->seq、iter->tmp_seq 把 iter->ent 指向的 ringbuffer 数据按照 format 转换成用户可读的数据。

用户部分除了可以看 TRACE_EVENT 的 TP_printk 参数外,还可以cat format 文件进行查看

/sys/kernel/tracing/events/sched/sched_blocked_reason # cat format
name: sched_blocked_reason
ID: 84
format:
        field:unsigned short common_type;       offset:0;       size:2; signed:0;
        field:unsigned char common_flags;       offset:2;       size:1; signed:0;
        field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
        field:int common_pid;   offset:4;       size:4; signed:1;

        field:pid_t pid;        offset:8;       size:4; signed:1;
        field:void* caller;     offset:16;      size:8; signed:0;
        field:bool io_wait;     offset:24;      size:1; signed:0;

print fmt: "pid=%d iowait=%d caller=%pS", REC->pid, REC->io_wait, REC->caller

2. 打印格式举例:

crtc_commit:158 918-918     (    918) [002] d..2  2521.440658: sched_blocked_reason: pid=27443 iowait=0 caller=icc_set_tag+0x5c/0x1a4
               <idle>-0     (-------) [003] d.s3  2521.444053: sched_blocked_reason: pid=13 iowait=0 caller=rcu_gp_fqs_loop+0x154/0xac4
crtc_event:158 919-919      (    919) [000] d..5  2521.449214: sched_waking: comm=presentFence pid=2987 prio=98 target_cpu=000

 

 

 

 

 

 

参考:

深入理解Linux ftrace 之 trace event: https://mp.weixin.qq.com/s/1A02qv5SIEgTEvsN1DWzqQ

 

posted on 2022-10-11 16:57  Hello-World3  阅读(895)  评论(0编辑  收藏  举报

导航