nf_conntrack的丢包

   测试盒子设备 IPv6 的时候发现 CPS 性能上不去,抓包发现三次握手出现了重传,但查看 ifconfig、netstat、ethtool 等信息时没有发现丢包!

dmesg 中出现了 "nf_conntrack: table full, dropping packet" 日志,说明 nf_conntrack 的连接跟踪表满了!!

所以按照目前的需求可以去掉nf_conntrack;

  但是nf_conntrack的drop的报文是在哪里统计的呢?

结论是在:/proc/net/stat/nf_conntrack  里面能看到其统计信息

其内核代码如下所示:

static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
    struct net *net = seq_file_net(seq);
    unsigned int nr_conntracks = atomic_read(&net->ct.count);
    const struct ip_conntrack_stat *st = v;

    if (v == SEQ_START_TOKEN) {
        seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert
            insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
        return 0;
    }

    seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
            "%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
           nr_conntracks,
           st->searched,
           st->found,
           st->new,
           st->invalid,
           st->ignore,
           st->delete,
           st->delete_list,
           st->insert,
           st->insert_failed,
           st->drop,
           st->early_drop,
           st->error,

           st->expect_new,
           st->expect_create,
           st->expect_delete,
           st->search_restart
        );
    return 0;
}

 

所以后续处理问题时,需要查看 /proc/net/stat/nf_conntrack 文件! 其中的 drop 等列显示了由于 ct 导致丢弃的报文统计(各列均以十六进制显示);

由于报文是通过 kfree_skb 丢弃的,想知道丢弃时的调用堆栈,可以使用 perf record -g -a -e skb:kfree_skb 来跟踪,这样就知道是什么问题导致的丢包!!

 目前内核里面还有一套 packet drop 监控机制(drop monitor,可配合 dropwatch 工具使用),可以学习一波。

PS: 其实也可以使用 NOTRACK 目标,让指定的报文不被连接跟踪:

iptables -t raw -A PREROUTING -i ens33 -p tcp --dport 80 --syn -j NOTRACK

 

NOTRACK目标

以下结构notrack_tg_reg定义了"NOTRACK"目标,其位于"raw"表中

/*
 * xtables target descriptor for the "NOTRACK" target.
 *
 * .table restricts the target to the "raw" table, .target names the
 * per-packet handler (notrack_tg) and .checkentry names the rule validation
 * hook (notrack_chk, not shown in this excerpt).
 * NOTE(review): .family = NFPROTO_UNSPEC presumably makes the target
 * available to every address family - confirm against the xtables core.
 */
static struct xt_target notrack_tg_reg __read_mostly = {
    .name        = "NOTRACK",
    .revision    = 0,
    .family        = NFPROTO_UNSPEC,
    .checkentry    = notrack_chk,
    .target        = notrack_tg,
    .table        = "raw",
    .me        = THIS_MODULE,
};

 

处理函数notrack_tg如下,首先检查skb结构中的_nfct字段是否为0,不为0的话,表明该报文之前已经被处理过(可能是由环回接口接收的报文),对此种报文不做处理。否则,通过函数nf_ct_set将skb关联的conntrack指针置为NULL,并把其状态设置为IP_CT_UNTRACKED。

 

/*
 * Target handler for "NOTRACK".
 *
 * A packet whose skb->_nfct is still zero is tagged with a NULL conntrack
 * and the IP_CT_UNTRACKED state.  A packet that already has _nfct set
 * (previously seen, e.g. via loopback) is left untouched.
 *
 * @par is unused.  Always returns XT_CONTINUE.
 */
static unsigned int
notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
    if (skb->_nfct == 0)
        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

    return XT_CONTINUE;
}

跳过conntrack建立

在如下conntrack入口函数nf_conntrack_in中,如果报文设置了IP_CT_UNTRACKED标志,直接返回NF_ACCEPT。

/*
 * Conntrack entry hook (excerpt - only the early "already handled" check is
 * reproduced here).
 *
 * Looks at the conntrack state already attached to @skb:
 *  - a non-template conntrack, or the IP_CT_UNTRACKED state, means the packet
 *    must not be tracked again: the "ignore" statistic is incremented and
 *    NF_ACCEPT is returned immediately;
 *  - otherwise (only a template is attached) skb->_nfct is cleared and
 *    processing continues in the part of the function not shown.
 */
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
    enum ip_conntrack_info ctinfo;
    struct nf_conn *ct, *tmpl;
    u_int8_t protonum;
    int dataoff, ret;

    tmpl = nf_ct_get(skb, &ctinfo);
    if (tmpl || ctinfo == IP_CT_UNTRACKED) {
        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((tmpl && !nf_ct_is_template(tmpl)) ||
             ctinfo == IP_CT_UNTRACKED) {
            NF_CT_STAT_INC_ATOMIC(state->net, ignore);
            return NF_ACCEPT;
        }
        /* Reaching here means tmpl is a template: detach it from the skb. */
        skb->_nfct = 0;
    }
    /* ... excerpt ends here; the remainder of the function is omitted ... */

CT目标和notrack标志

CT目标注册了三个版本的xt结构,以下以revision=2为例,与上节一样,其注册在raw表中:

    /*
     * Revision 2 entry of the "CT" target (one element of a target array
     * whose declaration is not shown in this excerpt).  Like NOTRACK it is
     * limited to the "raw" table.
     * NOTE(review): this entry pairs the v2 checkentry with the v1
     * destroy/target handlers, sizes itself with xt_ct_target_info_v1, and
     * computes usersize from struct xt_ct_target_info - confirm against the
     * upstream source that this mix is intentional.
     */
    {
        .name        = "CT",
        .family        = NFPROTO_UNSPEC,
        .revision    = 2,
        .targetsize    = sizeof(struct xt_ct_target_info_v1),
        .usersize    = offsetof(struct xt_ct_target_info, ct),
        .checkentry    = xt_ct_tg_check_v2,
        .destroy    = xt_ct_tg_destroy_v1,
        .target        = xt_ct_target_v1,
        .table        = "raw",
        .me        = THIS_MODULE,
    },

static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
{
    /* Previously seen (loopback)? Ignore. */
    if (skb->_nfct != 0)
        return XT_CONTINUE;

    if (ct) {
        atomic_inc(&ct->ct_general.use);
        nf_ct_set(skb, ct, IP_CT_NEW);
    } else {
        nf_ct_set(skb, ct, IP_CT_UNTRACKED);
    }

    return XT_CONTINUE;
}

在PREROUTING点上,raw表(其中的notrack规则)的hook优先级为NF_IP_PRI_RAW(-300),数值小于conntrack处理函数的优先级NF_IP_PRI_CONNTRACK(-200);数值越小越先执行,所以raw表中的规则先于conntrack被处理。notrack规则设置了之后,将不对匹配的报文建立conntrack结构,不进行追踪

/*
 * Priorities of the IPv4 netfilter hooks, listed in ascending numeric order
 * from INT_MIN to INT_MAX.
 *
 * Relevant to the surrounding article: NF_IP_PRI_RAW (-300) is numerically
 * smaller than NF_IP_PRI_CONNTRACK (-200), which the article relies on to
 * state that raw-table rules are evaluated before the conntrack hook.
 * NF_IP_PRI_CONNTRACK_CONFIRM and NF_IP_PRI_LAST share the value INT_MAX.
 */
enum nf_ip_hook_priorities {
    NF_IP_PRI_FIRST = INT_MIN,
    NF_IP_PRI_RAW_BEFORE_DEFRAG = -450,
    NF_IP_PRI_CONNTRACK_DEFRAG = -400,
    NF_IP_PRI_RAW = -300,
    NF_IP_PRI_SELINUX_FIRST = -225,
    NF_IP_PRI_CONNTRACK = -200,
    NF_IP_PRI_MANGLE = -150,
    NF_IP_PRI_NAT_DST = -100,
    NF_IP_PRI_FILTER = 0,
    NF_IP_PRI_SECURITY = 50,
    NF_IP_PRI_NAT_SRC = 100,
    NF_IP_PRI_SELINUX_LAST = 225,
    NF_IP_PRI_CONNTRACK_HELPER = 300,
    NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
    NF_IP_PRI_LAST = INT_MAX,
};

以上代码参考linux 5.8内核

 注意:设置NOTRACK规则时要区分IPv4和IPv6,也就是iptables和ip6tables都需要分别配置

posted @ 2021-11-11 20:40  codestacklinuxer  阅读(127)  评论(0)  编辑  收藏  举报