DPVS new-connection setup (repost)

TCP new-connection scheduling

/**
 * set @verdict if failed to schedule
 * New connections are scheduled by conn_sched; for TCP services this is
 * tcp_conn_sched, invoked from __dp_vs_in (a sketch of that dispatch follows
 * this function).
 * Return values:
 *   EDPVS_OK: continue LVS processing
 *   others:   stop LVS processing and return the verdict set here as the hook result
 */
static int tcp_conn_sched(struct dp_vs_proto *proto,
                          const struct dp_vs_iphdr *iph,
                          struct rte_mbuf *mbuf,
                          struct dp_vs_conn **conn,
                          int *verdict)
{
    struct tcphdr *       th, _tcph;
    struct dp_vs_service *svc;
    bool outwall = false;

    //sanity checks
    assert(proto && iph && mbuf && conn && verdict);
    //grab the TCP header; pointer arithmetic only, no data is copied
    th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph);
    //if the TCP header cannot be read (usually a bad packet length), drop the packet
    if (unlikely(!th))
    {
        *verdict = INET_DROP;
        return(EDPVS_INVPKT);
    }

    /* Syn-proxy step 2 logic: receive client's 3-way-handshake ACK packet */

    /* When synproxy disabled, only SYN packets can arrive here.
     * So don't judge SYNPROXY flag here! If SYNPROXY flag judged, and syn_proxy
     * got disabled and keepalived reloaded, SYN packets for RS may never be sent. */
    //if this is the third-handshake ACK of a syn-cookie connection, return EDPVS_PKTSTOLEN
    if (dp_vs_synproxy_ack_rcv(iph->af, mbuf, th, proto, conn, iph, verdict) == 0)
    {
        /* Attention: First ACK packet is also stored in conn->ack_mbuf */
        return(EDPVS_PKTSTOLEN);
    }

    /* only TCP-SYN without other flag can be scheduled */
    //a new connection may only be created by a pure SYN; reaching here means the packet is not one
    if (!th->syn || th->ack || th->fin || th->rst)
    {
#ifdef CONFIG_DPVS_IPVS_DEBUG
        char        dbuf[64], sbuf[64];
        const char *daddr, *saddr;

        daddr = inet_ntop(iph->af, &iph->daddr, dbuf, sizeof(dbuf)) ? dbuf : "::";
        saddr = inet_ntop(iph->af, &iph->saddr, sbuf, sizeof(sbuf)) ? sbuf : "::";
        RTE_LOG(DEBUG, IPVS,
                "%s: [%d] try sched non-SYN packet: [%c%c%c%c] %s/%d->%s/%d\\n",
                __func__, rte_lcore_id(),
                th->syn ? 'S' : '.', th->fin ? 'F' : '.',
                th->ack ? 'A' : '.', th->rst ? 'R' : '.',
                saddr, ntohs(th->source), daddr, ntohs(th->dest));
#endif

        /* Drop tcp packet which is sent to vip and !vport */
        //drop datagrams destined to the VIP but not to a valid vport
        if (g_defence_tcp_drop &&
            (svc = dp_vs_vip_lookup(iph->af, iph->proto,
                                    &iph->daddr, rte_lcore_id())))
        {
            dp_vs_estats_inc(DEFENCE_TCP_DROP);
            *verdict = INET_DROP;
            return(EDPVS_INVPKT);
        }
        //otherwise let the packet pass: set the verdict to INET_ACCEPT and return EDPVS_INVAL
        *verdict = INET_ACCEPT;
        return(EDPVS_INVAL);
    }
    //look up the dp_vs_service by the request's destination address and port
    svc = dp_vs_service_lookup(iph->af, iph->proto, &iph->daddr, th->dest,
                               0, mbuf, NULL, &outwall, rte_lcore_id());
    if (!svc)
    {
        /* Drop tcp packet which is sent to vip and !vport */
        //drop datagrams destined to the VIP but not to a valid vport
        if (g_defence_tcp_drop &&
            (svc = dp_vs_vip_lookup(iph->af, iph->proto,
                                    &iph->daddr, rte_lcore_id())))
        {
            dp_vs_estats_inc(DEFENCE_TCP_DROP);
            *verdict = INET_DROP;
            return(EDPVS_INVPKT);
        }
        *verdict = INET_ACCEPT;
        return(EDPVS_NOSERV);
    }
    //pick a backend real server from the dp_vs_service and build the connection
    *conn = dp_vs_schedule(svc, iph, mbuf, false, outwall);
    if (!*conn)
    {
        *verdict = INET_DROP;
        return(EDPVS_RESOURCE);
    }

    return(EDPVS_OK);
}
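
For context, here is how the verdict contract above is consumed by the caller. This is a paraphrased sketch of the dispatch inside __dp_vs_in, not verbatim DPVS code, and the local variable names are assumptions:

/* sketch: inside __dp_vs_in, after the per-protocol conn lookup misses */
if (unlikely(!conn))
{
    /* ask the protocol to schedule a new connection, e.g. tcp_conn_sched */
    err = prot->conn_sched(prot, &iph, mbuf, &conn, &verdict);
    if (err != EDPVS_OK)
    {
        return verdict;   /* INET_DROP / INET_ACCEPT / ... as set by conn_sched */
    }
}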
  • dp_vs_schedule

 

/**
 * select an RS by service's scheduler and create a connection
 * dp_vs_schedule: backend scheduling for a new connection; picks one real server
 */
struct dp_vs_conn *dp_vs_schedule(struct dp_vs_service *svc,
                                  const struct dp_vs_iphdr *iph,
                                  struct rte_mbuf *mbuf,
                                  bool is_synproxy_on,
                                  bool outwall)
{
    uint16_t                _ports[2], *ports; /* sport, dport */
    struct dp_vs_dest *     dest;
    struct dp_vs_conn *     conn;
    struct dp_vs_conn_param param;

    assert(svc && iph && mbuf);
    //ports points into the mbuf at the source/destination ports; no copy
    ports = mbuf_header_pointer(mbuf, iph->len, sizeof(_ports), _ports);
    if (!ports)
    {
        return(NULL);
    }

    /* persistent service */
    //persistent service request
    if (svc->flags & DP_VS_SVC_F_PERSISTENT)
    {
        return(dp_vs_sched_persist(svc, iph, mbuf, is_synproxy_on));
    }
    //pick a real server via the service's scheduling algorithm (commonly rr, wrr, wlc; analyzed separately later, and a minimal rr-style sketch follows this function); the returned dest is the backend RS
    dest = svc->scheduler->schedule(svc, mbuf);
    if (!dest)
    {
        RTE_LOG(WARNING, IPVS, "%s: no dest found.\n", __func__);
#ifdef CONFIG_DPVS_MBUF_DEBUG
        dp_vs_mbuf_dump("found dest failed.", iph->af, mbuf);
#endif
        return(NULL);
    }
    //SNAT-specific handling
    if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
    {
        return(dp_vs_snat_schedule(dest, iph, ports, mbuf, outwall));
    }
    //ICMP handling
    if (unlikely(iph->proto == IPPROTO_ICMP))
    {
        struct icmphdr *ich, _icmph;
        ich = mbuf_header_pointer(mbuf, iph->len, sizeof(_icmph), &_icmph);
        if (!ich)
        {
            return(NULL);
        }

        ports     = _ports;
        _ports[0] = icmp4_id(ich);
        _ports[1] = ich->type << 8 | ich->code;

        dp_vs_conn_fill_param(iph->af, iph->proto,
                              &iph->saddr, &iph->daddr,
                              ports[0], ports[1], 0, &param);
    }
    else if (unlikely(iph->proto == IPPROTO_ICMPV6))
    {
        struct icmp6_hdr *ic6h, _ic6hp;
        ic6h = mbuf_header_pointer(mbuf, iph->len, sizeof(_ic6hp), &_ic6hp);
        if (!ic6h)
        {
            return(NULL);
        }

        ports     = _ports;
        _ports[0] = icmp6h_id(ic6h);
        _ports[1] = ic6h->icmp6_type << 8 | ic6h->icmp6_code;

        dp_vs_conn_fill_param(iph->af, iph->proto,
                              &iph->daddr, &dest->addr,
                              ports[1], ports[0],
                              0, &param);
    }
    else
    {
        //fill in proto, caddr, vaddr, cport, vport for the new connection
        dp_vs_conn_fill_param(iph->af, iph->proto,
                              &iph->saddr, &iph->daddr,
                              ports[0], ports[1], 0, &param);
    }
    //create the proxy connection from the params and the chosen dest
    conn = dp_vs_conn_new(mbuf, iph, &param, dest,
                          is_synproxy_on ? DPVS_CONN_F_SYNPROXY : 0);
    if (!conn)
    {
        return(NULL);
    }

    dp_vs_stats_conn(conn);
    return(conn);
}
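
The schedule callback invoked above has the shape schedule(svc, mbuf) -> dest. As a flavor of such an algorithm, here is a minimal round-robin-style sketch: it stashes an integer cursor in svc->sched_data and omits the weight/overload checks the real dp_vs_rr.c performs; svc->dests, n_list and sched_data are assumed field names:

static struct dp_vs_dest *rr_schedule_sketch(struct dp_vs_service *svc,
                                             const struct rte_mbuf *mbuf)
{
    uintptr_t          cur = (uintptr_t)svc->sched_data; /* integer cursor (sketch) */
    uint32_t           n = 0, i = 0;
    struct dp_vs_dest *dest;

    //count the candidate dests
    list_for_each_entry(dest, &svc->dests, n_list)
    {
        n++;
    }
    if (n == 0)
    {
        return NULL;   /* caller logs "no dest found" */
    }
    //advance the cursor, then return the dest it pointed at
    svc->sched_data = (void *)((cur + 1) % n);
    list_for_each_entry(dest, &svc->dests, n_list)
    {
        if (i++ == cur % n)
        {
            return dest;
        }
    }
    return NULL;   /* not reached */
}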
  • dp_vs_conn memory pool
/*
 * memory pool for dp_vs_conn{}
 */
//dp_vs_conn memory pools, a per-socket array
static struct rte_mempool *dp_vs_conn_cache[DPVS_MAX_SOCKET];
#define this_conn_count    (RTE_PER_LCORE(dp_vs_conn_count))
#define this_conn_cache    (dp_vs_conn_cache[rte_socket_id()])
//per-lcore connection hash table
#define this_conn_tbl      (RTE_PER_LCORE(dp_vs_conn_tbl))
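
For reference, a sketch of the per-lcore definitions behind these macros, using DPDK's RTE_DEFINE_PER_LCORE (the exact declarations live in dp_vs_conn.c):

/* sketch: per-lcore variables matching the accessor macros above */
static RTE_DEFINE_PER_LCORE(struct list_head *, dp_vs_conn_tbl); /* hash buckets */
static RTE_DEFINE_PER_LCORE(uint32_t, dp_vs_conn_count);         /* conns on this lcore */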

dp_vs_conn_init

  • initialize this_conn_tbl, the per-lcore hash table used for conn lookup
  • initialize the conn mempools, a per-socket array
int dp_vs_conn_init(void)
{
    int       i, err;
    lcoreid_t lcore;
    char      poolname[32];

    /* init connection template table */
    //template-connection lookup table, used by persistent scheduling
    dp_vs_ct_tbl = rte_malloc_socket(NULL, sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
                                     RTE_CACHE_LINE_SIZE, rte_socket_id());

    for (i = 0; i < DPVS_CONN_TBL_SIZE; i++)
    {
        INIT_LIST_HEAD(&dp_vs_ct_tbl[i]);
    }
    rte_spinlock_init(&dp_vs_ct_lock);

    /*
     * unlike linux per_cpu() which can assign CPU number,
     * RTE_PER_LCORE() can only access own instances.
     * it makes the code look strange.
     */
    //initialize this_conn_tbl, the per-lcore hash table used for conn lookup
    rte_eal_mp_remote_launch(conn_init_lcore, NULL, SKIP_MASTER);
    RTE_LCORE_FOREACH_SLAVE(lcore)
    {
        if ((err = rte_eal_wait_lcore(lcore)) < 0)
        {
            RTE_LOG(WARNING, IPVS, "%s: lcore %d: %s.\n",
                    __func__, lcore, dpvs_strerror(err));
        }
    }

    conn_ctrl_init();

    /* connection cache on each NUMA socket */
    //initialize the conn mempools, one per NUMA socket
    for (i = 0; i < get_numa_nodes(); i++)
    {
        snprintf(poolname, sizeof(poolname), "dp_vs_conn_%d", i);
        dp_vs_conn_cache[i] = rte_mempool_create(poolname,
                                                 conn_pool_size,
                                                 sizeof(struct dp_vs_conn),
                                                 conn_pool_cache,
                                                 0, NULL, NULL, NULL, NULL,
                                                 i, 0);
        if (!dp_vs_conn_cache[i])
        {
            err = EDPVS_NOMEM;
            goto cleanup;
        }
    }

    dp_vs_conn_rnd = (uint32_t)random();

    return(EDPVS_OK);

cleanup:
    dp_vs_conn_term();
    return(err);
}

static int conn_init_lcore(void *arg)
{
    int i;

    if (!rte_lcore_is_enabled(rte_lcore_id()))
    {
        return(EDPVS_DISABLED);
    }

    if (netif_lcore_is_idle(rte_lcore_id()))
    {
        return(EDPVS_IDLE);
    }
    //allocate the hash bucket heads used for conn lookup
    this_conn_tbl = rte_malloc_socket(NULL,
                                      sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
    if (!this_conn_tbl)
    {
        return(EDPVS_NOMEM);
    }

    for (i = 0; i < DPVS_CONN_TBL_SIZE; i++)
    {
        INIT_LIST_HEAD(&this_conn_tbl[i]);
    }

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    rte_spinlock_init(&this_conn_lock);
#endif
    this_conn_count = 0;

    return(EDPVS_OK);
}

 

  • dp_vs_conn_alloc
static struct dp_vs_conn *dp_vs_conn_alloc(enum dpvs_fwd_mode fwdmode,
                                           uint32_t flags)
{
    struct dp_vs_conn *    conn;
    struct dp_vs_redirect *r = NULL;
    //allocate a conn from the mempool of the current NUMA socket
    if (unlikely(rte_mempool_get(this_conn_cache, (void **)&conn) != 0))
    {
        RTE_LOG(ERR, IPVS, "%s: no memory for connection\n", __func__);
        return(NULL);
    }
    //zero the conn and record its connpool so it can be returned to the right pool on free (see the free-path sketch after this function)
    memset(conn, 0, sizeof(struct dp_vs_conn));
    conn->connpool = this_conn_cache;
    this_conn_count++;

    /* no need to create redirect for the global template connection */
    //allocate a dp_vs_redirect depending on flags, mainly for FNAT, SNAT and NAT modes
    if (likely((flags & DPVS_CONN_F_TEMPLATE) == 0))
    {
        r = dp_vs_redirect_alloc(fwdmode);
    }

    conn->redirect = r;

    return(conn);
}
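
The connpool back-pointer recorded above pays off on the release side: a conn freed from any lcore goes back to the per-socket pool it came from. A hedged sketch of that free path (the real dp_vs_conn_free also deals with the redirect):

/* sketch: return a conn to the mempool it was allocated from */
static void dp_vs_conn_free_sketch(struct dp_vs_conn *conn)
{
    if (unlikely(!conn))
    {
        return;
    }
    rte_mempool_put(conn->connpool, conn);
    this_conn_count--;
}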

 

  • dp_vs_conn_new
/**
 * create a new connection
 * mbuf:  the received packet
 * iph:   IP-layer data: header length, source/destination addresses
 * param: parameters for the new connection, mainly the TCP 4-tuple
 * dest:  the real server chosen via the dp_vs_service
 */
struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf,
                                  const struct dp_vs_iphdr *iph,
                                  struct dp_vs_conn_param *param,
                                  struct dp_vs_dest *dest,
                                  uint32_t flags)
{
    struct dp_vs_conn *     new;
    struct conn_tuple_hash *t;
    uint16_t rport;
    __be16   _ports[2], *ports;
    int      err;

    assert(mbuf && param && dest);
    //allocate a dp_vs_conn from the mempool
    new = dp_vs_conn_alloc(dest->fwdmode, flags);
    if (unlikely(!new))
    {
        return(NULL);
    }
    //set the connection flags
    new->flags = flags;

    /* set proper RS port */
    if (dp_vs_conn_is_template(new) || param->ct_dport != 0)
    {
        rport = param->ct_dport;
    }
    else if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
    {
        //for ICMP(v6) packets, rport comes from param (set earlier from the ICMP id/type data)
        if (unlikely(param->proto == IPPROTO_ICMP ||
                     param->proto == IPPROTO_ICMPV6))
        {
            rport = param->vport;
        }
        else
        {
            //otherwise take rport from the mbuf
            ports = mbuf_header_pointer(mbuf, iph->len, sizeof(_ports), _ports);
            if (unlikely(!ports))
            {
                RTE_LOG(WARNING, IPVS, "%s: no memory\n", __func__);
                goto errout;
            }
            //rport is the source port; in SNAT mode traffic flows internal server ---> external server (e.g. www.baidu.com)
            rport = ports[0];
        }
    }
    else
    {
        //rport is the service port of the chosen backend real server
        rport = dest->port;
    }
    //a conn carries a two-entry tuplehash array, one per direction; source and destination mean different things in each direction (a struct sketch follows dp_vs_conn_new)
    /* init inbound conn tuple hash */
    //t points at the DPVS_CONN_DIR_INBOUND entry of the tuplehash,
    //i.e. external server (e.g. baidu.com) -> DPVS -> internal server
    t         = &tuplehash_in(new);
    t->direct = DPVS_CONN_DIR_INBOUND;
    t->af     = param->af;
    t->proto  = param->proto;
    //source address: the external client's address
    t->saddr = *param->caddr;
    t->sport = param->cport;
    //destination address: the service VIP
    t->daddr = *param->vaddr;
    t->dport = param->vport;
    INIT_LIST_HEAD(&t->list);

    /* init outbound conn tuple hash */
    //t points at the DPVS_CONN_DIR_OUTBOUND entry of the tuplehash,
    //i.e. internal server -> DPVS -> external server (e.g. baidu.com)
    t         = &tuplehash_out(new);
    t->direct = DPVS_CONN_DIR_OUTBOUND;
    t->af     = dest->af;
    t->proto  = param->proto;
    if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
    {
        //in SNAT mode (internal servers acting as clients towards external servers), saddr is the source address from the mbuf
        t->saddr = iph->saddr;
    }
    else
    {
        //otherwise use the backend real server's address
        t->saddr = dest->addr;
    }
    //source port is the rport computed above, analogous to saddr
    t->sport = rport;
    t->daddr = *param->caddr;       /* non-FNAT */
    t->dport = param->cport;        /* non-FNAT */
    INIT_LIST_HEAD(&t->list);

    /* init connection */
    //fill in the conn's address family, addresses and ports
    new->af    = param->af;
    new->proto = param->proto;
    new->caddr = *param->caddr;
    new->cport = param->cport;
    new->vaddr = *param->vaddr;
    new->vport = param->vport;
    new->laddr = *param->caddr;     /* non-FNAT */
    new->lport = param->cport;      /* non-FNAT */
    if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
    {
        new->daddr = iph->saddr;
    }
    else
    {
        new->daddr = dest->addr;
    }
    new->dport   = rport;
    new->outwall = param->outwall;

    /* neighbour confirm cache */
    if (AF_INET == tuplehash_in(new).af)
    {
        new->in_nexthop.in.s_addr = htonl(INADDR_ANY);
    }
    else
    {
        new->in_nexthop.in6 = in6addr_any;
    }

    if (AF_INET == tuplehash_out(new).af)
    {
        new->out_nexthop.in.s_addr = htonl(INADDR_ANY);
    }
    else
    {
        new->out_nexthop.in6 = in6addr_any;
    }

    new->in_dev  = NULL;
    new->out_dev = NULL;

    /* Control member */
    new->control = NULL;
    rte_atomic32_clear(&new->n_control);

    /* caller will use it right after created,
     * just like dp_vs_conn_get(). */
    rte_atomic32_set(&new->refcnt, 1);
    new->state = 0;
#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG
    new->ctime = rte_rdtsc();
#endif

    /* bind destination and corresponding transmitter */
    //bind the conn to the chosen real server (dest); mainly installs the per-forwarding-mode transmit functions
    err = dp_vs_conn_bind_dest(new, dest);
    if (err != EDPVS_OK)
    {
        RTE_LOG(WARNING, IPVS, "%s: fail to bind dest: %s\n",
                __func__, dpvs_strerror(err));
        goto errout;
    }

    /* FNAT only: select and bind local address/port */
    //full-NAT specific handling
    if (dest->fwdmode == DPVS_FWD_MODE_FNAT)
    {
        if ((err = dp_vs_laddr_bind(new, dest->svc)) != EDPVS_OK)
        {
            goto unbind_dest;
        }
    }

    /* init redirect if it exists */
    //initialize the conn's dp_vs_redirect; in NAT modes the inside->outside and outside->inside
    //packets of one connection may be received on different NIC queues and thus land on different lcores
    dp_vs_redirect_init(new);

    /* add to hash table (dual dir for each bucket) */
    //dp_vs_conn_hash adds the connection to this_conn_tbl: both tuplehash directions go into the flow table so either direction can be looked up
    if ((err = dp_vs_conn_hash(new)) != EDPVS_OK)
    {
        goto unbind_laddr;
    }

    /* timer */
    //default timeout
    new->timeout.tv_sec  = conn_init_timeout;
    new->timeout.tv_usec = 0;

    /* synproxy */
    INIT_LIST_HEAD(&new->ack_mbuf);
    //reset syn_retry_max to 0
    rte_atomic32_set(&new->syn_retry_max, 0);
    //reset dup_ack_cnt to 0
    rte_atomic32_set(&new->dup_ack_cnt, 0);
    //synproxy handling
    if ((flags & DPVS_CONN_F_SYNPROXY) && !dp_vs_conn_is_template(new))
    {
        struct tcphdr _tcph, *th = NULL;
        struct dp_vs_synproxy_ack_pakcet *ack_mbuf;
        struct dp_vs_proto *pp;

        th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph);
        if (!th)
        {
            RTE_LOG(ERR, IPVS, "%s: get tcphdr failed\n", __func__);
            goto unbind_laddr;
        }

        /* save ack packet */
        if (unlikely(rte_mempool_get(this_ack_mbufpool, (void **)&ack_mbuf) != 0))
        {
            RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__);
            goto unbind_laddr;
        }
        //queue the mbuf on the conn's ack_mbuf list
        ack_mbuf->mbuf = mbuf;
        list_add_tail(&ack_mbuf->list, &new->ack_mbuf);
        new->ack_num++;
        sp_dbg_stats32_inc(sp_ack_saved);

        /* save ack_seq - 1 */
        //save ack_seq - 1 from the client's ACK as syn_proxy_seq.isn, and ack_seq itself as fnat_seq.fdata_seq
        new->syn_proxy_seq.isn =
            htonl((uint32_t)((ntohl(th->ack_seq) - 1)));

        /* save ack_seq */
        new->fnat_seq.fdata_seq = ntohl(th->ack_seq);

        /* FIXME: use DP_VS_TCP_S_SYN_SENT for syn */
        pp = dp_vs_proto_lookup(param->proto);
        new->timeout.tv_sec = pp->timeout_table[new->state = DPVS_TCP_S_SYN_SENT];
    }

    /* schedule conn timer */
    dpvs_time_rand_delay(&new->timeout, 1000000);
    //finally attach the connection to a timer to manage its timeout; timeouts differ per TCP state (the timer subsystem is analyzed separately)
    //the expiry handler is dp_vs_conn_expire
    dp_vs_conn_attach_timer(new, true);

#ifdef CONFIG_DPVS_IPVS_DEBUG
    conn_dump("new conn: ", new);
#endif
    return(new);

unbind_laddr:
    dp_vs_laddr_unbind(new);
unbind_dest:
    dp_vs_conn_unbind_dest(new);
errout:
    dp_vs_conn_free(new);
    return(NULL);
}
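
Putting the two directions together: every dp_vs_conn embeds a two-entry tuplehash array, one entry per direction. Based on the accessors used above, the per-direction entry looks roughly like this (a sketch; field order and exact types are assumptions):

/* sketch of the per-direction tuple entry inside dp_vs_conn */
struct conn_tuple_hash_sketch {
    struct list_head list;   /* bucket linkage in this_conn_tbl / dp_vs_ct_tbl */
    uint8_t          direct; /* DPVS_CONN_DIR_INBOUND or DPVS_CONN_DIR_OUTBOUND */
    int              af;
    uint8_t          proto;
    union inet_addr  saddr;  /* "source" as seen by packets in this direction */
    uint16_t         sport;
    union inet_addr  daddr;
    uint16_t         dport;
};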

dp_vs_conn_bind_dest

  • bind the conn to its dest
  • install the conn's transmit functions
static int dp_vs_conn_bind_dest(struct dp_vs_conn *conn,
                                struct dp_vs_dest *dest)
{
    /* ATTENTION:
     *   Initial state of conn should be INACTIVE, with conn->inactconns=1 and
     *   conn->actconns=0. We should not increase conn->actconns except in session
     *   sync. Generally, the INACTIVE and SYN_PROXY flags are passed down from
     *   the dest here. */
    conn->flags |= rte_atomic16_read(&dest->conn_flags);

    if (dest->max_conn &&
        (rte_atomic32_read(&dest->inactconns) +
         rte_atomic32_read(&dest->actconns) >= dest->max_conn))
    {
        dest->flags |= DPVS_DEST_F_OVERLOAD;
        return(EDPVS_OVERLOAD);
    }
    //take a reference on dest
    rte_atomic32_inc(&dest->refcnt);

    if (dp_vs_conn_is_template(conn))
    {
        rte_atomic32_inc(&dest->persistconns);
    }
    else
    {
        rte_atomic32_inc(&dest->inactconns);
    }
    //install the per-mode transmit functions: NAT-family traffic traverses the LB in both directions, while DR/TUNNEL are one-armed modes with inbound traffic only and no return traffic through the LB (a dispatch sketch follows this function)
    switch (dest->fwdmode)
    {
    case DPVS_FWD_MODE_NAT:
        conn->packet_xmit     = dp_vs_xmit_nat;
        conn->packet_out_xmit = dp_vs_out_xmit_nat;
        break;

    case DPVS_FWD_MODE_TUNNEL:
        conn->packet_xmit = dp_vs_xmit_tunnel;
        break;

    case DPVS_FWD_MODE_DR:
        conn->packet_xmit = dp_vs_xmit_dr;
        break;

    case DPVS_FWD_MODE_FNAT:
        conn->packet_xmit     = dp_vs_xmit_fnat;
        conn->packet_out_xmit = dp_vs_out_xmit_fnat;
        break;

    case DPVS_FWD_MODE_SNAT:
        conn->packet_xmit     = dp_vs_xmit_snat;
        conn->packet_out_xmit = dp_vs_out_xmit_snat;
        break;

    default:
        return(EDPVS_NOTSUPP);
    }

    conn->dest = dest;
    return(EDPVS_OK);
}
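
Once bound, the forwarding fast path simply calls through these pointers. A simplified sketch of how the caller dispatches on packet direction (paraphrased, not verbatim __dp_vs_in):

/* sketch: dispatch on the packet's direction after conn lookup */
if (dir == DPVS_CONN_DIR_INBOUND)
{
    err = conn->packet_xmit(prot, conn, mbuf);     /* client -> RS */
}
else
{
    err = conn->packet_out_xmit(prot, conn, mbuf); /* RS -> client, NAT family only */
}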

 

  • dp_vs_conn_hash

 

static inline int dp_vs_conn_hash(struct dp_vs_conn *conn)
{
    int err;

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    rte_spinlock_lock(&this_conn_lock);
#endif
    //add the conn to the hash table for lookup; the tuplehashes of both the in and out directions are inserted
    err = __dp_vs_conn_hash(conn, DPVS_CONN_TBL_MASK);

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    rte_spinlock_unlock(&this_conn_lock);
#endif
    //add the conn to the redirect hash
    dp_vs_redirect_hash(conn);

    return(err);
}

static inline int __dp_vs_conn_hash(struct dp_vs_conn *conn, uint32_t mask)
{
    uint32_t ihash, ohash;

    if (unlikely(conn->flags & DPVS_CONN_F_HASHED))
    {
        return(EDPVS_EXIST);
    }
    //compute the tuplehash keys for both directions (a hashkey sketch follows this function)
    ihash = dp_vs_conn_hashkey(tuplehash_in(conn).af,
                               &tuplehash_in(conn).saddr, tuplehash_in(conn).sport,
                               &tuplehash_in(conn).daddr, tuplehash_in(conn).dport,
                               mask);

    ohash = dp_vs_conn_hashkey(tuplehash_out(conn).af,
                               &tuplehash_out(conn).saddr, tuplehash_out(conn).sport,
                               &tuplehash_out(conn).daddr, tuplehash_out(conn).dport,
                               mask);
    //template (persistent) connections go into dp_vs_ct_tbl, everything else into the this_conn_tbl hash table
    if (dp_vs_conn_is_template(conn))
    {
        /* lock is compulsory for template */
        rte_spinlock_lock(&dp_vs_ct_lock);
        list_add(&tuplehash_in(conn).list, &dp_vs_ct_tbl[ihash]);
        list_add(&tuplehash_out(conn).list, &dp_vs_ct_tbl[ohash]);
        rte_spinlock_unlock(&dp_vs_ct_lock);
    }
    else
    {
        list_add(&tuplehash_in(conn).list, &this_conn_tbl[ihash]);
        list_add(&tuplehash_out(conn).list, &this_conn_tbl[ohash]);
    }

    conn->flags |= DPVS_CONN_F_HASHED;
    rte_atomic32_inc(&conn->refcnt);

    return(EDPVS_OK);
}
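
dp_vs_conn_hashkey itself boils down to a seeded jhash over the tuple, masked to a bucket index. A sketch of the idea for IPv4 only (the real function also handles IPv6; dp_vs_conn_rnd is the random seed drawn in dp_vs_conn_init):

#include <rte_jhash.h>

/* sketch: bucket index from (saddr, sport, daddr, dport), IPv4 only */
static inline uint32_t conn_hashkey_sketch(const union inet_addr *saddr, uint16_t sport,
                                           const union inet_addr *daddr, uint16_t dport,
                                           uint32_t mask)
{
    return rte_jhash_3words(saddr->in.s_addr, daddr->in.s_addr,
                            ((uint32_t)sport) << 16 | dport,
                            dp_vs_conn_rnd) & mask;
}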

 

  • dp_vs_conn_attach_timer
static void dp_vs_conn_attach_timer(struct dp_vs_conn *conn, bool lock)
{
    int rc;
    //if conn->timer is already running, return immediately
    if (dp_vs_conn_is_in_timer(conn))
    {
        return;
    }
    //template connections go onto the global timer, others onto the per-lcore timer
    if (dp_vs_conn_is_template(conn))
    {
        if (lock)
        {
            rc = dpvs_timer_sched(&conn->timer, &conn->timeout,
                                  dp_vs_conn_expire, conn, true);
        }
        else
        {
            rc = dpvs_timer_sched_nolock(&conn->timer, &conn->timeout,
                                         dp_vs_conn_expire, conn, true);
        }
    }
    else
    {
        if (lock)
        {
            rc = dpvs_timer_sched(&conn->timer, &conn->timeout,
                                  dp_vs_conn_expire, conn, false);
        }
        else
        {
            rc = dpvs_timer_sched_nolock(&conn->timer, &conn->timeout,
                                         dp_vs_conn_expire, conn, false);
        }
    }

    if (rc == EDPVS_OK)
    {
        //mark conn->timer as running
        dp_vs_conn_set_in_timer(conn);
    }
}
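
When the timer fires, dp_vs_conn_expire tears the connection down. A heavily simplified outline of the assumed flow (the real handler also cleans up synproxy ack_mbuf entries and handles refcnt races):

/* sketch: expiry callback registered via dpvs_timer_sched above */
static int conn_expire_sketch(void *priv)
{
    struct dp_vs_conn *conn = priv;

    if (rte_atomic32_read(&conn->refcnt) > 1)
    {
        /* still referenced: re-arm the timer and retry later (assumed) */
        dp_vs_conn_attach_timer(conn, false);
        return EDPVS_OK;
    }
    dp_vs_conn_unhash(conn);      /* remove both tuplehash entries */
    dp_vs_conn_unbind_dest(conn); /* release the dest reference */
    dp_vs_conn_free(conn);        /* back to its connpool */
    return EDPVS_OK;
}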

 

 

https://blog.csdn.net/liwei0526vip/article/details/104723572

https://blog.csdn.net/zjx345438858/category_10311334.html

posted @ 2021-12-23 20:45 codestacklinuxer