socket connect tcp_v4_connect
tcp_v4_connect
/* This will initiate an outgoing connection. tcp_v4_connect函数初始化一个对外的连接请求,创建一个SYN包并发送出去, 把套接字的状态从CLOSE切换到SYN_SENT,初始化TCP部分选项数据包序列号、 窗口大小、MSS、套接字传送超时等*/ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __be16 orig_sport, orig_dport; __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; struct ip_options_rcu *inet_opt; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; //是否设置源路由选项 nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, sock_owned_by_user(sk)); if (inet_opt && inet_opt->opt.srr) { if (!daddr) return -EINVAL; nexthop = inet_opt->opt.faddr; } /* 根据目的ip、目的端口、网络设备接口调用ip_route_connect选路由, 路由结构保存到rt->rt_dst中,实际调用的函数是ip_route_output_flow, 如果是广播地址、组地址就返回 */ orig_sport = inet->inet_sport; orig_dport = usin->sin_port; fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, orig_dport, sk, true); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (!inet_opt || !inet_opt->opt.srr) daddr = fl4->daddr; if (!inet->inet_saddr) inet->inet_saddr = fl4->saddr; inet->inet_rcv_saddr = inet->inet_saddr; if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; if (likely(!tp->repair)) tp->write_seq = 0; } ////获取套接字最近使用的时间 if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; /* Socket identity is still unknown (sport may be zero). * However we set state to SYN-SENT and not releasing socket * lock select source port, enter ourselves into the hash tables and * complete initialization after this. 调用tcp_set_state设置套接字状态为TCP_SYN_SENT,本把套接字sk加入到连接管理哈希链表中, 为连接分配一个临时端口 */ tcp_set_state(sk, TCP_SYN_SENT); //将套接字sk放入TCP连接管理哈希链表中 同时 Bind a port //绑定IP地址和端口,并将socket加入到连接表中 err = inet_hash_connect(&tcp_death_row, sk); if (err) goto failure; rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto failure; } /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, usin->sin_port); inet->inet_id = tp->write_seq ^ jiffies; /* 初始化第一个序列号,调用tcp_connect函数完成建立连接, 包括发送SYN,tcp_connect将创建号的SYN数据段加入到套接字发送队列, 最后调用tcp_transmit_skb数据包发送到IP层。 */ if (likely(!tp->repair)) err = tcp_connect(sk); else err = tcp_repair_connect(sk); rt = NULL; if (err) goto failure; return 0; failure: /* * This unhashes the socket and releases the local port, * if necessary. */ tcp_set_state(sk, TCP_CLOSE); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; return err; } /* * Bind a port for a connect operation and hash it. */ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), __inet_check_established, __inet_hash_nolisten); } int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **), int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->inet_num; struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; struct net *net = sock_net(sk); int twrefcnt = 1; if (!snum) {//端口未绑定 int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; struct hlist_node *node; struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; if (inet_is_reserved_local_port(port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. //绑定到一个port的socket可能是通过bind 系统调用,也可能是调用connect系统调用时__inet_hash_connect函数选取的 */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { if (tb->fastreuse >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, port, &tw)) goto ok; goto next_port; } } //当前端口没有被使用 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, head, port); if (!tb) { spin_unlock(&head->lock); break; } tb->fastreuse = -1; goto ok; next_port: spin_unlock(&head->lock); } local_bh_enable(); return -EADDRNOTAVAIL; ok: hint += i; /* Head lock still held and bh's disabled //将socket加入port对应的tb的socket队列中,即将此socket与port相关联 */ inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { //如果socket没有被加入到“已建立连接”的连接表中 inet_sk(sk)->inet_sport = htons(port); twrefcnt += hash(sk, tw);//将socket加入到“已建立连接”的连接表中 } if (tw) twrefcnt += inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head->lock); if (tw) { inet_twsk_deschedule(tw, death_row); while (twrefcnt) { twrefcnt--; inet_twsk_put(tw); } } ret = 0; goto out; } head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash;//将tb加入到bind hash表中 spin_lock_bh(&head->lock); //条件为false时,会执行else分支,检查是否可用。这么看来,调用bind()成功并不意味着这个端口就真的可以用 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {//有且仅有一个socket绑定到这个端口,无需冲突检查 hash(sk, NULL);//将socket加入到“已建立连接”的连接表中 spin_unlock_bh(&head->lock); return 0; } else { spin_unlock(&head->lock); /* No definite answer... Walk to established hash table */ ret = check_established(death_row, sk, snum, NULL); out: local_bh_enable(); return ret; } }
创建一个套接字,设置SO_REUSEADDR选项,建立连接后立即关闭,关闭后立即又重复同样的过程,发现在第二次调用connect()的时候返回EADDRNOTAVAIL错误
可以看到返回EADDRNOTVAIL错误的有两种情况:
1、在TIME_WAIT传输控制块中找到匹配的端口,并且twsk_unique()返回true时
2、在除TIME_WAIT和LISTEN状态外的传输块中存在匹配的端口。
第二种情况很好容易理解了,只要状态在FIN_WAIT_1、ESTABLISHED等的传输控制块使用的端口和要查找的匹配,就会返回EADDRNOTVAIL错误。
第一种情况还要取决于twsk_uniqueue()的返回值
__inet_hash_connect的主要功能与bind系统调用中的inet_csk_get_port类似,都是:
1、如果没有选取端口则选定一个;
2、将socket与端口绑定;
3、将scoket加入到连接表中(这个功能inet_csk_get_port没有)。
另外一点不同是:inet_csk_get_port进行冲突检查时关注的是绑定冲突
而__inet_hash_connect检查的是当前socket是否与“已建立连接的socket”的冲突。
__inet_hash_connect检查冲突的函数是__inet_check_established:
/* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, __u16 lport, struct inet_timewait_sock **twp) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);//找到连接表中的表项 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; int twrefcnt = 0; spin_lock(lock); /* Check TIME-WAIT sockets first. 先检查TIME_WAIT表,然后再检查establish表,与这两个表中的任意一个冲突都是不允许的 */ sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) { if (twsk_unique(sk, sk2, twp)) goto unique; else goto not_unique; } } tw = NULL; /* And established part... */ sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } unique: /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { twrefcnt = inet_twsk_unhash(tw); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); if (twrefcnt) inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); inet_twsk_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } 在listen系统调用中,inet_hash函数会将socket加入到listen连接表中: static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk, NULL); return; } WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); } int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; spinlock_t *lock; struct inet_ehash_bucket *head; int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); if (tw) { WARN_ON(sk->sk_hash != tw->tw_hash); twrefcnt = inet_twsk_unhash(tw); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return twrefcnt; } static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) { return &hashinfo->ehash[hash & hashinfo->ehash_mask]; }/* 可见server端的socket在进行listen系统调用后被加入到sk->sk_prot->h.hashinfo->listening_hash中, client端的socket在进行connect系统调用后被加入到sk->sk_prot->h.hashinfo->ehash中, 而对于TCPv4和TCPv6,sk->sk_prot->h.hashinfo指向的都是tcp_hashinfo。*/
在测试设备并发时 出现了如下log
tcp_connect2] idx 3 (192.168.43.4:0)-->(192.168.43.11, 80):errno:99 Cannot assign requested address
说明此时 ip port 不够用了
根据上述代码可知:
tcp_connect时,其源ip以及接口可以不用指定, 可以依靠目的ip 进行路由,查找到出口ip相同网段的接口以及对应接口ip
http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!!
但行好事 莫问前程
--身高体重180的胖子
分类:
linux tcp/ip
, socket
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南