linux下connect超时时间探究

最近在linux做服务器开发的时候,发现了一个现象:服务器在启动的时候调用了 connect 函数,因为连接了一个不可用的端口,导致connect最后报出了 “Connection timed out” 的错误。但是这中间过了六十多秒的时间。

为何会等待这么长的时间才超时呢?这个时间又在哪里设置?

《UNIX网络编程(第一卷)——套接口 API 和 X/Open 传输接口 API》一书的4.3节有写到:

  对于TCP套接口来说,函数 connect 激发TCP的三路握手过程,且仅在链接成功建立或出错时才返回,返回的错误可能有如下几种情况:

  1. 如果TCP客户没有收到SYN分节的响应,则返回ETIMEDOUT。例如在4.4BSD中,当调用函数 connect 时,发出一个SYN,若无响应,等待6秒之后再发一个;若仍无响应,24秒钟之后再发一个。若总共等待了75秒钟之后仍未响应,则返回错误...

从书中可以看到 connect 建立TCP链接的过程中,会发送SYN包,如果没有收到SYN包的回包,内核会多次发送SYN包,并且每次重试的间隔会逐渐增加,避免发送太多的SYN包影响网络。

在CentOS上,这个重试次数是可以设置的:

$ sysctl net.ipv4 | grep tcp
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_sack = 1
net.ipv4.tcp_retrans_collapse = 1
net.ipv4.tcp_syn_retries = 5
net.ipv4.tcp_synack_retries = 5
net.ipv4.tcp_max_orphans = 262144
net.ipv4.tcp_max_tw_buckets = 262144
net.ipv4.tcp_keepalive_time = 7200
net.ipv4.tcp_keepalive_probes = 9
net.ipv4.tcp_keepalive_intvl = 75
net.ipv4.tcp_retries1 = 3
net.ipv4.tcp_retries2 = 15
net.ipv4.tcp_fin_timeout = 60
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_recycle = 0
net.ipv4.tcp_abort_on_overflow = 0
net.ipv4.tcp_stdurg = 0
net.ipv4.tcp_rfc1337 = 0
net.ipv4.tcp_max_syn_backlog = 2048
net.ipv4.tcp_orphan_retries = 0
net.ipv4.tcp_fack = 1
net.ipv4.tcp_reordering = 3
net.ipv4.tcp_ecn = 2
net.ipv4.tcp_dsack = 1
net.ipv4.tcp_mem = 639168       852224  1278336
net.ipv4.tcp_wmem = 4096        16384   4194304
net.ipv4.tcp_rmem = 4096        87380   4194304
net.ipv4.tcp_app_win = 31
net.ipv4.tcp_adv_win_scale = 2
net.ipv4.tcp_tw_reuse = 0
net.ipv4.tcp_frto = 2
net.ipv4.tcp_frto_response = 0
net.ipv4.tcp_low_latency = 0
net.ipv4.tcp_no_metrics_save = 0
net.ipv4.tcp_moderate_rcvbuf = 1
net.ipv4.tcp_tso_win_divisor = 3
net.ipv4.tcp_congestion_control = cubic
net.ipv4.tcp_abc = 0
net.ipv4.tcp_mtu_probing = 0
net.ipv4.tcp_base_mss = 512
net.ipv4.tcp_workaround_signed_windows = 0
net.ipv4.tcp_challenge_ack_limit = 1000
net.ipv4.tcp_limit_output_bytes = 262144
net.ipv4.tcp_dma_copybreak = 4096
net.ipv4.tcp_slow_start_after_idle = 1
net.ipv4.tcp_available_congestion_control = cubic reno
net.ipv4.tcp_allowed_congestion_control = cubic reno
net.ipv4.tcp_max_ssthresh = 0
net.ipv4.tcp_thin_linear_timeouts = 0
net.ipv4.tcp_thin_dupack = 0
net.ipv4.tcp_min_tso_segs = 2
net.ipv4.tcp_invalid_ratelimit = 500

其中的 net.ipv4.tcp_syn_retries 选项控制着SYN的重试次数,可以通过如下命令来查看和设置:

$ sysctl net.ipv4.tcp_syn_retries       #查看
net.ipv4.tcp_syn_retries = 5
$ sudo sysctl -w net.ipv4.tcp_syn_retries=1 #设置
net.ipv4.tcp_syn_retries = 1

下面用一个简单的程序,来验证各种次数下的connect超时时间:

#include <iostream>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <errno.h>
#include <string.h>
#include <arpa/inet.h>

long long GetCurrentMSecond()
{
    struct timeval tv; 
    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

int main()
{
    int fd = 0;
    struct sockaddr_in addr;

    fd = socket(AF_INET, SOCK_STREAM, 0); 

    socklen_t bufSize = 128 * 1024;
    int retCode = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufSize, sizeof(bufSize));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = inet_addr("192.168.207.128");
    addr.sin_port = htons(13500); //连接一个不用的端口,以保证会触发超时
    long long llBeginTime = GetCurrentMSecond();
    if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) 
    {   
        long long llEndTime = GetCurrentMSecond();
        std::cout << "connect failed, errno: " << errno << ", error: " << strerror(errno)
                  << ", cost time: " << llEndTime - llBeginTime << std::endl;
        return 0;
    }   
    std::cout << "connect success" << std::endl;
}

通过设置不同的重试次数,探究各种重试次数下的time out时间:

$ g++ connect.cpp -o main
$ sudo sysctl -w net.ipv4.tcp_syn_retries=1
net.ipv4.tcp_syn_retries = 1
$ ./main 
connect failed, errno: 110, error: Connection timed out, cost time: 3000
$ sudo sysctl -w net.ipv4.tcp_syn_retries=2
net.ipv4.tcp_syn_retries = 2
$ ./main 
connect failed, errno: 110, error: Connection timed out, cost time: 7000

 

重试次数 超时时间(单位:毫秒)
1 3000  
2 7000
3 14999
4 31000
5 63001
6 126999

从表格中可以看到,当前设置重试次数为5的时候,超时时间是63秒,可以通过修改重试次数的方式,来改变connect的超时时间。

posted @ 2018-12-22 18:00  溟漓  阅读(5080)  评论(0编辑  收藏  举报