欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

TCP连接保活检测Keepalive

程序员文章站 2024-03-22 22:13:28
...

TCP连接的keepalive定时器用于定期检测连接是否正常。

Keepalive初始化

默认情况下Keepalive每两个小时触发一次,触发之后,最多发送9次探测报文,每个报文的间隔为75秒钟。即在9次探测都没有接收到对端的回复之后,认为连接已经断开。

#define TCP_KEEPALIVE_TIME  (120*60*HZ) /* default keepalive time: two hours */
#define TCP_KEEPALIVE_PROBES    9       /* Max of 9 keepalive probes    */
#define TCP_KEEPALIVE_INTVL (75*HZ) /* default interval between probes: 75 seconds */

static int __net_init tcp_sk_init(struct net *net)
{
    net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
    net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
    net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

函数tcp_init_xmit_timers调用inet_csk_init_xmit_timers初始化keepalive定时器,其超时处理函数为tcp_keepalive_timer。

void tcp_init_xmit_timers(struct sock *sk)
{
    inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                  &tcp_keepalive_timer);

keepalive设置

可通过PROC文件调整Keepalive的相关参数,此调整是全局性质的。

static struct ctl_table ipv4_net_table[] = {
    {
        .procname   = "tcp_keepalive_time",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_time,
        .maxlen     = sizeof(int),
    },
    {
        .procname   = "tcp_keepalive_probes",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_probes,
        .maxlen     = sizeof(int),
    },
    {
        .procname   = "tcp_keepalive_intvl",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
        .maxlen     = sizeof(int),
    },

默认值如下:

$ cat /proc/sys/net/ipv4/tcp_keepalive_time
7200
$ cat /proc/sys/net/ipv4/tcp_keepalive_probes 
9
$ cat /proc/sys/net/ipv4/tcp_keepalive_intvl 
75

也可以通过套接口选项调整Keepalive参数,仅对操作的套接口生效。注意对于TCP_KEEPIDLE选项,如果套接口已开启SOCK_KEEPOPEN标志,并且不处于CLOSE或LISTEN状态,内核将根据keepalive的剩余时长(新的keepalive_time减去已经过的时长,不足时取零)重新设置定时器的超时时间。

static int do_tcp_setsockopt(struct sock *sk, int level,
        int optname, char __user *optval, unsigned int optlen)
{

    switch (optname) {
    case TCP_KEEPIDLE:
        if (val < 1 || val > MAX_TCP_KEEPIDLE)
            err = -EINVAL;
        else {
            tp->keepalive_time = val * HZ;
            if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                u32 elapsed = keepalive_time_elapsed(tp);
                if (tp->keepalive_time > elapsed)
                    elapsed = tp->keepalive_time - elapsed;
                else
                    elapsed = 0;
                inet_csk_reset_keepalive_timer(sk, elapsed);
            }
        }
        break;
    case TCP_KEEPINTVL:
        if (val < 1 || val > MAX_TCP_KEEPINTVL)
            err = -EINVAL;
        else
            tp->keepalive_intvl = val * HZ;
        break;
    case TCP_KEEPCNT:
        if (val < 1 || val > MAX_TCP_KEEPCNT)
            err = -EINVAL;
        else
            tp->keepalive_probes = val;
        break;

对于keepalive的三个参数,内核做了如下的限制。keepalive_time时长不能超过32767秒(约9.1小时);keepalive_probes次数不能超过127次;最后,keepalive_intvl的时长也不能超过32767秒(约9.1小时)。三个参数的最小值均为1,超出范围时返回-EINVAL。

#define MAX_TCP_KEEPIDLE    32767   /* upper bound for TCP_KEEPIDLE, in seconds */
#define MAX_TCP_KEEPINTVL   32767   /* upper bound for TCP_KEEPINTVL, in seconds */
#define MAX_TCP_KEEPCNT     127     /* upper bound for TCP_KEEPCNT (number of probes) */

另外,最重要的套接口层的keepalive开关SO_KEEPALIVE在如下函数sock_setsockopt中设置,具体实现由注册的tcp_set_keepalive函数完成。

int sock_setsockopt(struct socket *sock, int level, int optname,
            char __user *optval, unsigned int optlen)
{
    switch (optname) { 
    case SO_KEEPALIVE:
        if (sk->sk_prot->keepalive)
            sk->sk_prot->keepalive(sk, valbool);
        sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
        break;
		
struct proto tcp_prot = {
    .name           = "TCP",
    .keepalive      = tcp_set_keepalive,
}

如果val为真(即使能keepalive功能),并且套接口之前未开启过keepalive(未设置SOCK_KEEPOPEN标志),则启动keepalive定时器。否则,如果val为零,关闭keepalive定时器。套接口处于CLOSE或LISTEN状态时不做任何处理。

/*
 * Start or stop the keepalive timer of a socket.
 *
 * @sk:  socket whose keepalive timer is adjusted
 * @val: non-zero to enable keepalive, zero to disable it
 *
 * Sockets in the CLOSE or LISTEN state are left untouched.  This function
 * only reads SOCK_KEEPOPEN; it does not set or clear the flag itself.
 */
void tcp_set_keepalive(struct sock *sk, int val)
{
    /* Turn sk_state into a bit so it can be tested against the TCPF_* mask. */
    if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
        return;

    /* Enabling: arm the timer only if keepalive was not already on. */
    if (val && !sock_flag(sk, SOCK_KEEPOPEN))
        inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
    /* Disabling: stop the timer. */
    else if (!val)
        inet_csk_delete_keepalive_timer(sk);
}

如下,如果监听套接口开启了keepalive功能,其子套接口也将开启keepalive。

struct sock *tcp_create_openreq_child(const struct sock *sk,
                      struct request_sock *req, struct sk_buff *skb)
{
    if (sock_flag(newsk, SOCK_KEEPOPEN))
        inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp));

Keepalive定时器超时

如果此时套接口被用户调用占用,将keepalive定时器延迟50毫秒。如果套接口处于监听状态不作处理。

static void tcp_keepalive_timer (struct timer_list *t)
{
    struct sock *sk = from_timer(sk, t, sk_timer);
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct tcp_sock *tp = tcp_sk(sk);

    /* Only process if socket is not in use. */
    bh_lock_sock(sk);
    if (sock_owned_by_user(sk)) {
        /* Try again later. */
        inet_csk_reset_keepalive_timer (sk, HZ/20);
        goto out;
    }
    if (sk->sk_state == TCP_LISTEN) {
        pr_err("Hmm... keepalive on a LISTEN ???\n");
        goto out;
    }

如果套接口没有启用Keepalive功能,或者套接口处于CLOSE或者SYN_SENT状态,不进行处理。如果网络中存在已经发出但还没有确认的报文(packets_out),或者套接口发送队列不为空,不需要进行keepalive处理,直接启动下一个Keepalive周期。

    if (!sock_flag(sk, SOCK_KEEPOPEN) ||
        ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
        goto out;

    elapsed = keepalive_time_when(tp);

    /* It is alive without keepalive 8) */
    if (tp->packets_out || !tcp_write_queue_empty(sk))
        goto resched;

如果keepalive定时器运行时间达到或超过设定的超时时间,分成两种情况判定连接是否已经失效。第一,使能了UTO(TCP_USER_TIMEOUT)时,如果keepalive运行时长达到或超出UTO时长,并且本地已经发送过探测报文,还是没有收到响应,则判定此连接已经出错。第二,UTO没有使能的情况下,如果keepalive的probe发送次数已经达到设定值(默认9次),同样判定连接已经出错。以上两种情况都会发送TCP复位报文,终止连接。

如果以上两种情况都不成立,由函数tcp_write_wakeup发送探测报文,并且增加probe计数,将下一次超时设定为间隔值(默认为75秒)。如果由于本地拥塞导致probe报文未能发送,将下一次超时设置为500毫秒之后(TCP_RESOURCE_PROBE_INTERVAL)。

    elapsed = keepalive_time_elapsed(tp);
    
    if (elapsed >= keepalive_time_when(tp)) {
        /* If the TCP_USER_TIMEOUT option is enabled, use that
         * to determine when to timeout instead.
         */
        if ((icsk->icsk_user_timeout != 0 &&
            elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
            icsk->icsk_probes_out > 0) ||

            (icsk->icsk_user_timeout == 0 &&
            icsk->icsk_probes_out >= keepalive_probes(tp))) {
            tcp_send_active_reset(sk, GFP_ATOMIC);
            tcp_write_err(sk);
            goto out;
        }
        if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
            icsk->icsk_probes_out++;
            elapsed = keepalive_intvl_when(tp);
        } else {
            /* If keepalive was lost due to local congestion, try harder.
             */ 
            elapsed = TCP_RESOURCE_PROBE_INTERVAL;
        }

最后,如果keepalive定时器运行时长未达到设置时长(例如probe报文得到对端回复之后,间隔定时器到期的情况),则按照剩余时长(keepalive_time减去已运行时长)重新设置定时器。

    } else {
        /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
        elapsed = keepalive_time_when(tp) - elapsed;
    }
    
    sk_mem_reclaim(sk);
        
resched:
    inet_csk_reset_keepalive_timer (sk, elapsed);
    goto out;

探测报文

如下函数tcp_write_wakeup,由上节的介绍可知,在发送keepalive探测报文时,套接口的发送队列是空的,所以内核使用tcp_xmit_probe_skb函数发送探测报文。如果紧急指针SND.UP位于SND.UNA+1至SND.UNA+0xFFFF之间,则先以第二个参数为1额外发送一个探测报文,随后再以第二个参数为0发送常规的探测报文。

注意TCP的窗口探测机制与keepalive共同使用此函数,这里去掉了窗口探测相关代码。

int tcp_write_wakeup(struct sock *sk, int mib)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (sk->sk_state == TCP_CLOSE) return -1;

    skb = tcp_send_head(sk);
    if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
        ...
    } else {
        if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
            tcp_xmit_probe_skb(sk, 1, mib);
        return tcp_xmit_probe_skb(sk, 0, mib);

如下发送函数tcp_xmit_probe_skb,发送ACK报文,如果参数urgent为真,ACK报文序号为SND.UNA,否则,如果urgent不为真,ACK报文序号为SND.UNA减去1(ACK报文不占用新序号)。

static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
    struct tcp_sock *tp = tcp_sk(sk);

    /* We don't queue it, tcp_transmit_skb() sets ownership. */
    skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
    if (!skb) return -1;

    /* Reserve space for headers and set control bits. */
    skb_reserve(skb, MAX_TCP_HEADER);

    /* Use a previous sequence.  This should cause the other
     * end to send an ack.  Don't queue or clone SKB, just send it.
     */
    tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
    NET_INC_STATS(sock_net(sk), mib);
    return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);

keepalive与窗口探测

内核使用icsk_probes_out保存keepalive的探测计数,并且,在接收到对端ACK报文之后,清空此计数。窗口探测同样使用icsk_probes_out变量做计数,但是由于在套接口发送队列有报文时,才进行零窗口探测,而相反,只有在发送队列为空时,才会进行keepalive处理,所以两者互不影响。

static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
    /* We passed data and got it acked, remove any soft error
     * log. Something worked...
     */
    sk->sk_err_soft = 0;
    icsk->icsk_probes_out = 0;
    tp->rcv_tstamp = tcp_jiffies32;

Keepalive与FIN_WAIT2

内核中keepalive和套接口FIN_WAIT2状态共用一个定时器,以上介绍了在超时处理中keepalive的处理部分,以下为FIN_WAIT2部分的处理,后者优先于keepalive功能,先行处理。如果linger2时间大于等于零,并且tcp_fin_time返回的时长超过TCP_TIMEWAIT_LEN(60秒),TCP套接口进入TIME_WAIT状态,第三个超时参数tmo设置为减去TCP_TIMEWAIT_LEN之后的剩余时间。否则,如果剩余时长小于等于零(或者linger2小于零),不再需要进入TIME_WAIT状态,发送复位报文,断开连接。

static void tcp_keepalive_timer (struct timer_list *t)
{
    struct sock *sk = from_timer(sk, t, sk_timer);
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct tcp_sock *tp = tcp_sk(sk);

    tcp_mstamp_refresh(tp);
    if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
        if (tp->linger2 >= 0) {
            const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

            if (tmo > 0) {
                tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                goto out;
            }
        }
        tcp_send_active_reset(sk, GFP_ATOMIC);
        goto death;
    }

内核版本 5.0