TCP连接保活检测Keepalive
TCP连接的keepalive定时器用于定期检测连接是否正常。
Keepalive初始化
默认情况下Keepalive每两个小时触发一次,触发之后,最多发送9次探测报文,每个报文的间隔为75秒钟。即在9次探测都没有接收到对端的回复之后,认为连接已经断开。
/* Default keepalive settings; tcp_sk_init() copies them into the per-net sysctl fields. */
#define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
#define TCP_KEEPALIVE_INTVL (75*HZ) /* 75 seconds between probes */
static int __net_init tcp_sk_init(struct net *net)
{
net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
在函数inet_csk_init_xmit_timers中初始化keepalive定时器。
void tcp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
keepalive设置
可通过PROC文件调整Keepalive的相关参数,此调整是全局性质的。
static struct ctl_table ipv4_net_table[] = {
{
.procname = "tcp_keepalive_time",
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
.maxlen = sizeof(int),
},
{
.procname = "tcp_keepalive_probes",
.data = &init_net.ipv4.sysctl_tcp_keepalive_probes,
.maxlen = sizeof(int),
},
{
.procname = "tcp_keepalive_intvl",
.data = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
.maxlen = sizeof(int),
},
默认值如下:
$ cat /proc/sys/net/ipv4/tcp_keepalive_time
7200
$ cat /proc/sys/net/ipv4/tcp_keepalive_probes
9
$ cat /proc/sys/net/ipv4/tcp_keepalive_intvl
75
也可以通过套接口选项调整Keepalive参数,仅对操作的套接口生效。注意对于TCP_KEEPIDLE选项,如果套接口开启了SOCK_KEEPOPEN标志,内核将根据keepalive的剩余时长重新设置超时时间。
static int do_tcp_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{
switch (optname) {
case TCP_KEEPIDLE:
if (val < 1 || val > MAX_TCP_KEEPIDLE)
err = -EINVAL;
else {
tp->keepalive_time = val * HZ;
if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
u32 elapsed = keepalive_time_elapsed(tp);
if (tp->keepalive_time > elapsed)
elapsed = tp->keepalive_time - elapsed;
else
elapsed = 0;
inet_csk_reset_keepalive_timer(sk, elapsed);
}
}
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
err = -EINVAL;
else
tp->keepalive_intvl = val * HZ;
break;
case TCP_KEEPCNT:
if (val < 1 || val > MAX_TCP_KEEPCNT)
err = -EINVAL;
else
tp->keepalive_probes = val;
break;
对于keepalive的三个参数,内核做了如下的限制。keepalive_time时长不能超过32767秒(约9.1小时);keepalive_probes次数不能超过127次;最后,keepalive_intvl的时长同样不能超过32767秒(约9.1小时)。三个参数的最小值均为1,超出范围时返回-EINVAL。
/*
 * Upper bounds checked by do_tcp_setsockopt(). The KEEPIDLE and KEEPINTVL
 * values are given in seconds (the accepted value is multiplied by HZ before
 * being stored); KEEPCNT is a plain probe count.
 */
#define MAX_TCP_KEEPIDLE 32767 /* TCP_KEEPIDLE limit, in seconds */
#define MAX_TCP_KEEPINTVL 32767 /* TCP_KEEPINTVL limit, in seconds */
#define MAX_TCP_KEEPCNT 127 /* TCP_KEEPCNT limit, number of probes */
另外,最重要的套接口层的keepalive开关SO_KEEPALIVE在如下函数sock_setsockopt中设置,具体实现由注册的tcp_set_keepalive函数完成。
int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
switch (optname) {
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
sk->sk_prot->keepalive(sk, valbool);
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
break;
struct proto tcp_prot = {
.name = "TCP",
.keepalive = tcp_set_keepalive,
}
如果套接口处于CLOSE或者LISTEN状态,不做任何处理。否则,如果val为真(即开启keepalive功能),并且套接口之前未设置SOCK_KEEPOPEN标志,则启动keepalive定时器;如果val为零,则删除keepalive定时器。
/*
 * Start or stop the keepalive timer of a TCP socket.
 *
 * sk:  the socket being configured.
 * val: non-zero to enable keepalive, zero to disable it.
 *
 * This function only manipulates the timer; it does not touch the
 * SOCK_KEEPOPEN flag itself. In sock_setsockopt() the flag is updated
 * after this callback returns, which is why the flag still holds the
 * previous setting when it is tested here.
 */
void tcp_set_keepalive(struct sock *sk, int val)
{
/* Nothing to do for sockets in the CLOSE or LISTEN state. */
if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
return;
/* Enabling, and keepalive was not already on: arm the timer with the idle time. */
if (val && !sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
/* Disabling: remove the timer. (Enabling when already on leaves the timer as is.) */
else if (!val)
inet_csk_delete_keepalive_timer(sk);
}
如下,如果监听套接口开启了keepalive功能,由其创建的子套接口也将开启keepalive,并启动keepalive定时器。
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req, struct sk_buff *skb)
{
if (sock_flag(newsk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp));
Keepalive定时器超时
如果此时套接口被用户调用占用,将keepalive定时器延迟50毫秒。如果套接口处于监听状态不作处理。
static void tcp_keepalive_timer (struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
inet_csk_reset_keepalive_timer (sk, HZ/20);
goto out;
}
if (sk->sk_state == TCP_LISTEN) {
pr_err("Hmm... keepalive on a LISTEN ???\n");
goto out;
}
如果套接口没有启用Keepalive功能,或者套接口处于CLOSE或者SYN_SENT状态,不进行处理。如果存在已经发出但尚未被确认的报文(packets_out),或者套接口发送队列不为空,说明连接上仍有数据在传输,不需要进行keepalive探测,直接启动下一个Keepalive周期。
if (!sock_flag(sk, SOCK_KEEPOPEN) ||
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
if (tp->packets_out || !tcp_write_queue_empty(sk))
goto resched;
如果keepalive定时器运行时间达到或超过设定的超时时间,分成两种情况判定连接失效。第一,使能了UTO(TCP_USER_TIMEOUT)时,如果keepalive运行时长达到或超出UTO时长,并且本地已经发送过探测报文,还是没有收到响应。第二,UTO没有使能的情况下,keepalive的probe发送次数已经达到设定值(默认9次)。满足任一情况,都判定此连接已经出错,发送TCP复位报文,终止连接。
如果以上两种情况都不成立,由函数tcp_write_wakeup发送探测报文,并且增加probe计数,将下一次超时设定为间隔值(默认为75秒)。如果由于本地拥塞导致probe报文未能发送,将下一次超时设置为500毫秒之后(TCP_RESOURCE_PROBE_INTERVAL)。
elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
/* If the TCP_USER_TIMEOUT option is enabled, use that
* to determine when to timeout instead.
*/
if ((icsk->icsk_user_timeout != 0 &&
elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
icsk->icsk_probes_out > 0) ||
(icsk->icsk_user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
goto out;
}
if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp);
} else {
/* If keepalive was lost due to local congestion, try harder.
*/
elapsed = TCP_RESOURCE_PROBE_INTERVAL;
}
最后,如果keepalive定时器运行时长未超过设置时长(如probe报文间隔定时到期),重新设置定时器时长。
} else {
/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
elapsed = keepalive_time_when(tp) - elapsed;
}
sk_mem_reclaim(sk);
resched:
inet_csk_reset_keepalive_timer (sk, elapsed);
goto out;
探测报文
如下函数tcp_write_wakeup,由上节的介绍可知,在发送keepalive探测报文时,套接口的发送队列是空的,所以内核使用tcp_xmit_probe_skb函数发送探测报文。如果紧急指针SND.UP位于SND.UNA之后的未确认范围内(SND.UNA+1至SND.UNA+0xFFFF),先额外发送一个第二个参数为1的探测报文,随后再发送第二个参数为0的探测报文,并以后者的结果作为返回值。
注意TCP的窗口探测机制与keepalive共同使用此函数,这里去掉了窗口探测相关代码。
int tcp_write_wakeup(struct sock *sk, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_state == TCP_CLOSE) return -1;
skb = tcp_send_head(sk);
if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
...
} else {
if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
tcp_xmit_probe_skb(sk, 1, mib);
return tcp_xmit_probe_skb(sk, 0, mib);
如下发送函数tcp_xmit_probe_skb,发送ACK报文,如果紧急指针urgent为真,ACK报文序号为SND.UNA,否则,如果urgent不为真,ACK报文序号为SND.UNA减去1(ACK报文不占用新序号)。
static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
/* We don't queue it, tcp_transmit_skb() sets ownership. */
skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
if (!skb) return -1;
/* Reserve space for headers and set control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
keepalive与窗口探测
内核使用icsk_probes_out保存keepalive的探测计数,并且,在接收到对端ACK报文之后,清空此计数。窗口探测同样使用icsk_probes_out变量做计数,但是由于在套接口发送队列有报文时,才进行零窗口探测,而相反,只有在发送队列为空时,才会进行keepalive处理,所以两者互不影响。
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_jiffies32;
Keepalive与FIN_WAIT2
内核中keepalive和套接口FIN_WAIT2状态共用一个定时器,以上介绍了在超时处理中,keepalive的处理部分,以下为FIN_WAIT2部分的处理,后者优先于keepalive功能,先行处理。如果套接口处于FIN_WAIT2状态并且已设置SOCK_DEAD标志,按如下方式处理:如果linger2时间大于等于零,并且tcp_fin_time得到的时长超过TCP_TIMEWAIT_LEN(60秒),TCP套接口进入TIME_WAIT状态,第三个超时参数tmo设置为减去TCP_TIMEWAIT_LEN之后的剩余时间。否则,如果linger2小于零,或者减去TCP_TIMEWAIT_LEN之后的剩余时长小于等于零,不再需要进入TIME_WAIT状态,发送复位报文,断开连接。
static void tcp_keepalive_timer (struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
if (tp->linger2 >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
if (tmo > 0) {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto out;
}
}
tcp_send_active_reset(sk, GFP_ATOMIC);
goto death;
}
内核版本 5.0
推荐阅读
-
TCP连接探测中的Keepalive和心跳包
-
TCP连接保活检测Keepalive
-
TCP连接探测中的Keepalive和心跳包
-
tcp长连接和保活时间
-
lvs+keepalive实现双主模式(采用DR),同时实现TCP和UDP检测实现非web端的负载均衡,同时实现跨网段的通讯
-
c# socket心跳超时检测的思路(适用于超大量TCP连接情况下)
-
lvs+keepalive实现双主模式(采用DR),同时实现TCP和UDP检测实现非web端的负载均衡,同时实现跨网段的通讯
-
TCP连接探测中的Keepalive和心跳包
-
tcp长连接和保活时间
-
c# socket心跳超时检测的思路(适用于超大量TCP连接情况下)