环境: linux-5.0.1内核,32位系统的MenuOS
圆圈是状态,()是函数
enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT, //2 TCP_SYN_RECV, //3 TCP_FIN_WAIT1, //4 TCP_FIN_WAIT2, //5 TCP_TIME_WAIT, //6 TCP_CLOSE, //7 TCP_CLOSE_WAIT, //8 TCP_LAST_ACK, //9 TCP_LISTEN, //10 TCP_CLOSING, /* Now a valid state */ //11 TCP_NEW_SYN_RECV, //12 TCP_MAX_STATES /* Leave at the end! */ //13 };
1.client端发起主动连接,将自身状态置为TCP_SYN_SENT,向服务器端发送一个SYN被置1的报文表示请求连接
2.server端在listen以后处于LISTEN状态,收到client发送的SYN以后,将此处于半连接的socket加入一个数据结构,并设置其状态为TCP_NEW_SYN_RECV,而后向client发送ACK和SYN均置为1的数据包,表示收到请求并同意建立连接。
3.client收到后,将自身状态置为ESTABLISHED,并向server端发送ACK置为1的数据包,表示接收到了该数据包。server端收到后查询半连接的表,取出对应项建立新的socket连接,并设置其状态为TCP_SYN_RECV,将其加入请求队列,而后将状态置为TCP_ESTABLISHED,三次握手完毕,连接建立成功;此后连接保持ESTABLISHED状态,直到某一方主动关闭连接时才会进入TCP_FIN_WAIT1等关闭流程的状态。
1 客户端connect如何从socket接口找到tcp协议的? 2 客户端tcp协议是如何将数据SYN传到ip层的,状态如何切换的? 3 服务端ip层收到数据SYN以后,如何传递给tcp层的? 4 服务端如何将SYN+ACK发送到IP的,状态何时改变的? 5 客户端收到SYN+ACK以后,状态如何转变的? 6 客户端如何将ACK发送出去的? 7 服务端收到IP层传来的ACK如何处理的?状态是怎么切换的? 8 accept是如何从tcp层获得新的socket的?
第一个问题比较容易,这在上一篇中我们已经跟踪过了,是因为我们在socket()创建时就已经指定了TCP协议。
发送SYN报文,向服务器发起tcp连接 connect(fd, servaddr, addrlen); -> __do_sys_socketcall() -> __sys_connect() -> sock->ops->connect() == inet_stream_connect (sock->ops即inet_stream_ops) -> tcp_v4_connect()
我们只需要打个断点就能看到如下函数调用栈:
(gdb) bt #0 <tcp_v4_connect> (sk=0xc71b06a0, uaddr=0xc7895ec4, addr_len=16) #1 __inet_stream_connect () #2 inet_stream_connect() #3 __sys_connect () #4 __do_sys_socketcall () #5 __se_sys_socketcall () #6 do_syscall_32_irqs_on() #7 do_fast_syscall_32() #8 entry_SYSENTER_32 () #9 0x00000003 in ?? () #10 0x00000000 in ?? ()
tcp_v4_connect->tcp_connect-> tcp_transmit_skb->ip_queue_xmit
上面我们已经跟踪到了tcp_v4_connect,那我们将进入仔细看看它到底发生了什么。
详细解读可看: https://blog.csdn.net/wangpengqi/article/details/9472699
这里只分析和我们相关的部分,可以看到tcp_v4_connect完成了路由查找、端口绑定、产生SYN分节、生成序号,并调用tcp_connect发送包。
/* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { // 查找路由表 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, orig_dport, sk); ... //设置网络层所需的,目的IP地址,目的端口地址 inet->dport = usin->sin_port; inet->daddr = daddr; //将状态closing->TCP_SYN_SENT tcp_set_state(sk, TCP_SYN_SENT); ... //为套接字绑定一个端口 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); //设置套接字的路由出口信息 __sk_dst_set(sk, &rt->u.dst); tcp_v4_setup_caps(sk, &rt->u.dst); tp->ext2_header_len = rt->u.dst.header_len; ... //生成一个序号 if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->saddr, inet->daddr, inet->sport, //调用tcp_connect(sk)函数,为请求包设置SYN标志,并发出请求 err = tcp_connect(sk); ... //连接失败,将状态置为TCP_CLOSE tcp_set_state(sk, TCP_CLOSE); } EXPORT_SYMBOL(tcp_v4_connect);
tcp_connect函数具体负责构造一个携带SYN标志位的TCP头并发送出去,同时还设置了计时器超时重发。
#define TCPHDR_FIN 0x01 #define TCPHDR_SYN 0x02 #define TCPHDR_RST 0x04 #define TCPHDR_PSH 0x08 #define TCPHDR_ACK 0x10 #define TCPHDR_URG 0x20 #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 int tcp_connect(struct sock *sk) { //初始化SYN,虽然它的值为2,但它表明的是SYN位为1,看tcp包头,相信您能明白。 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); //设置时间戳 tp->retrans_stamp = tcp_time_stamp; //显然这里有两种方式传送syn,下面咱们打断点看看是哪种方式。 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); //设置tcp头的序号 tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; }
断点状况:
b tcp_connect b tcp_transmit_skb b tcp_send_syn_data b ip_queue_xmit
断点依次通过tcp_connect,tcp_transmit_skb,ip_queue_xmit
(gdb) bt #0 ip_queue_xmit #1 __tcp_transmit_skb #2 tcp_transmit_skb #3 tcp_connect (sk=0xc71886a0) #4 0xc17fe987 in tcp_v4_connect
其中tcp_transmit_skb函数负责将tcp数据发送出去,这里调用了icsk->icsk_af_ops->queue_xmit函数指针,实际上就是在TCP/IP协议栈初始化时设定好的IP层向上提供数据发送接口ip_queue_xmit函数,这里TCP协议栈经过调用这个icsk->icsk_af_ops->queue_xmit函数指针来触发IP协议栈代码发送数据,从而将数据传到IP层。
__tcp_transmit_skb { const struct inet_connection_sock *icsk = inet_csk(sk); err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl) }
->tcp_v4_rcv ->tcp_v4_do_rcv ->tcp_rcv_state_process ->tcp_v4_conn_request ->tcp_conn_request
这儿就要依据下图了:
socket接口通过结构体tcp_prot将上层传递下来的函数指针与具体协议(tcp)的方法绑定,tcp_prot结构体在上一篇博客最后我们分析过了,就不说了,而下层通过结构体tcp_protocol将ip层的回调函数与具体协议(tcp)的方法绑定,我们来看一看tcp_protocol,可以看到回调函数handler指向了tcp_v4_rcv。
static const struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, .netns_ok = 1, .icmp_strict_tag_validation = 1, };
而此时咱们程序此刻停在客户端的ip_queue_xmit,离开客户端以前咱们先看一下客户端的状态,是不是TCP_SYN_SENT
tcp的状态
enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT, //2 TCP_SYN_RECV, //3 TCP_FIN_WAIT1, //4 TCP_FIN_WAIT2, //5 TCP_TIME_WAIT, //6 TCP_CLOSE, //7 TCP_CLOSE_WAIT, //8 TCP_LAST_ACK, //9 TCP_LISTEN, //10 TCP_CLOSING, /* Now a valid state */ //11 TCP_NEW_SYN_RECV, //12 TCP_MAX_STATES /* Leave at the end! */ //13 };
查看tcp的状态,没错刚好是。
p sk->__sk_common.skc_state $1 = 2 '\002' //TCP_SYN_SENT
那咱们准备离开客户端,进入服务端了。
b tcp_v4_rcv
若是咱们按 c,就进入服务端的tcp_v4_rcv,查看一下堆栈关系,这个堆栈有点深,咱们就直接看最后#28 ip_queue_xmit如何到tcp_v4_rcv的吧,确实是经历了坎坷。
(gdb) bt //服务端传输层 #0 tcp_v4_rcv (skb=0xc791a0b8) //服务端网络层 #1 ip_protocol_deliver_rcu (net=0xc1cd3e40 <init_net>, skb=0xc791a0b8) #2 ip_local_deliver_finish (net=<optimized out>, sk=<optimized out>,skb=<optimized out>) #3 NF_HOOK () #4 ip_local_deliver (skb=0xc791a0b8) #5 dst_input (skb=<optimized out>) #6 ip_rcv_finish (skb=0xc791a0b8) #7 NF_HOOK () //下一次课须要研究的 #8 ip_rcv (skb=0xc791a0b8, dev=0xc780f800, pt=<optimized out>, orig_dev=0xc780f800) #9 __netif_receive_skb_one_core (skb=0xc791a0b8,) #10 __netif_receive_skb () #11 process_backlog () #12 napi_poll () #13 net_rx_action (h=<optimized out>) #14 __do_softirq () #15 call_on_stack (func=0xc791a0b8, stack=0xc17ff980 <tcp_v4_rcv>) #16 do_softirq_own_stack () #17 do_softirq () #18 do_softirq () #19 __local_bh_enable_ip () #20 local_bh_enable () #21 rcu_read_unlock_bh () //客户端网络层 #22 ip_finish_output2 (net=<optimized out>, sk=<optimized out>, skb=0xc791a0b8) #23 ip_finish_output (net=<optimized out>, sk=0xc71b86a0, skb=0xc791a0b8) #24 NF_HOOK_COND () #25 ip_output (net=0xc1cd3e40 <init_net>, sk=<optimized out>, skb=0xc791a0b8) #26 dst_output () #27 ip_local_out (net=0xc1cd3e40 <init_net>, sk=<optimized out>, skb=0xc791a0b8) #28 in __ip_queue_xmit (sk=0xc71b86a0, skb=0xc17ff980 <tcp_v4_rcv>, //客户端传输层 #29 __tcp_transmit_skb
tcp_v4_rcv
int tcp_v4_rcv(struct sk_buff *skb) { //前面就是一堆检查校验和,组包之类的 先跳过。 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); .... //因为服务端进行bind和listen以后的状态为TCP_LISTEN,因此进入tcp_v4_do_rcv if (sk->sk_state == TCP_LISTEN) { ret = tcp_v4_do_rcv(sk, skb); goto put_and_return; } ... }
tcp_v4_rcv前面的内容可参考:
http://blog.sina.com.cn/s/blog_52355d840100b6sd.html
//咱们将代码执行到sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted)以后 //而后查看sk->sk_state的状态 (gdb) p sk->__sk_common.skc_state $2 = 10 '\n' //TCP_LISTEN
没问题,进入tcp_v4_do_rcv
检查当前状态,最后进入tcp_rcv_state_process
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { ... if(tcp_rcv_state_process(sk, skb)) { rsk = sk; goto reset; } }
struct sk_buff与struct socket及struct sock 结构体分析 https://blog.csdn.net/wangpengqi/article/details/9156083
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) { switch (sk->sk_state) case TCP_LISTEN: //检查是不是ACK if (th->ack) return 1; if (th->rst) goto discard; //检查是不是syn,显然是有的,所以客户端就传了一个syn过来. if (th->syn) { //没有fin if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, * so we need to make sure to disable BH and RCU right there. */ //加解锁 rcu_read_lock(); local_bh_disable(); //执行conn_request acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); return 0; } }
与前面icsk->icsk_af_ops->queue_xmit的道理一样,这里的icsk->icsk_af_ops->conn_request实际调用的就是 tcp_v4_conn_request 。
tcp_v4_conn_request函数对传入包的路由类型进行检查,如果是发往广播或者组播的,则丢弃该包,否则调用tcp_conn_request继续进行请求处理,其中参数传入了请求控制块操作函数结构指针
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; return tcp_conn_request(&tcp_request_sock_ops, &tcp_request_sock_ipv4_ops, sk, skb); drop: tcp_listendrop(sk); return 0; }
tcp_conn_request函数为syn请求的核心处理流程,咱们暂且忽略其中的syn cookies和fastopen相关流程,其核心功能为分析请求参数,新建链接请求控制块,注意,新建请求控制操做中会将链接状态更新为TCP_NEW_SYN_RECV ,并初始化相关成员,初始化完毕以后,tcp_v4_send_synack()向客户端发送了SYN+ACK报文,inet_csk_reqsk_queue_hash_add()将sk添加到保存半链接的数据结构syn_table中,填充了该客户端相关的信息。这样,再次收到客户端的ACK报文时,就能够在syn_table中找到相应项了
int tcp_conn_request(...) { ... //分配请求控制块,请求控制块的操做指向rsk_ops , //注意: 这个函数将链接状态更新为TCP_NEW_SYN_RECV // ireq->ireq_state = TCP_NEW_SYN_RECV; req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie); ... inet_csk_reqsk_queue_hash_add(sk, req, tcp_timeout_init((struct sock *)req)); //发送syn+ack tcp_v4_send_synack af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : TCP_SYNACK_COOKIE); ... }
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; struct sk_buff * skb; //获取路由 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto out; //根据监听套接字、链接请求块和路由构造SYN+ACK数据包 skb = tcp_make_synack(sk, dst, req); if (skb) { struct tcphdr *th = tcp_hdr(skb); //计算TCP校验和 th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, csum_partial((char *)th, skb->len, skb->csum)); //构造IP报文并发送,属于IP层动做,暂时不考虑,进入5. err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); err = net_xmit_eval(err); } out: dst_release(dst); return err; }
从上面的代码能够看出,TCP构造出SYN+ACK报文后,会直接发送给IP层,而且不会将该数据包加入TCP的发送队列。
5的过程其实和3相似,一样是IP层传到TCP层,这一次我就忽略掉细节,由于客户端在收到SYN+ACK以后,在函数tcp_rcv_state_process中,当前客户端的状态TCP_SYN_SENT,进入tcp_rcv_synsent_state_process。
(gdb) bt #0 tcp_set_state() # tcp_finish_connect() #1 tcp_rcv_synsent_state_process () #2 tcp_rcv_state_process (sk=0xc71b86a0, skb=0xc78f4000) //case TCP_SYN_SENT 进入tcp_rcv_synsent_state_process #3 tcp_v4_do_rcv () #4 sk_backlog_rcv () #5 __release_sock () #6 release_sock() #7 in inet_wait_for_connect()//在第一次握手以后,一致阻塞在这儿等待接收 #8 __inet_stream_connect() #9 inet_stream_connect (sock=0xc77a04e0, uaddr=0xc7895ec4, addr_len=16, flags=2) #10 __sys_connect (fd=<optimized out>, uservaddr=<optimized out>, addrlen=16) #11 in __do_sys_socketcall (args=<optimized out>, call=<optimized out>) #12 __se_sys_socketcall (call=3, args=-1076164160) at net/socket.c:2527
case TCP_SYN_SENT: tp->rx_opt.saw_tstamp = 0; tcp_mstamp_refresh(tp); //进入tcp_rcv_synsent_state_process处理 queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; /* Do step6 onward by hand. */ tcp_urg(sk, skb, th); __kfree_skb(skb); tcp_data_snd_check(sk); return 0; }
(gdb) p sk->__sk_common.skc_state $8 = 1 '\001'//TCP_ESTABLISHED
{ ... //检查ACK的有效性 tcp_ack(sk, skb, FLAG_SLOWPATH); ... //若是ack有效,则完成链接,将状态兄TCP_SYN_SENT->TCP_ESTABLISHED tcp_finish_connect(sk, skb); ... //发送ack tcp_send_ack(sk); ... }
调用__tcp_send_ack -> __tcp_transmit_skb -> ip_queue_xmit回到1的过程。
(gdb) bt #0 ip_queue_xmit (sk=0xc71a86a0, skb=0xc78f40c0, fl=0xc71a88f8) #1 __tcp_transmit_skb (sk=0xc71a86a0, skb=0xc71a86a0) #2 0xc17f8da7 in __tcp_send_ack (sk=0xc71a86a0, rcv_nxt=<optimized out>) #3 0xc17fa3d7 in __tcp_send_ack (rcv_nxt=<optimized out>, sk=<optimized out>) #4 tcp_send_ack (sk=<optimized out>) at net/ipv4/tcp_output.c:3656
这一过程和发送SYN差很少,因此咱们把断点一样打在tcp_v4_rcv
tcp_v4_rcv->tcp_v4_syn_recv_sock
tcp_v4_syn_recv_sock 调用tcp_create_openreq_child创建新的sock连接,并设置新连接的状态为TCP_SYN_RECV
{ ... //建立新的socked链接,并设置新链接的状态为SYN_RECV newsk = tcp_create_openreq_child(sk, req, skb); //把newsk插入到ehash队列 *own_req=inet_ehash_nolisten(newsk, req_to_sk(req_unhash)) ... }
新链接来了以后要要维持的三个队列: https://blog.csdn.net/xiaoyu_750516366/article/details/85539495
然后进入tcp_rcv_state_process,堆栈情况如下,但和第一次处理SYN时的路径不一样了。相比于第一次SYN,它没有进入tcp_v4_do_rcv,而是通过tcp_child_process让新创建的子sock(child)来处理对应的新连接。
(gdb) bt #0 tcp_rcv_state_process (sk=0xc71a8d40, skb=0xc78f40c0) #1 tcp_child_process (parent=0xc71a8000, child=0xc78f40c0, skb=<optimized out>) #2 tcp_v4_rcv (skb=0xc78f40c0) //网络层,先无论 #3 ip_protocol_deliver_rcu (net=0xc1cd3e40 <init_net>, skb=0xc78f40c0, protocol=<optimized out>)
(gdb) p sk->__sk_common.skc_state $4 = 3 '\003' //TCP_SYN_RECV
tcp_rcv_state_process
{ case TCP_LISTEN: ... return 0; case TCP_SYN_SENT: ... return 0; //其余状态: ... //将TCP_SYN_RECV切换为TCP_ESTABLISHED tcp_set_state(sk, TCP_ESTABLISHED); ... }
至此三次握手结束。此后连接保持TCP_ESTABLISHED状态,直到某一方主动关闭连接时,主动关闭方才会从TCP_ESTABLISHED进入TCP_FIN_WAIT1。
至此服务端进入accept。
__sys_accept4->inet_accept->inet_csk_accept
会从请求队列中取出一个连接请求,如果队列为空则通过inet_csk_wait_for_connect阻塞住等待客户端的连接。
struct sock *inet_csk_accept() { /* 如过请求链接队列为空,则调用inet_csk_wait_for_connect,阻塞*/ if (reqsk_queue_empty(queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) goto out_err; error = inet_csk_wait_for_connect(sk, timeo);//一个for寻魂,从而阻塞accept if (error) goto out_err; } //若是队列不为空,则从队列中移除,取到newsk中。 req = reqsk_queue_remove(queue, sk); newsk = req->sk; }