// vim: set ft=c:

// Minimal TCP implementation (HolyC).
// https://tools.ietf.org/html/rfc793

// Connection states.
// See https://en.wikipedia.org/wiki/File:Tcp_state_diagram_fixed_new.svg
#define TCP_STATE_CLOSED        0
#define TCP_STATE_LISTEN        1
#define TCP_STATE_SYN_SENT      2
#define TCP_STATE_SYN_RECEIVED  3
#define TCP_STATE_ESTABLISHED   4
#define TCP_STATE_FIN_WAIT_1    5
#define TCP_STATE_FIN_WAIT_2    6
#define TCP_STATE_CLOSE_WAIT    7
#define TCP_STATE_CLOSING       8
#define TCP_STATE_LAST_ACK      9
#define TCP_STATE_TIME_WAIT     10

// Connect/accept handshake timeout, in milliseconds
// (converted to jiffies with JIFFY_FREQ / 1000 where it is used).
#define TCP_CONNECT_TIMEOUT     10000

#define TCP_DEFAULT_MSS         536

// Receive window; also used as the size of the receive ring buffer.
// Must stay a power of two: ring positions are wrapped with (size - 1) masks.
#define TCP_WINDOW_SIZE         8192

// Bits of CTcpHeader.flags
#define TCP_FLAG_FIN            0x01
#define TCP_FLAG_SYN            0x02
#define TCP_FLAG_RST            0x04
#define TCP_FLAG_PSH            0x08
#define TCP_FLAG_ACK            0x10
#define TCP_FLAG_URG            0x20

// Retransmission timing: srtt = ALPHA * srtt + (1 - ALPHA) * rtt,
// rto = clamp(BETA * srtt, RTO_MIN, RTO_MAX). Times are in seconds (tS).
#define TCP_SRTT_ALPHA          0.9
#define TCP_RTO_MIN             0.2
#define TCP_RTO_MAX             10
#define TCP_RTO_BETA            2

// On-wire TCP header without options. Multi-byte fields are in network byte order.
class CTcpHeader {
  U16 source_port;
  U16 dest_port;
  U32 seq;
  U32 ack;
  U8  data_offset;      // header length in 32-bit words, stored in the high nibble
  U8  flags;
  U16 window_size;
  U16 checksum;
  U16 urgent_pointer;
};

// Header of one entry in a socket's retransmission queue.
// A copy of the TCP segment (header + payload, `length` bytes) follows it in memory.
class CTcpSendBufHeader {
  CTcpSendBufHeader* next;

  F64 time_sent;        // tS at the time of the most recent (re)transmission
  U32 length;           // size of the segment copy that follows this header
  U32 retries;          // number of retransmissions so far
  U32 seq_start;        // sequence number of the first octet in the segment
  U32 seq_end;          // sequence number following the segment (snd_nxt after sending)
};

class CTcpSocket {
  CSocket sock;

  I64 state;

  U32 local_addr;
  U16 local_port;

  U32 remote_addr;
  U32 remote_port;

  U32 snd_una;    // seq number of first unacknowledged octet
  U32 snd_nxt;    // seq number of next octet to send
  U32 snd_wnd;    // allowed number of unacknowledged outgoing octets
  U32 mss;        // maximum segment size

  U32 rcv_nxt;    // seq number of next octet to receive
  U32 rcv_wnd;    // allowed number of unacknowledged incoming octets

  F64 conntime;   // tS when the handshake was started
  F64 srtt;       // smoothed round-trip time, seconds

  // Receive ring buffer; it is empty when read_pos == write_pos.
  I64 recv_buf_size;
  U8* recv_buf;
  I64 recv_buf_read_pos;
  I64 recv_buf_write_pos;

  // Accept backlog: backlog_first/backlog_last belong to a listening socket,
  // backlog_next links the pending sockets together.
  CTcpSocket* backlog_next;
  CTcpSocket* backlog_first;
  CTcpSocket* backlog_last;
  I64 backlog_remaining;

  // Retransmission queue (segments sent but not yet acknowledged)
  CTcpSendBufHeader* send_buf_first;
  CTcpSendBufHeader* send_buf_last;

  //I64 rcvtimeo_ms;
  //I64 recv_maxtime;
};

// Pseudo-header that is prepended to the segment for checksum computation only.
class CTcpPseudoHeader {
  U32 source_addr;
  U32 dest_addr;
  U8  zeros;
  U8  protocol;
  U16 tcp_length;
};

// Socket lookup table indexed by local port (65536 entries, see TcpInit).
// TODO: this takes up half a meg, change it to a binary tree or something
static CTcpSocket** tcp_bound_sockets;

static U16 tcp_next_source_port = RandU16();

// TRUE for every state in which the connection has completed the handshake.
static Bool TcpIsSynchronizedState(I64 state) {
  return state == TCP_STATE_ESTABLISHED || state == TCP_STATE_FIN_WAIT_1
      || state == TCP_STATE_FIN_WAIT_2 || state == TCP_STATE_CLOSE_WAIT
      || state == TCP_STATE_CLOSING || state == TCP_STATE_LAST_ACK
      || state == TCP_STATE_TIME_WAIT;
}

// Adds the 16-bit words of `header` to `sum` and returns the running total.
// Only whole words are consumed, so `length` should be even.
// The result is deliberately wider than 16 bits: carries must survive until
// TcpFinalChecksum folds them back in.
static U32 TcpPartialChecksum(U32 sum, U8* header, I64 length) {
  I64 nleft = length;
  U16* w = header;

  while (nleft > 1) {
    sum += *(w++);
    nleft -= 2;
  }

  return sum;
}

// Continues the running `sum` over `header` (any length) and returns the
// finished 16-bit one's-complement checksum.
static U16 TcpFinalChecksum(U32 sum, U8* header, I64 length) {
  I64 nleft = length;
  U16* w = header;

  while (nleft > 1) {
    sum += *(w++);
    nleft -= 2;
  }

  // mop up an odd byte, if necessary
  // (read exactly one byte so we never touch memory past the buffer)
  if (nleft == 1) {
    sum += *(w(U8*));
  }

  // add back carry outs from top 16 bits to low 16 bits
  sum = (sum >> 16) + (sum & 0xffff);   // add hi 16 to low 16
  sum += (sum >> 16);                   // add carry
  return (~sum) & 0xffff;
}

// Allocates an IPv4 packet with room for a TCP header plus `length` payload
// bytes and fills in the TCP header (checksum left at 0).
// On success *frame_out points at the payload area, directly after the header.
// Returns the index from IPv4PacketAlloc; a negative value means failure.
I64 TcpPacketAlloc(U8** frame_out, U32 source_ip, U16 source_port, U32 dest_ip, U16 dest_port,
    U32 seq, U32 ack, U8 flags, I64 length) {
  U8* frame;
  I64 index = IPv4PacketAlloc(&frame, IP_PROTO_TCP, source_ip, dest_ip,
      sizeof(CTcpHeader) + length);

  if (index < 0)
    return index;

  CTcpHeader* hdr = frame;
  hdr->source_port = htons(source_port);
  hdr->dest_port = htons(dest_port);
  hdr->seq = htonl(seq);
  hdr->ack = htonl(ack);
  hdr->data_offset = (sizeof(CTcpHeader) / 4) << 4;
  hdr->flags = flags;
  hdr->window_size = htons(TCP_WINDOW_SIZE / 2);    // FIXME
  hdr->checksum = 0;
  hdr->urgent_pointer = 0;

  *frame_out = frame + sizeof(CTcpHeader);
  return index;
}

// Computes the checksum of a packet obtained from TcpPacketAlloc and hands it
// to IPv4PacketFinish, whose result is returned.
//   frame        - payload pointer as returned through TcpPacketAlloc's frame_out
//   length       - payload length in bytes
//   send_buf_out - if non-NULL, receives a newly allocated copy of the complete
//                  segment for the retransmission queue. The caller owns it and
//                  must set seq_end.
I64 TcpPacketFinish(I64 index, U32 source_ip, U32 dest_ip, U8* frame, I64 length,
    CTcpSendBufHeader** send_buf_out) {
  CTcpHeader* hdr = frame - sizeof(CTcpHeader);

  CTcpPseudoHeader pseudo;
  pseudo.source_addr = htonl(source_ip);
  pseudo.dest_addr = htonl(dest_ip);
  pseudo.zeros = 0;
  pseudo.protocol = IP_PROTO_TCP;
  pseudo.tcp_length = htons(sizeof(CTcpHeader) + length);

  U32 sum = TcpPartialChecksum(0, &pseudo, sizeof(CTcpPseudoHeader));
  hdr->checksum = TcpFinalChecksum(sum, hdr, sizeof(CTcpHeader) + length);

  if (send_buf_out) {
    CTcpSendBufHeader* sb = MAlloc(sizeof(CTcpSendBufHeader) + sizeof(CTcpHeader) + length);
    sb->next = NULL;
    sb->time_sent = tS;
    sb->length = sizeof(CTcpHeader) + length;
    sb->retries = 0;
    sb->seq_start = ntohl(hdr->seq);
    sb->seq_end = 0;    // NEEDS TO BE SET UPSTREAM

    // Save the whole segment starting at the TCP header: the retransmit path
    // replays these sb->length bytes verbatim as the IPv4 payload.
    MemCpy((sb(U8*)) + sizeof(CTcpSendBufHeader), hdr, sizeof(CTcpHeader) + length);
    *send_buf_out = sb;
  }

  return IPv4PacketFinish(index);
}

// Sends a payload-less segment that is not tied to any socket (used for resets).
// Returns a negative value if the packet could not be allocated, otherwise the
// result of TcpPacketFinish.
I64 TcpSend(U32 local_addr, U16 local_port, U32 remote_addr, U16 remote_port, U32 seq, U32 ack, U8 flags) {
  U8* frame;
  I64 index = TcpPacketAlloc(&frame,
      local_addr, local_port, remote_addr, remote_port,
      seq, ack, flags, 0);

  if (index < 0)
    return index;

  return TcpPacketFinish(index, local_addr, remote_addr, frame, 0, NULL);
}

// Sends a payload-less segment using the socket's addresses and its current
// snd_nxt/rcv_nxt. SYN and FIN each advance snd_nxt by one.
// The segment is not queued for retransmission.
I64 TcpSend2(CTcpSocket* s, U8 flags) {
  U8* frame;
  I64 index = TcpPacketAlloc(&frame,
      s->local_addr, s->local_port, s->remote_addr, s->remote_port,
      s->snd_nxt, s->rcv_nxt, flags, 0);

  if (index < 0)
    return index;

  if (flags & TCP_FLAG_SYN)
    s->snd_nxt++;

  if (flags & TCP_FLAG_FIN)
    s->snd_nxt++;

  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;
  // FIXME: If the packet is SYN or FIN, we also need to queue for retransmit!
  return TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, 0, NULL);
}

// Sends `length` bytes of `data` in one segment, advances snd_nxt and appends a
// copy of the segment to the socket's retransmission queue.
// Returns a negative value if the packet could not be allocated (nothing is
// queued in that case), otherwise the result of TcpPacketFinish.
I64 TcpSendData2(CTcpSocket* s, U8 flags, U8* data, I64 length) {
  U8* frame;
  I64 index = TcpPacketAlloc(&frame,
      s->local_addr, s->local_port, s->remote_addr, s->remote_port,
      s->snd_nxt, s->rcv_nxt, flags, length);

  if (index < 0)
    return index;

  if (length)
    MemCpy(frame, data, length);

  if (flags & TCP_FLAG_SYN)
    s->snd_nxt++;

  s->snd_nxt += length;

  if (flags & TCP_FLAG_FIN)
    s->snd_nxt++;

  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;

  CTcpSendBufHeader* sb;
  I64 error = TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, length, &sb);
  sb->seq_end = s->snd_nxt;

  // Append to SendBuf chain
  if (s->send_buf_first)
    s->send_buf_last->next = sb;
  else
    s->send_buf_first = sb;

  s->send_buf_last = sb;

  return error;
}

// Locates the TCP header and payload inside a received IPv4 packet.
// Returns 0 and fills the three out-parameters on success. Returns -1 if the
// packet is not TCP, is shorter than a TCP header, or carries a data offset
// that is smaller than the fixed header or larger than the packet.
I64 TcpParsePacket(CTcpHeader** header_out, U8** data_out, I64* length_out, CIPv4Packet* packet) {
  if (packet->proto != IP_PROTO_TCP)
    return -1;

  // The fixed header must be present before any of its fields can be read
  if (packet->length < sizeof(CTcpHeader))
    return -1;

  // FIXME: checksum

  CTcpHeader* hdr = packet->data;
  I64 header_length = (hdr->data_offset >> 4) * 4;

  // Reject offsets that would yield a negative or out-of-packet payload
  if (header_length < sizeof(CTcpHeader) || header_length > packet->length)
    return -1;

  //"TCP: in hdr %d, flags %02Xh, seq %d, ack %d, len %d, chksum %d\n",
  //    header_length, hdr->flags, ntohl(hdr->seq), ntohl(hdr->ack),
  //    packet->length - header_length, ntohs(hdr->checksum);

  *header_out = hdr;
  *data_out = packet->data + header_length;
  *length_out = packet->length - header_length;
  return 0;
}

// Drops every queued segment at the head of the retransmission queue that is
// fully covered by `seg_ack`, feeding its round-trip time into srtt.
static U0 TcpSocketAckSendBufs(CTcpSocket* s, U32 seg_ack) {
  F64 time = tS;

  while (s->send_buf_first) {
    CTcpSendBufHeader* sb = s->send_buf_first;

    // There's no notion of smaller/greater than in modular arithmetic,
    // we can only check if a number lies within some range.
    // Here we check that
    //    sb->seq_end <= seg_ack <= s->snd_nxt
    // because that will work for all meaningful ACKs.
    I64 seg_ack_rel = (seg_ack - sb->seq_end) & 0xffffffff;
    I64 snd_nxt_rel = (s->snd_nxt - sb->seq_end) & 0xffffffff;

    if (seg_ack_rel <= snd_nxt_rel) {
      // Update smoothed RTT
      F64 rtt = time - sb->time_sent;
      s->srtt = (s->srtt * TCP_SRTT_ALPHA) + ((1.0 - TCP_SRTT_ALPHA) * rtt);
      //"ACK'd %d->%d (RTT %f ms)", sb->seq_start, sb->seq_end, rtt * 1000;

      // Remove SendBuf from chain
      s->send_buf_first = sb->next;

      if (s->send_buf_first == NULL)
        s->send_buf_last = NULL;

      Free(sb);
    }
    else
      break;
  }
}

// Retransmits queued segments whose retransmission timeout has expired.
// Each retransmitted segment gets a fresh timestamp and moves to the tail of
// the queue. Processing stops at the first segment that has not timed out yet,
// or when a packet cannot be allocated.
static U0 TcpSocketCheckSendBufs(CTcpSocket* s) {
  F64 time = tS;

  F64 rto = TCP_RTO_BETA * s->srtt;

  if (rto < TCP_RTO_MIN) rto = TCP_RTO_MIN;
  if (rto > TCP_RTO_MAX) rto = TCP_RTO_MAX;

  while (s->send_buf_first) {
    CTcpSendBufHeader* sb = s->send_buf_first;

    if (time > sb->time_sent + rto) {
      // Retransmit
      "Retransmit %d->%d (%f ms)!\n", sb->seq_start, sb->seq_end, (time - sb->time_sent) * 1000;
      U8* frame;
      I64 index = IPv4PacketAlloc(&frame, IP_PROTO_TCP, s->local_addr, s->remote_addr, sb->length);

      if (index < 0)
        return;     // retry later I guess

      MemCpy(frame, (sb(U8*)) + sizeof(CTcpSendBufHeader), sb->length);
      IPv4PacketFinish(index);

      sb->time_sent = tS;
      sb->retries++;

      // Move to the end of the chain
      s->send_buf_first = sb->next;
      sb->next = NULL;

      if (s->send_buf_first)
        s->send_buf_last->next = sb;
      else
        s->send_buf_first = sb;

      s->send_buf_last = sb;
    }
    else
      break;
  }
}

// Blocks (yielding) until a connection is available in the backlog, then waits
// up to TCP_CONNECT_TIMEOUT for its handshake to finish.
// Returns the new socket, or -1 if `s` is not listening or the handshake
// failed (the half-open socket is closed in that case).
// `addr` and `addrlen` are currently ignored.
I64 TcpSocketAccept(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  if (s->state != TCP_STATE_LISTEN)
    return -1;

  while (1) {
    // TODO: Thread safe?
    if (s->backlog_first) {
      CTcpSocket* new_socket = s->backlog_first;
      "Retr %p\n", new_socket;

      s->backlog_first = s->backlog_first->backlog_next;
      if (!s->backlog_first)
        s->backlog_last = NULL;

      s->backlog_remaining++;

      // TODO: this should be done in a way that doesn't block on accept()
      I64 maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

      while (cnts.jiffies < maxtime) {
        if (new_socket->state == TCP_STATE_ESTABLISHED || new_socket->state == TCP_STATE_CLOSED)
          break;
        else
          Yield;
      }

      if (new_socket->state != TCP_STATE_ESTABLISHED) {
        close(new_socket);
        return -1;
      }

      return new_socket;
    }
    else
      Yield;
  }

  no_warn addr;   // FIXME
  no_warn addrlen;
  return -1;
}

// Binds a closed socket to the port given in `addr` (a sockaddr_in).
// Returns 0 on success, -1 if `addrlen` is too small, the socket is not in the
// CLOSED state, or the port is already taken.
I64 TcpSocketBind(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  if (addrlen < sizeof(sockaddr_in))
    return -1;

  if (s->state != TCP_STATE_CLOSED)
    return -1;

  sockaddr_in* addr_in = addr;

  U16 local_port = ntohs(addr_in->sin_port);

  // TODO: address & stuff
  if (tcp_bound_sockets[local_port] != NULL)
    return -1;

  tcp_bound_sockets[local_port] = s;

  s->local_addr = IPv4GetAddress();
  s->local_port = local_port;

  return 0;
}

// Tears the socket down: sends RST if the connection is synchronized, closes
// all sockets still waiting in the backlog, releases the port table entry and
// frees the retransmission queue, the receive buffer and the socket itself.
// Always returns 0.
I64 TcpSocketClose(CTcpSocket* s) {
  if (TcpIsSynchronizedState(s->state)) {
    TcpSend2(s, TCP_FLAG_RST);
  }

  // Free backlog
  CTcpSocket* backlog = s->backlog_first;
  CTcpSocket* backlog2;

  while (backlog) {
    backlog2 = backlog->backlog_next;
    close(backlog);
    backlog = backlog2;
  }

  // Only release the port table entry if it is really ours; sockets created
  // from a listener share its local port and may have replaced the entry.
  if (s->local_port && tcp_bound_sockets[s->local_port] == s)
    tcp_bound_sockets[s->local_port] = NULL;

  // Free segments that were never acknowledged
  CTcpSendBufHeader* sb = s->send_buf_first;
  CTcpSendBufHeader* sb_next;

  while (sb) {
    sb_next = sb->next;
    Free(sb);
    sb = sb_next;
  }

  s->send_buf_first = NULL;
  s->send_buf_last = NULL;

  Free(s->recv_buf);
  Free(s);
  return 0;
}

// Actively opens a connection to the address in `addr` (a sockaddr_in):
// picks a local port in 0x8000..0xffff, sends SYN and waits (yielding) up to
// TCP_CONNECT_TIMEOUT for the connection to become established.
// Returns 0 on success, -1 on bad arguments, if the chosen local port is
// taken, or if the connection was not established in time.
I64 TcpSocketConnect(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  if (addrlen < sizeof(sockaddr_in))
    return -1;

  if (s->state != TCP_STATE_CLOSED)
    return -1;

  sockaddr_in* addr_in = addr;

  U16 local_port = 0x8000 + (tcp_next_source_port & 0x7fff);
  tcp_next_source_port++;

  // TODO: address & stuff
  if (tcp_bound_sockets[local_port] != NULL)
    return -1;

  tcp_bound_sockets[local_port] = s;

  s->local_addr = IPv4GetAddress();
  s->local_port = local_port;
  s->remote_addr = ntohl(addr_in->sin_addr.s_addr);
  s->remote_port = ntohs(addr_in->sin_port);

  s->snd_una = 0;
  s->snd_nxt = 0;
  s->snd_wnd = 0;
  s->mss = TCP_DEFAULT_MSS;

  s->rcv_nxt = 0;
  s->rcv_wnd = TCP_WINDOW_SIZE;

  s->conntime = tS;

  TcpSend2(s, TCP_FLAG_SYN);
  s->state = TCP_STATE_SYN_SENT;

  // TODO: TcpSetTimeout
  I64 maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

  while (cnts.jiffies < maxtime) {
    if (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_CLOSED)
      break;
    else
      Yield;
  }

  if (s->state != TCP_STATE_ESTABLISHED)
    return -1;

  return 0;
}

// Puts a closed socket into the LISTEN state with room for `backlog` pending
// connections. Returns 0 on success, -1 if the socket is not CLOSED.
I64 TcpSocketListen(CTcpSocket* s, I64 backlog) {
  if (s->state != TCP_STATE_CLOSED)
    return -1;

  // Enter listen state. If a SYN packet arrives, it will be processed by TcpHandler,
  // which opens the connection and puts the new socket into the listening socket's accept backlog.
  s->state = TCP_STATE_LISTEN;
  s->backlog_remaining = backlog;

  return 0;
}

// Copies up to `len` received bytes into `buf`. While the connection is
// established and no data is buffered, it waits (yielding) and services
// retransmissions in the meantime.
// Returns the number of bytes copied; 0 if `len` is 0 or the connection is no
// longer established and the buffer is empty.
// `flags`, `src_addr` and `addrlen` are currently ignored.
I64 TcpSocketRecvfrom(CTcpSocket* s, U8* buf, I64 len, I64 flags, sockaddr* src_addr, I64 addrlen) {
  no_warn flags;
  no_warn src_addr;   // FIXME
  no_warn addrlen;
  //"TcpSocketRecvfrom\n";
  while (s->state == TCP_STATE_ESTABLISHED && s->recv_buf_read_pos == s->recv_buf_write_pos) {
    TcpSocketCheckSendBufs(s);
    Yield;
  }

  // TODO: this works for now, but we should be still able to receive data
  //       in connection-closing states
  if ((s->state != TCP_STATE_ESTABLISHED && s->recv_buf_read_pos == s->recv_buf_write_pos)
      || len == 0)
    return 0;

  I64 read_pos = s->recv_buf_read_pos;
  I64 write_pos = s->recv_buf_write_pos;

  //I64 avail = (write_pos - read_pos) & (s->recv_buf_size);
  I64 read_total = 0;
  I64 step;

  if (write_pos < read_pos) {
    // The data wraps around: we can read up to the end of the buffer first
    step = s->recv_buf_size - read_pos;

    if (step > len)
      step = len;

    //"Read %d from %d..end\n", step, read_pos;
    MemCpy(buf, s->recv_buf + read_pos, step);
    buf += step;
    len -= step;
    read_pos = (read_pos + step) & (s->recv_buf_size - 1);
    read_total += step;

    // at this point, (len == 0 || read_pos == 0) must be true
  }

  if (len) {
    // Contiguous part between read_pos and write_pos
    step = write_pos - read_pos;

    if (step > len)
      step = len;

    //"Read %d from start+%d..\n", step, read_pos;
    MemCpy(buf, s->recv_buf + read_pos, step);
    buf += step;
    len -= step;
    read_pos += step;
    read_total += step;
  }

  s->recv_buf_read_pos = read_pos;
  return read_total;
}

// Sends `len` bytes from `buf`, split into segments of at most `mss` bytes and
// limited by the peer's window. While the window is full it services
// retransmissions and yields, so the call keeps going until everything has
// been handed to the network layer or the connection leaves ESTABLISHED.
// Returns the number of bytes that were sent.
// `flags`, `dest_addr` and `addrlen` are currently ignored.
I64 TcpSocketSendto(CTcpSocket* s, U8* buf, I64 len, I64 flags, sockaddr_in* dest_addr, I64 addrlen) {
  no_warn dest_addr;
  no_warn addrlen;
  no_warn flags;

  I64 sent_total = 0;

  while (s->state == TCP_STATE_ESTABLISHED && len) {
    // Usable window = snd_wnd minus octets in flight. If the peer has shrunk
    // its window below what is already in flight, nothing may be sent; a plain
    // modular subtraction would wrap around to a huge value in that case.
    I64 in_flight = (s->snd_nxt - s->snd_una) & 0xffffffff;
    I64 can_send = 0;

    if (in_flight < s->snd_wnd)
      can_send = s->snd_wnd - in_flight;

    // TODO: Keep trying
    //       Must be tied to a timeout; see RFC793/Managing-the-Window
    //if (s->snd_wnd == 0)
    //  can_send = 1;

    if (can_send == 0) {
      // Window is full: give retransmissions and incoming ACKs a chance
      TcpSocketCheckSendBufs(s);
      Yield;
    }
    else {
      if (can_send > len)
        can_send = len;

      if (can_send > s->mss)
        can_send = s->mss;

      TcpSendData2(s, TCP_FLAG_ACK, buf, can_send);
      buf += can_send;
      len -= can_send;
      sent_total += can_send;
    }
  }

  return sent_total;
}

// No socket options are supported yet; always returns -1.
I64 TcpSocketSetsockopt(CTcpSocket* s, I64 level, I64 optname, U8* optval, I64 optlen) {
  /*if (level == SOL_SOCKET && optname == SO_RCVTIMEO_MS && optlen == 8) {
    s->rcvtimeo_ms = *(optval(I64*));
    return 0;
  }*/

  no_warn s;
  no_warn level;
  no_warn optname;
  no_warn optval;
  no_warn optlen;

  return -1;
}

// Creates a new TCP socket in the CLOSED state.
// Returns NULL unless `domain` is AF_INET and `type` is SOCK_STREAM.
CTcpSocket* TcpSocket(U16 domain, U16 type) {
  if (domain != AF_INET || type != SOCK_STREAM)
    return NULL;

  CTcpSocket* s = MAlloc(sizeof(CTcpSocket));

  // Start from a known state: TcpSocketClose reads local_port, and the
  // sequence/timing fields must not contain garbage before a connection exists.
  MemSet(s, 0, sizeof(CTcpSocket));

  s->sock.accept = &TcpSocketAccept;
  s->sock.bind = &TcpSocketBind;
  s->sock.close = &TcpSocketClose;
  s->sock.connect = &TcpSocketConnect;
  s->sock.listen = &TcpSocketListen;
  s->sock.recvfrom = &TcpSocketRecvfrom;
  s->sock.sendto = &TcpSocketSendto;
  s->sock.setsockopt = &TcpSocketSetsockopt;

  s->state = TCP_STATE_CLOSED;

  s->send_buf_first = NULL;
  s->send_buf_last = NULL;

  s->recv_buf_size = TCP_WINDOW_SIZE;
  s->recv_buf = MAlloc(s->recv_buf_size);
  s->recv_buf_read_pos = 0;
  s->recv_buf_write_pos = 0;

  s->backlog_next = NULL;
  s->backlog_first = NULL;
  s->backlog_last = NULL;
  s->backlog_remaining = 0;

  /*s->rcvtimeo_ms = 0;
  s->recv_maxtime = 0;

  s->recv_buf = NULL;
  s->recv_len = 0;
  s->recv_addr.sin_family = AF_INET;
  s->bound_to = 0;*/
  return s;
}

// Processes one incoming segment for socket `s`.
//   LISTEN: a SYN creates a new socket in SYN_RECEIVED, answers SYN+ACK and
//           queues the socket in the backlog; anything else (or a full
//           backlog) is answered with RST.
//   CLOSED: the segment is ignored.
//   Other states: SYN, sequence check, ACK, RST and payload/FIN are handled in
//           that order; an ACK is sent at the end if one is required.
U0 TcpSocketHandle(CTcpSocket* s, CIPv4Packet* packet, CTcpHeader* hdr, U8* data, I64 length) {
  // Segment length in sequence space: payload plus one each for SYN and FIN
  U32 seg_len = length;

  if (hdr->flags & TCP_FLAG_FIN) seg_len++;
  if (hdr->flags & TCP_FLAG_SYN) seg_len++;

  U32 seg_seq = ntohl(hdr->seq);

  if (s->state == TCP_STATE_LISTEN) {
    // A new connection is being opened.

    if ((hdr->flags & TCP_FLAG_SYN) && s->backlog_remaining > 0) {
      //"SYN in from %08X:%d => %08X:%d.\n", packet->source_ip, ntohs(hdr->source_port),
      //    packet->dest_ip, ntohs(hdr->dest_port);
      CTcpSocket* new_socket = TcpSocket(AF_INET, SOCK_STREAM);

      new_socket->local_addr = IPv4GetAddress();
      new_socket->local_port = s->local_port;
      new_socket->remote_addr = packet->source_ip;
      new_socket->remote_port = ntohs(hdr->source_port);

      new_socket->snd_una = 0;
      new_socket->snd_nxt = 0;
      new_socket->snd_wnd = 0;
      new_socket->mss = TCP_DEFAULT_MSS;

      new_socket->rcv_nxt = ++seg_seq;
      new_socket->rcv_wnd= TCP_WINDOW_SIZE;

      new_socket->conntime = tS;

      TcpSend2(new_socket, TCP_FLAG_SYN | TCP_FLAG_ACK);
      new_socket->state = TCP_STATE_SYN_RECEIVED;

      // FIXME FIXME FIXME FIXME
      // (this replaces the listening socket's entry in the port table)
      tcp_bound_sockets[new_socket->local_port] = new_socket;

      if (s->backlog_last)
        s->backlog_last->backlog_next = new_socket;
      else
        s->backlog_first = new_socket;

      s->backlog_last = new_socket;
      s->backlog_remaining--;
    }
    else {
      //"REJ %08X:%d (as %08X:%d)\n", packet->source_ip, ntohs(hdr->source_port),
      //    packet->dest_ip, ntohs(hdr->dest_port);
      TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip, ntohs(hdr->source_port),
          seg_seq + 1, seg_seq + 1, TCP_FLAG_ACK | TCP_FLAG_RST);
    }

    return;
  }

  if (s->state == TCP_STATE_CLOSED)
    return;

  Bool must_ack = FALSE;

  // Process SYN
  if (hdr->flags & TCP_FLAG_SYN) {
    s->rcv_nxt = ++seg_seq;
    //"Reset ACK to %d\n", s->ack;

    must_ack = TRUE;
  }

  // Validate SEQ
  Bool valid_seq;

  if (seg_len == 0 && s->rcv_wnd == 0) {
    valid_seq = (seg_seq == s->rcv_nxt);
  }
  else {
    // At least one of these must be true:
    //   RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
    //   RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
    I64 rel_seq = ((seg_seq - s->rcv_nxt) & 0xffffffff);
    I64 rel_seq_end = ((seg_seq + seg_len - 1 - s->rcv_nxt) & 0xffffffff);

    if (rel_seq < s->rcv_wnd || rel_seq_end < s->rcv_wnd)
      valid_seq = TRUE;
    else
      valid_seq = FALSE;
  }

  if (!valid_seq)
    "SEQ error: seg_seq %d, seg_len %d, rcv_nxt %d, rcv_wnd %d\n", seg_seq, seg_len, s->rcv_nxt, s->rcv_wnd;

  // Process ACK
  if (hdr->flags & TCP_FLAG_ACK) {
    U32 seg_ack = ntohl(hdr->ack);
    // ACK is acceptable iff SND.UNA < SEG.ACK =< SND.NXT

    I64 rel_ack = ((seg_ack - s->snd_una) & 0xffffffff);
    I64 rel_nxt = ((s->snd_nxt - s->snd_una) & 0xffffffff);

    // RFC 793 is poorly worded in this regard, unacceptable ACK
    // is not the opposite of an acceptable (= new) ACK!
    // TODO: Instead of zero, we should compare rel_ack to some NEGATIVE_CONSTANT,
    //       so that we don't unnecessarily try to correct every slightly delayed ACK
    if (/*0 < rel_ack &&*/ rel_ack <= rel_nxt) {
      TcpSocketAckSendBufs(s, seg_ack);

      // Accept ACK
      s->snd_una = seg_ack;

      if (s->state == TCP_STATE_SYN_SENT && (hdr->flags & TCP_FLAG_SYN)) {
        s->state = TCP_STATE_ESTABLISHED;
        s->srtt = tS - s->conntime;
        //"Initial RTT: %f ms", s->srtt * 1000;
      }
      else if (s->state == TCP_STATE_SYN_RECEIVED) {
        //"Connection established.\n";
        s->state = TCP_STATE_ESTABLISHED;
        s->srtt = tS - s->conntime;
        //"Initial RTT: %f ms", s->srtt * 1000;
      }
    }
    else {
      // Unacceptable ACK
      "Bad ACK; state %d, seg_ack %d, snd_nxt %d\n", s->state, seg_ack, s->snd_nxt;

      if (s->state == TCP_STATE_LISTEN || s->state == TCP_STATE_SYN_SENT
          || s->state == TCP_STATE_SYN_RECEIVED) {
        // Reset
        TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip, ntohs(hdr->source_port),
            seg_ack, seg_seq + seg_len, TCP_FLAG_ACK | TCP_FLAG_RST);
      }
      else if (TcpIsSynchronizedState(s->state)) {
        // Send a 'corrective' ACK
        must_ack = TRUE;
      }
    }
  }

  // Process RST
  if (hdr->flags & TCP_FLAG_RST) {
    if ((s->state == TCP_STATE_SYN_SENT)) {
      // If acknowledged
      if (s->snd_una == s->snd_nxt) {
        "Connection refused\n";
        s->state = TCP_STATE_CLOSED;
        return;
      }
    }
    else {
      if (valid_seq) {
        "Connection reset by peer\n";
        s->state = TCP_STATE_CLOSED;
        return;
      }
    }

    "Spurious RST\n";
  }

  // FIXME check remote addr & port

  // Process data
  if (valid_seq) {
    // The header is in network byte order (TcpPacketAlloc writes this field
    // with htons), so convert before using it as the send window.
    s->snd_wnd = ntohs(hdr->window_size);

    if (s->state == TCP_STATE_ESTABLISHED) {
      I64 write_pos = s->recv_buf_write_pos;
      //"%d in @ %d", length, write_pos;

      // Skip retransmitted bytes
      while (length && seg_seq != s->rcv_nxt) {
        seg_seq = (seg_seq + 1) & 0xffffffff;
        data++;
        length--;
      }

      // ugh!
      // Copy byte by byte into the ring buffer, stopping when it is full
      // (one slot always stays unused so that full and empty differ).
      I64 i = 0;
      for (i = 0; i < length; i++) {
        I64 next_pos = (write_pos + 1) & (s->recv_buf_size - 1);

        if (next_pos == s->recv_buf_read_pos)
          break;

        s->recv_buf[write_pos] = data[i];
        write_pos = next_pos;
      }

      s->recv_buf_write_pos = write_pos;
      s->rcv_nxt += i;
      //"; %d saved\n", i;

      if (i > 0)
        must_ack = TRUE;

      if (hdr->flags & TCP_FLAG_FIN) {
        s->rcv_nxt++;
        s->state = TCP_STATE_CLOSE_WAIT;
        must_ack = TRUE;
      }
    }
  }

  if (must_ack) {
    TcpSend2(s, TCP_FLAG_ACK);
  }
}

// Layer-4 entry point (registered below with RegisterL4Protocol): parses the
// packet and dispatches it to the socket found in the port table for its
// destination port. Returns the result of TcpParsePacket.
I64 TcpHandler(CIPv4Packet* packet) {
  CTcpHeader* hdr;
  U8* data;
  I64 length;

  I64 error = TcpParsePacket(&hdr, &data, &length, packet);

  if (error < 0)
    return error;

  U16 dest_port = ntohs(hdr->dest_port);
  //"%u => %p\n", dest_port, tcp_bound_sockets[dest_port];

  CTcpSocket* s = tcp_bound_sockets[dest_port];

  // FIXME: should also check that bound address is INADDR_ANY,
  //        OR packet dest IP matches bound address
  if (s != NULL) {
    TcpSocketHandle(s, packet, hdr, data, length);
  }
  else {
    // TODO: Send RST as per RFC793/Reset-Generation
  }

  return error;
}

// Allocates and clears the port -> socket lookup table.
U0 TcpInit() {
  tcp_bound_sockets = MAlloc(65536 * sizeof(CTcpSocket*));
  MemSet(tcp_bound_sockets, 0, 65536 * sizeof(CTcpSocket*));
}

TcpInit;
RegisterL4Protocol(IP_PROTO_TCP, &TcpHandler);
RegisterSocketClass(AF_INET, SOCK_STREAM, &TcpSocket);