[TCP]: TCP_DEFER_ACCEPT updates - process as established
Change TCP_DEFER_ACCEPT implementation so that it transitions a
connection to ESTABLISHED after handshake is complete instead of
leaving it in SYN-RECV until some data arrives. Place connection in
accept queue when first data packet arrives from slow path.

Benefits:
- established connection is now reset if it never makes it to the
  accept queue
- diagnostic state of established matches with the packet traces
  showing completed handshake
- TCP_DEFER_ACCEPT timeouts are expressed in seconds and can now be
  enforced with reasonable accuracy instead of rounding up to next
  exponential back-off of syn-ack retry

Signed-off-by: Patrick McManus <mcmanus@ducksong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent e4c7884028
commit ec3c0982a2
9 changed files with 99 additions and 33 deletions
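For context on the userspace API this commit changes: TCP_DEFER_ACCEPT is set with setsockopt() in whole seconds. Below is a minimal listener sketch (illustrative only, not part of this commit; the port and the 10-second timeout are arbitrary, and error checks are omitted for brevity):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		int secs = 10;  /* arbitrary example timeout, in seconds */
		struct sockaddr_in addr;

		memset(&addr, 0, sizeof(addr));
		addr.sin_family = AF_INET;
		addr.sin_addr.s_addr = htonl(INADDR_ANY);
		addr.sin_port = htons(8080);  /* arbitrary example port */

		/* Ask the kernel to hold a completed handshake off the
		 * accept queue until data arrives, for up to 'secs'
		 * seconds. */
		setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
			   &secs, sizeof(secs));

		bind(fd, (struct sockaddr *)&addr, sizeof(addr));
		listen(fd, 128);

		/* With this patch, the value reads back as stored
		 * (clamped to MAX_TCP_ACCEPT_DEFERRED) instead of
		 * being rounded up to the next syn-ack retransmission
		 * back-off. */
		socklen_t len = sizeof(secs);
		getsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, &len);
		printf("TCP_DEFER_ACCEPT = %d s\n", secs);
		return 0;
	}

With the patch applied, a connection configured this way is ESTABLISHED (and visible as such in diagnostics) while it waits for data, and is reset if the timeout expires before it reaches the accept queue.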
include/linux/tcp.h
@@ -239,6 +239,11 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 	return (struct tcp_request_sock *)req;
 }
 
+struct tcp_deferred_accept_info {
+	struct sock *listen_sk;
+	struct request_sock *request;
+};
+
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
@@ -374,6 +379,8 @@ struct tcp_sock {
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 	int			linger2;
 
+	struct tcp_deferred_accept_info defer_tcp_accept;
+
 	unsigned long last_synq_overflow;
 
 	u32	tso_deferred;
include/net/request_sock.h
@@ -115,8 +115,8 @@ struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
-	u8			rskq_defer_accept;
-	/* 3 bytes hole, try to pack */
+	u16			rskq_defer_accept;
+	/* 2 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 };
include/net/tcp.h
@@ -139,6 +139,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_KEEPINTVL	32767
 #define MAX_TCP_KEEPCNT		127
 #define MAX_TCP_SYNCNT		127
+#define MAX_TCP_ACCEPT_DEFERRED 65535
 
 #define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
net/ipv4/inet_connection_sock.c
@@ -414,8 +414,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-	int thresh = max_retries;
+	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -451,9 +450,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
-	if (queue->rskq_defer_accept)
-		max_retries = queue->rskq_defer_accept;
-
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -461,9 +457,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp = &lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
-				    (inet_rsk(req)->acked ||
-				     !req->rsk_ops->rtx_syn_ack(parent, req))) {
+				if (req->retrans < thresh &&
+				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
net/ipv4/tcp.c
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		icsk->icsk_accept_queue.rskq_defer_accept = 0;
-		if (val > 0) {
-			/* Translate value in seconds to number of
-			 * retransmits */
-			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				      icsk->icsk_accept_queue.rskq_defer_accept))
-				icsk->icsk_accept_queue.rskq_defer_accept++;
-			icsk->icsk_accept_queue.rskq_defer_accept++;
+		if (val < 0) {
+			err = -EINVAL;
+		} else {
+			if (val > MAX_TCP_ACCEPT_DEFERRED)
+				val = MAX_TCP_ACCEPT_DEFERRED;
+			icsk->icsk_accept_queue.rskq_defer_accept = val;
 		}
 		break;
 
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
-		      ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+		val = icsk->icsk_accept_queue.rskq_defer_accept;
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
net/ipv4/tcp_input.c
@@ -4451,6 +4451,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
+static int tcp_defer_accept_check(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->defer_tcp_accept.request) {
+		int queued_data = tp->rcv_nxt - tp->copied_seq;
+		int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
+			tcp_hdr((struct sk_buff *)
+				sk->sk_receive_queue.prev)->fin : 0;
+
+		if (queued_data && hasfin)
+			queued_data--;
+
+		if (queued_data &&
+		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
+			if (sock_flag(sk, SOCK_KEEPOPEN)) {
+				inet_csk_reset_keepalive_timer(sk,
+						keepalive_time_when(tp));
+			} else {
+				inet_csk_delete_keepalive_timer(sk);
+			}
+
+			inet_csk_reqsk_queue_add(
+					tp->defer_tcp_accept.listen_sk,
+					tp->defer_tcp_accept.request,
+					sk);
+
+			tp->defer_tcp_accept.listen_sk->sk_data_ready(
+					tp->defer_tcp_accept.listen_sk, 0);
+
+			sock_put(tp->defer_tcp_accept.listen_sk);
+			sock_put(sk);
+			tp->defer_tcp_accept.listen_sk = NULL;
+			tp->defer_tcp_accept.request = NULL;
+		} else if (hasfin ||
+			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
+			tcp_reset(sk);
+			return -1;
+		}
+	}
+	return 0;
+}
+
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4811,6 +4854,9 @@ step5:
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
+
+	if (tcp_defer_accept_check(sk))
+		return -1;
 	return 0;
 
 csum_error:
net/ipv4/tcp_ipv4.c
@@ -1920,6 +1920,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	if (tp->defer_tcp_accept.request) {
+		reqsk_free(tp->defer_tcp_accept.request);
+		sock_put(tp->defer_tcp_accept.listen_sk);
+		sock_put(sk);
+		tp->defer_tcp_accept.listen_sk = NULL;
+		tp->defer_tcp_accept.request = NULL;
+	}
+
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
net/ipv4/tcp_minisocks.c
@@ -571,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
-	   bare ACK.  Otherwise, we create an established connection.  Both
-	   ends (listening sockets) accept the new incoming connection and try
-	   to talk to each other. 8-)
+	   Both ends (listening sockets) accept the new incoming
+	   connection and try to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -642,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
-	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-		inet_rsk(req)->acked = 1;
-		return NULL;
-	}
-
 	/* OK, ACK is valid, create big socket and
 	 * feed this segment to it. It will repeat all
 	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -687,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	inet_csk_reqsk_queue_unlink(sk, req, prev);
 	inet_csk_reqsk_queue_removed(sk, req);
 
-	inet_csk_reqsk_queue_add(sk, req, child);
+	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+
+		/* the accept queue handling is done in the est recv slow
+		 * path, so let's make sure to start there
+		 */
+		tcp_sk(child)->pred_flags = 0;
+		sock_hold(sk);
+		sock_hold(child);
+		tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
+		tcp_sk(child)->defer_tcp_accept.request = req;
+
+		inet_csk_reset_keepalive_timer(child,
+			inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
+	} else {
+		inet_csk_reqsk_queue_add(sk, req, child);
+	}
+
 	return child;
 
 listen_overflow:
net/ipv4/tcp_timer.c
@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
+	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
+		tcp_send_active_reset(sk, GFP_ATOMIC);
+		goto death;
+	}
+
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;