From 59435444a13ed52d3444c5df26b73d3086bcd57b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Jul 2008 11:59:09 +0100 Subject: [PATCH 1/8] dccp: Allow to distinguish original and retransmitted packets This patch allows the sender to distinguish original and retransmitted packets, which is in particular needed for the retransmission of DCCP-Requests: * the first Request uses ISS (generated in net/dccp/ip*.c), and sets GSS = ISS; * all retransmitted Requests use GSS' = GSS + 1, so that the n-th retransmitted Request has sequence number ISS + n (mod 48). To add generic support, the patch reorganises existing code so that: * icsk_retransmits == 0 for the original packet and * icsk_retransmits = n > 0 for the n-th retransmitted packet at the time dccp_transmit_skb() is called, via dccp_retransmit_skb(). Thanks to Wei Yongjun for pointing this problem out. Further changes: ---------------- * removed the `skb' argument from dccp_retransmit_skb(), since sk_send_head is used for all retransmissions (the exception is client-Acks in PARTOPEN state, but these do not use sk_send_head); * since sk_send_head always contains the original skb (via dccp_entail()), skb_cloned() never evaluated to true and thus pskb_copy() was never used. Signed-off-by: Gerrit Renker --- net/dccp/dccp.h | 2 +- net/dccp/output.c | 20 ++++++++++++++++---- net/dccp/timer.c | 20 ++++---------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 743d85fcd65..1c2e3ec2eb5 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -226,7 +226,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk); extern void dccp_send_ack(struct sock *sk); extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); diff --git a/net/dccp/output.c b/net/dccp/output.c index fe20068c5d8..d19d4819501 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -284,14 +284,26 @@ void dccp_write_xmit(struct sock *sk, int block) } } -int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +/** + * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets + * There are only four retransmittable packet types in DCCP: + * - Request in client-REQUEST state (sec. 8.1.1), + * - CloseReq in server-CLOSEREQ state (sec. 8.3), + * - Close in node-CLOSING state (sec. 8.3), + * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()). + * This function expects sk->sk_send_head to contain the original skb. + */ +int dccp_retransmit_skb(struct sock *sk) { + WARN_ON(sk->sk_send_head == NULL); + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) return -EHOSTUNREACH; /* Routing failure or similar. */ - return dccp_transmit_skb(sk, (skb_cloned(skb) ? - pskb_copy(skb, GFP_ATOMIC): - skb_clone(skb, GFP_ATOMIC))); + /* this count is used to distinguish original and retransmitted skb */ + inet_csk(sk)->icsk_retransmits++; + + return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 6a5b961b6f5..54b3c7e9e01 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -98,22 +98,12 @@ static void dccp_retransmit_timer(struct sock *sk) goto backoff; } - /* - * sk->sk_send_head has to have one skb with - * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ - WARN_ON(sk->sk_send_head == NULL); - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ if (dccp_write_timeout(sk)) - goto out; + return; /* * We want to know the number of packets retransmitted, not the @@ -122,30 +112,28 @@ static void dccp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); - if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + if (dccp_retransmit_skb(sk) != 0) { /* * Retransmission failed because of local congestion, * do not backoff. */ - if (icsk->icsk_retransmits == 0) + if (--icsk->icsk_retransmits == 0) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), DCCP_RTO_MAX); - goto out; + return; } backoff: icsk->icsk_backoff++; - icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); -out:; } static void dccp_write_timer(unsigned long data) From 73f18fdbca3f92b90aeaee16f5175fe30496e218 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: [PATCH 2/8] dccp: Bug-Fix - AWL was never updated The AWL lower Ack validity window advances in proportion to GSS, the greatest sequence number sent. Updating AWL other than at connection setup (in the DCCP-Request sent by dccp_v{4,6}_connect()) was missing in the DCCP code. This bug lead to syslog messages such as "kernel: dccp_check_seqno: DCCP: Step 6 failed for DATAACK packet, [...] P.ackno exists or LAWL(82947089) <= P.ackno(82948208) <= S.AWH(82948728), sending SYNC..." The difference between AWL/AWH here is 1639 packets, while the expected value (the Sequence Window) would have been 100 (the default). A closer look showed that LAWL = AWL = 82947089 equalled the ISS on the Response. The patch now updates AWL with each increase of GSS. Further changes: ---------------- The patch also enforces more stringent checks on the ISS sequence number: * AWL is initialised to ISS at connection setup and remains at this value; * AWH is then always set to GSS (via dccp_update_gss()); * so on the first Request: AWL = AWH = ISS, and on the n-th Request: AWL = ISS, AWH = ISS + n. As a consequence, only Response packets that refer to Requests sent by this host will pass, all others are discarded. This is the intention and in effect implements the initial adjustments for AWL as specified in RFC 4340, 7.5.1. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/output.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/net/dccp/output.c b/net/dccp/output.c index d19d4819501..d06945c7d3d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -53,8 +53,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - - dccp_inc_seqno(&dp->dccps_gss); + /* + * Increment GSS here already in case the option code needs it. + * Update GSS for real only if option processing below succeeds. + */ + dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: @@ -66,6 +69,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: set_ack = 0; + /* Use ISS on the first (non-retransmitted) Request. */ + if (icsk->icsk_retransmits == 0) + dcb->dccpd_seq = dp->dccps_iss; /* fall through */ case DCCP_PKT_SYNC: @@ -84,8 +90,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dcb->dccpd_seq = dp->dccps_gss; - if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; @@ -103,7 +107,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; - dp->dccps_awh = dp->dccps_gss; + dccp_update_gss(sk, dcb->dccpd_seq); dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); @@ -112,6 +116,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; + /* + * Limit Ack window to ISS <= P.ackno <= GSS, so that + * only Responses to Requests we sent are considered. + */ + dp->dccps_awl = dp->dccps_iss; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -449,19 +458,7 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* - * SWL and AWL are initially adjusted so that they are not less than - * the initial Sequence Numbers received and sent, respectively: - * SWL := max(GSR + 1 - floor(W/4), ISR), - * AWL := max(GSS - W' + 1, ISS). - * These adjustments MUST be applied only at the beginning of the - * connection. - */ - dccp_update_gss(sk, dp->dccps_iss); - dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); - - /* S.GAR - greatest valid acknowledgement number received on a non-Sync; - * initialized to S.ISS (sec. 8.5) */ + /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ dp->dccps_gar = dp->dccps_iss; icsk->icsk_retransmits = 0; From d68f0866f76e2bc4ddc07e88e2cb1bc8959a6d7e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: [PATCH 3/8] dccp: Fix sequence number check for ICMPv4 packets The payload of ICMP message is a part of the packet sent by ourself, so the sequence number check must use AWL and AWH, not SWL and SWH. For example: Endpoint A Endpoint B DATA-ACK --------> (SEQ=X) <-------- ICMP (Fragmentation Needed) (SEQ=X) Signed-off-by: Wei Yongjun Acked-by: Gerrit Renker --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a835b88237c..6a2f1879e18 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -238,7 +238,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } From e0bcfb0c6a6ed9ebd68746b306298dc5797fd426 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: [PATCH 4/8] dccp: Add check for sequence number in ICMPv6 message This adds a sequence number check for ICMPv6 DCCP error packets, in the same manner as it has been done for ICMPv4 in the previous patch. Signed-off-by: Wei Yongjun Acked-by: Gerrit Renker --- net/dccp/ipv6.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index da509127e00..25826b1bf68 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -89,6 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; @@ -116,6 +117,14 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == DCCP_CLOSED) goto out; + dp = dccp_sk(sk); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { @@ -168,7 +177,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; From 18e1d836002ad970f42736bad09b7be9cfe99545 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:10 +0100 Subject: [PATCH 5/8] dccp: Fix incorrect length check for ICMPv4 packets Unlike TCP, which only needs 8 octets of original packet data, DCCP requires minimally 12 or 16 bytes for ICMP-payload sequence number checks. This patch replaces the insufficient length constant of 8 with a two-stage test, making sure that 12 bytes are available, before computing the basic header length required for sequence number checks. Signed-off-by: Wei Yongjun Signed-off-by: Gerrit Renker --- net/dccp/ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6a2f1879e18..882c5c4de69 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -196,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -207,7 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } From 860239c56bbc7c830bdbcec93b140f22a5a5219b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Jul 2008 11:59:11 +0100 Subject: [PATCH 6/8] dccp: Add check for truncated ICMPv6 DCCP error packets This patch adds a minimum-length check for ICMPv6 packets, as per the previous patch for ICMPv4 payloads. Signed-off-by: Wei Yongjun Signed-off-by: Gerrit Renker --- net/dccp/ipv6.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 25826b1bf68..5e1ee0da2c4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -96,6 +96,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return; + } + sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); From 2c3abab7c95295f319dc8899b74cbd60140fcdfb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 27 Jul 2008 03:59:24 -0700 Subject: [PATCH 7/8] ipcomp: Fix warnings after ipcomp consolidation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/ipv4/ipcomp.c: In function ‘ipcomp4_init_state’: net/ipv4/ipcomp.c:109: warning: unused variable ‘calg_desc’ net/ipv4/ipcomp.c:108: warning: unused variable ‘ipcd’ net/ipv4/ipcomp.c:107: warning: ‘err’ may be used uninitialized in this function net/ipv6/ipcomp6.c: In function ‘ipcomp6_init_state’: net/ipv6/ipcomp6.c:139: warning: unused variable ‘calg_desc’ net/ipv6/ipcomp6.c:138: warning: unused variable ‘ipcd’ net/ipv6/ipcomp6.c:137: warning: ‘err’ may be used uninitialized in this function Signed-off-by: David S. Miller --- net/ipv4/ipcomp.c | 4 +--- net/ipv6/ipcomp6.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a42b64d040c..38ccb6dfb02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -104,9 +104,7 @@ out: static int ipcomp4_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 0cfcea42153..4545e430686 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -134,9 +134,7 @@ out: static int ipcomp6_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { From 6f9f489a4eeaa3c8a8618e078a5270d2c4872b67 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jul 2008 04:40:51 -0700 Subject: [PATCH 8/8] net: missing bits of net-namespace / sysctl Piss-poor sysctl registration API strikes again, film at 11... What we really need is _pathname_ required to be present in already registered table, so that kernel could warn about bad order. That's the next target for sysctl stuff (and generally saner and more explicit order of initialization of ipv[46] internals wouldn't hurt either). For the time being, here are full fixups required by ..._rotable() stuff; we make per-net sysctl sets descendents of "ro" one and make sure that sufficient skeleton is there before we start registering per-net sysctls. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ include/net/route.h | 2 -- net/ipv4/route.c | 11 ++++++++++- net/ipv4/sysctl_net_ipv4.c | 14 -------------- net/ipv6/af_inet6.c | 12 ++++++++++++ net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++ net/sysctl_net.c | 4 +++- 7 files changed, 43 insertions(+), 18 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2d5c18514a2..113028fb8f6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -608,6 +608,8 @@ extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); extern struct ctl_table *ipv6_route_sysctl_init(struct net *net); extern int ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); +extern int ipv6_static_sysctl_register(void); +extern void ipv6_static_sysctl_unregister(void); #endif #endif /* __KERNEL__ */ diff --git a/include/net/route.h b/include/net/route.h index 3140cc50085..4f0d8c14736 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -204,6 +204,4 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } -extern ctl_table ipv4_route_table[]; - #endif /* _ROUTE_H */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a507c5e27d0..380d6474cf6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2914,7 +2914,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } -ctl_table ipv4_route_table[] = { +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3216,6 +3216,15 @@ int __init ip_rt_init(void) return rc; } +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. + */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_route_path, ipv4_route_table); +} + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d63e9388d92..770d827f5ab 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -401,13 +401,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, @@ -882,11 +875,4 @@ static __init int sysctl_ipv4_init(void) return 0; } -/* set enough of tree skeleton to get rid of ordering problems */ -void __init ip_static_sysctl_init(void) -{ - static ctl_table table[1]; - register_sysctl_paths(net_ipv4_ctl_path, table); -} - __initcall(sysctl_ipv4_init); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c708ca84229..95055f8c3f3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -934,6 +934,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_sock; +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); + if (err) + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -1058,6 +1063,10 @@ ipmr_fail: icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); @@ -1113,6 +1122,9 @@ static void __exit inet6_exit(void) rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 5c99274558b..e6dfaeac6be 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -150,3 +150,19 @@ void ipv6_sysctl_unregister(void) unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } + +static struct ctl_table_header *ip6_base; + +int ipv6_static_sysctl_register(void) +{ + static struct ctl_table empty[1]; + ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty); + if (ip6_base == NULL) + return -ENOMEM; + return 0; +} + +void ipv6_static_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_base); +} diff --git a/net/sysctl_net.c b/net/sysctl_net.c index cefbc367d8b..972201cd5fa 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -73,7 +73,9 @@ static struct ctl_table_root net_sysctl_ro_root = { static int sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, NULL, is_seen); + setup_sysctl_set(&net->sysctls, + &net_sysctl_ro_root.default_set, + is_seen); return 0; }