[IPV6]: Multiple Routing Tables

Adds the framework to support multiple IPv6 routing tables.
Currently all automatically generated routes are put into the
same table. This could be changed at a later point after
considering the produced locking overhead.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Thomas Graf 2006-08-04 23:20:06 -07:00 committed by David S. Miller
parent 5d0bbeeb14
commit c71099acce
6 changed files with 499 additions and 195 deletions

View file

@ -51,6 +51,8 @@ struct rt6key
int plen;
};
struct fib6_table;
struct rt6_info
{
union {
@ -71,6 +73,7 @@ struct rt6_info
u32 rt6i_flags;
u32 rt6i_metric;
atomic_t rt6i_ref;
struct fib6_table *rt6i_table;
struct rt6key rt6i_dst;
struct rt6key rt6i_src;
@ -143,12 +146,43 @@ struct rt6_statistics {
typedef void (*f_pnode)(struct fib6_node *fn, void *);
extern struct fib6_node ip6_routing_table;
/*
 * One IPv6 routing table: a FIB tree together with the lock that is
 * taken around operations on that tree.
 */
struct fib6_table {
/* Linkage into a fib_table_hash bucket (added by fib6_link_table()). */
struct hlist_node tb6_hlist;
/* Table identifier; fib6_get_table() matches on this value. */
u32 tb6_id;
/* Taken (read or write, _bh) around lookups/changes on tb6_root. */
rwlock_t tb6_lock;
/* Root node of this table's tree, passed to fib6_lookup()/fib6_add(). */
struct fib6_node tb6_root;
};
#define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC
#define RT6_TABLE_MAIN RT_TABLE_MAIN
#define RT6_TABLE_LOCAL RT6_TABLE_MAIN
#define RT6_TABLE_DFLT RT6_TABLE_MAIN
#define RT6_TABLE_INFO RT6_TABLE_MAIN
#define RT6_TABLE_PREFIX RT6_TABLE_MAIN
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_MIN 1
#define FIB6_TABLE_MAX RT_TABLE_MAX
#else
#define FIB6_TABLE_MIN RT_TABLE_MAIN
#define FIB6_TABLE_MAX FIB6_TABLE_MIN
#endif
#define RT6_F_STRICT 1
#define RT6_F_HAS_SADDR 2
typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
struct flowi *, int);
/*
* exported functions
*/
extern struct fib6_table * fib6_get_table(u32 id);
extern struct fib6_table * fib6_new_table(u32 id);
extern struct dst_entry * fib6_rule_lookup(struct flowi *fl, int flags,
pol_lookup_t lookup);
extern struct fib6_node *fib6_lookup(struct fib6_node *root,
struct in6_addr *daddr,
struct in6_addr *saddr);
@ -161,6 +195,9 @@ extern void fib6_clean_tree(struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
int prune, void *arg);
extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
int prune, void *arg);
extern int fib6_walk(struct fib6_walker_t *w);
extern int fib6_walk_continue(struct fib6_walker_t *w);

View file

@ -58,7 +58,8 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg);
extern int ip6_route_add(struct in6_rtmsg *rtmsg,
struct nlmsghdr *,
void *rtattr,
struct netlink_skb_parms *req);
struct netlink_skb_parms *req,
u32 table_id);
extern int ip6_ins_rt(struct rt6_info *,
struct nlmsghdr *,
void *rtattr,

View file

@ -136,3 +136,9 @@ config IPV6_TUNNEL
If unsure, say N.
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
depends on IPV6 && EXPERIMENTAL
---help---
Support multiple routing tables.

View file

@ -1525,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
ip6_route_add(&rtmsg, NULL, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX);
}
/* Create "default" multicast route to the interface */
@ -1542,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev)
rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_flags = RTF_UP;
rtmsg.rtmsg_type = RTMSG_NEWROUTE;
ip6_route_add(&rtmsg, NULL, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL);
}
static void sit_route_add(struct net_device *dev)
@ -1559,7 +1559,7 @@ static void sit_route_add(struct net_device *dev)
rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN);
}
static void addrconf_add_lroute(struct net_device *dev)

View file

@ -26,6 +26,7 @@
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/list.h>
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
@ -147,6 +148,126 @@ static __inline__ void rt6_release(struct rt6_info *rt)
dst_free(&rt->u.dst);
}
static struct fib6_table fib6_main_tbl = {
.tb6_id = RT6_TABLE_MAIN,
.tb6_lock = RW_LOCK_UNLOCKED,
.tb6_root = {
.leaf = &ip6_null_entry,
.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
},
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Allocate and initialise an empty routing table.
 *
 * @id: identifier stored in the new table's tb6_id.
 *
 * The table is zero-allocated with GFP_ATOMIC; its root node is set up
 * the same way as the static main table (leaf = ip6_null_entry, root
 * flags set). The table is NOT linked into the hash here.
 *
 * Returns the new table, or NULL if the allocation failed.
 */
static struct fib6_table *fib6_alloc_table(u32 id)
{
	struct fib6_table *table;

	table = kzalloc(sizeof(*table), GFP_ATOMIC);
	if (table != NULL) {
		table->tb6_id = id;
		/*
		 * Run-time initialisation of a lock must go through
		 * rwlock_init(); assigning RW_LOCK_UNLOCKED is the
		 * deprecated form and defeats lock debugging.
		 */
		rwlock_init(&table->tb6_lock);
		table->tb6_root.leaf = &ip6_null_entry;
		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	}

	return table;
}
/*
 * Add @tb to the fib_table_hash bucket selected by the low bits of its
 * table id.
 */
static void fib6_link_table(struct fib6_table *tb)
{
	struct hlist_head *bucket;

	bucket = &fib_table_hash[tb->tb6_id & (FIB_TABLE_HASHSZ - 1)];

	/*
	 * No protection necessary: this is the only list mutation
	 * operation, and tables never disappear once they exist.
	 */
	hlist_add_head_rcu(&tb->tb6_hlist, bucket);
}
/*
 * Return the table with the given id, creating and hashing it first if
 * it does not exist yet. An id of 0 is treated as RT6_TABLE_MAIN.
 *
 * Returns NULL only when a new table had to be allocated and the
 * allocation failed.
 */
struct fib6_table *fib6_new_table(u32 id)
{
	struct fib6_table *tb;

	if (!id)
		id = RT6_TABLE_MAIN;

	tb = fib6_get_table(id);
	if (tb == NULL) {
		tb = fib6_alloc_table(id);
		if (tb)
			fib6_link_table(tb);
	}

	return tb;
}
/*
 * Look up an existing table by id; an id of 0 is treated as
 * RT6_TABLE_MAIN. The hash bucket is walked under rcu_read_lock().
 *
 * Returns the matching table, or NULL if no table has that id.
 */
struct fib6_table *fib6_get_table(u32 id)
{
	struct fib6_table *tb, *found = NULL;
	struct hlist_head *bucket;
	struct hlist_node *pos;

	if (!id)
		id = RT6_TABLE_MAIN;
	bucket = &fib_table_hash[id & (FIB_TABLE_HASHSZ - 1)];

	rcu_read_lock();
	hlist_for_each_entry_rcu(tb, pos, bucket, tb6_hlist) {
		if (tb->tb6_id == id) {
			found = tb;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
/*
 * Resolve a flow to a dst entry by running @lookup on a routing table.
 * No policy rules are consulted yet, so the main table is always used.
 */
struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
				   pol_lookup_t lookup)
{
	/*
	 * TODO: Add rule lookup
	 */
	return (struct dst_entry *) lookup(fib6_get_table(RT6_TABLE_MAIN),
					   fl, flags);
}
/*
 * Init-time setup for the multiple-tables case: link the statically
 * allocated main table into fib_table_hash so that fib6_get_table()
 * can find it.
 */
static void __init fib6_tables_init(void)
{
fib6_link_table(&fib6_main_tbl);
}
#else
/*
 * Single-table variants, used when CONFIG_IPV6_MULTIPLE_TABLES is not
 * set: every request resolves to the static main table.
 */

/* "Creating" a table just returns the main table; @id is ignored. */
struct fib6_table *fib6_new_table(u32 id)
{
return fib6_get_table(id);
}
/* Always returns the main table regardless of @id; never NULL. */
struct fib6_table *fib6_get_table(u32 id)
{
return &fib6_main_tbl;
}
/* Run @lookup directly against the main table. */
struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
pol_lookup_t lookup)
{
return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
}
/* Nothing to set up: there is no table hash in this configuration. */
static void __init fib6_tables_init(void)
{
}
#endif
/*
* Routing Table
@ -1064,6 +1185,22 @@ void fib6_clean_tree(struct fib6_node *root,
fib6_walk(&c.w);
}
/*
 * Run fib6_clean_tree() with @func/@prune/@arg over every existing
 * table whose id lies in [FIB6_TABLE_MIN, FIB6_TABLE_MAX]. Each table's
 * write lock is held (with bottom halves disabled) while its tree is
 * being cleaned.
 */
void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
		    int prune, void *arg)
{
	int id;

	for (id = FIB6_TABLE_MIN; id <= FIB6_TABLE_MAX; id++) {
		struct fib6_table *tb = fib6_get_table(id);

		/* Ids with no table behind them are simply skipped. */
		if (tb == NULL)
			continue;

		write_lock_bh(&tb->tb6_lock);
		fib6_clean_tree(&tb->tb6_root, func, prune, arg);
		write_unlock_bh(&tb->tb6_lock);
	}
}
static int fib6_prune_clone(struct rt6_info *rt, void *arg)
{
if (rt->rt6i_flags & RTF_CACHE) {
@ -1142,11 +1279,8 @@ void fib6_run_gc(unsigned long dummy)
}
gc_args.more = 0;
write_lock_bh(&rt6_lock);
ndisc_dst_gc(&gc_args.more);
fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
write_unlock_bh(&rt6_lock);
fib6_clean_all(fib6_age, 0, NULL);
if (gc_args.more)
mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@ -1165,6 +1299,8 @@ void __init fib6_init(void)
NULL, NULL);
if (!fib6_node_kmem)
panic("cannot create fib6_nodes cache");
fib6_tables_init();
}
void fib6_gc_cleanup(void)

View file

@ -140,16 +140,6 @@ struct rt6_info ip6_null_entry = {
.rt6i_ref = ATOMIC_INIT(1),
};
struct fib6_node ip6_routing_table = {
.leaf = &ip6_null_entry,
.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};
/* Protects all the ip6 fib */
DEFINE_RWLOCK(rt6_lock);
/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
@ -188,8 +178,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
time_after(jiffies, rt->rt6i_expires));
}
/*
 * Non-zero when @daddr is a multicast or link-local address; callers
 * use this to decide whether to request RT6_F_STRICT matching.
 */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	int type = ipv6_addr_type(daddr);

	return type & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
}
/*
* Route lookup. Any rt6_lock is implied.
* Route lookup. Any table->tb6_lock is implied.
*/
static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@ -441,27 +437,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
#endif
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
int oif, int strict)
/*
 * BACKTRACK() - climb towards the tree root after a failed strict match.
 *
 * Expands in place inside a lookup function and relies on that
 * function providing the locals `rt`, `fn` and `flags` and the labels
 * `out` and `restart`.
 *
 * Only acts when rt is ip6_null_entry and RT6_F_STRICT is set in flags.
 * It then walks fn up through ->parent:
 *   - at a node flagged RTN_TL_ROOT it takes a reference on rt and
 *     jumps to `out`;
 *   - at a node flagged RTN_RTINFO it jumps to `restart` so the caller
 *     retries selection from that node.
 *
 * NOTE(review): a reference is taken here before `goto out`; a user
 * whose `out:` path also calls dst_hold() would end up holding twice -
 * confirm for each user of this macro.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
while ((fn = fn->parent) != NULL) { \
if (fn->fn_flags & RTN_TL_ROOT) { \
dst_hold(&rt->u.dst); \
goto out; \
} \
if (fn->fn_flags & RTN_RTINFO) \
goto restart; \
} \
}
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt;
read_lock_bh(&rt6_lock);
fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
rt = rt6_device_match(fn->leaf, oif, strict);
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
BACKTRACK();
dst_hold(&rt->u.dst);
rt->u.dst.__use++;
read_unlock_bh(&rt6_lock);
out:
read_unlock_bh(&table->tb6_lock);
rt->u.dst.lastuse = jiffies;
if (rt->u.dst.error == 0)
rt->u.dst.__use++;
return rt;
dst_release(&rt->u.dst);
}
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
int oif, int strict)
{
struct flowi fl = {
.oif = oif,
.nl_u = {
.ip6_u = {
.daddr = *daddr,
/* TODO: saddr */
},
},
};
struct dst_entry *dst;
int flags = strict ? RT6_F_STRICT : 0;
dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
dst_release(dst);
return NULL;
}
/* ip6_ins_rt is called with FREE rt6_lock.
/* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
be destroyed.
@ -471,10 +506,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req)
{
int err;
struct fib6_table *table;
write_lock_bh(&rt6_lock);
err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
write_unlock_bh(&rt6_lock);
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
write_unlock_bh(&table->tb6_lock);
return err;
}
@ -532,116 +569,34 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
return rt;
}
#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
while ((fn = fn->parent) != NULL) { \
if (fn->fn_flags & RTN_ROOT) { \
goto out; \
} \
if (fn->fn_flags & RTN_RTINFO) \
goto restart; \
} \
}
void ip6_route_input(struct sk_buff *skb)
struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
int strict;
int strict = 0;
int attempts = 3;
int err;
int reachable = RT6_SELECT_F_REACHABLE;
strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
if (flags & RT6_F_STRICT)
strict = RT6_SELECT_F_IFACE;
relookup:
read_lock_bh(&rt6_lock);
read_lock_bh(&table->tb6_lock);
restart_2:
fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
&skb->nh.ipv6h->saddr);
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
BACKTRACK();
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
else {
#if CLONE_OFFLINK_ROUTE
nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
#else
goto out2;
#endif
}
dst_release(&rt->u.dst);
rt = nrt ? : &ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
if (!err)
goto out2;
}
if (--attempts <= 0)
goto out2;
/*
* Race condition! In the gap, when rt6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
goto relookup;
out:
if (reachable) {
reachable = 0;
goto restart_2;
}
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
skb->dst = (struct dst_entry *) rt;
return;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
int strict;
int attempts = 3;
int err;
int reachable = RT6_SELECT_F_REACHABLE;
strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
relookup:
read_lock_bh(&rt6_lock);
restart_2:
fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
BACKTRACK();
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@ -667,7 +622,7 @@ restart:
goto out2;
/*
* Race condition! In the gap, when rt6_lock was
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
@ -679,11 +634,116 @@ out:
goto restart_2;
}
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
return &rt->u.dst;
return rt;
}
/*
 * Route an incoming packet: build a flow from the packet's IPv6 header
 * and receiving device, look it up, and attach the result to skb->dst.
 * Multicast and link-local destinations are looked up with
 * RT6_F_STRICT.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct ipv6hdr *iph = skb->nh.ipv6h;
	struct flowi fl = {
		.iif = skb->dev->ifindex,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = iph->saddr,
				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
			},
		},
		.proto = iph->nexthdr,
	};
	int flags = rt6_need_strict(&iph->daddr) ? RT6_F_STRICT : 0;

	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
}
static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
int strict = 0;
int attempts = 3;
int err;
int reachable = RT6_SELECT_F_REACHABLE;
if (flags & RT6_F_STRICT)
strict = RT6_SELECT_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
BACKTRACK();
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
else {
#if CLONE_OFFLINK_ROUTE
nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
goto out2;
#endif
}
dst_release(&rt->u.dst);
rt = nrt ? : &ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
err = ip6_ins_rt(nrt, NULL, NULL, NULL);
if (!err)
goto out2;
}
if (--attempts <= 0)
goto out2;
/*
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
goto relookup;
out:
if (reachable) {
reachable = 0;
goto restart_2;
}
dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
return rt;
}
/*
 * Route an outgoing flow. Multicast and link-local destinations are
 * looked up with RT6_F_STRICT. @sk is not used by this function.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	int flags = rt6_need_strict(&fl->fl6_dst) ? RT6_F_STRICT : 0;

	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
@ -906,7 +966,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
*/
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req)
void *_rtattr, struct netlink_skb_parms *req,
u32 table_id)
{
int err;
struct rtmsg *r;
@ -914,6 +975,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type;
rta = (struct rtattr **) _rtattr;
@ -937,6 +999,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
if (rtmsg->rtmsg_metric == 0)
rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
table = fib6_new_table(table_id);
if (table == NULL) {
err = -ENOBUFS;
goto out;
}
rt = ip6_dst_alloc();
if (rt == NULL) {
@ -1093,6 +1161,7 @@ install_route:
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
rt->u.dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
return ip6_ins_rt(rt, nlh, _rtattr, req);
out:
@ -1108,26 +1177,35 @@ out:
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
int err;
struct fib6_table *table;
write_lock_bh(&rt6_lock);
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
err = fib6_del(rt, nlh, _rtattr, req);
dst_release(&rt->u.dst);
write_unlock_bh(&rt6_lock);
write_unlock_bh(&table->tb6_lock);
return err;
}
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req,
u32 table_id)
{
struct fib6_table *table;
struct fib6_node *fn;
struct rt6_info *rt;
int err = -ESRCH;
read_lock_bh(&rt6_lock);
table = fib6_get_table(table_id);
if (table == NULL)
return err;
fn = fib6_locate(&ip6_routing_table,
read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root,
&rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
&rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
@ -1144,12 +1222,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
rtmsg->rtmsg_metric != rt->rt6i_metric)
continue;
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
return ip6_del_rt(rt, nlh, _rtattr, req);
}
}
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
return err;
}
@ -1161,10 +1239,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
struct neighbour *neigh, u8 *lladdr, int on_link)
{
struct rt6_info *rt, *nrt = NULL;
int strict;
struct fib6_node *fn;
struct fib6_table *table;
struct netevent_redirect netevent;
/* TODO: Very lazy, might need to check all tables */
table = fib6_get_table(RT6_TABLE_MAIN);
if (table == NULL)
return;
/*
* Get the "current" route for this destination and
* check if the redirect has come from appropriate router.
@ -1175,10 +1258,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
* is a bit fuzzy and one might need to check all possible
* routes.
*/
strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
read_lock_bh(&rt6_lock);
fn = fib6_lookup(&ip6_routing_table, dest, NULL);
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, dest, NULL);
restart:
for (rt = fn->leaf; rt; rt = rt->u.next) {
/*
@ -1201,7 +1283,7 @@ restart:
}
if (rt)
dst_hold(&rt->u.dst);
else if (strict) {
else if (rt6_need_strict(dest)) {
while ((fn = fn->parent) != NULL) {
if (fn->fn_flags & RTN_ROOT)
break;
@ -1209,7 +1291,7 @@ restart:
goto restart;
}
}
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
if (!rt) {
if (net_ratelimit())
@ -1384,6 +1466,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
rt->rt6i_table = ort->rt6i_table;
}
return rt;
}
@ -1394,9 +1477,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
{
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
write_lock_bh(&rt6_lock);
fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
table = fib6_get_table(RT6_TABLE_INFO);
if (table == NULL)
return NULL;
write_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
if (!fn)
goto out;
@ -1411,7 +1499,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
break;
}
out:
write_unlock_bh(&rt6_lock);
write_unlock_bh(&table->tb6_lock);
return rt;
}
@ -1433,7 +1521,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
rtmsg.rtmsg_flags |= RTF_DEFAULT;
rtmsg.rtmsg_ifindex = ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
}
@ -1442,12 +1530,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
struct rt6_info *rt;
struct fib6_node *fn;
struct fib6_table *table;
fn = &ip6_routing_table;
table = fib6_get_table(RT6_TABLE_DFLT);
if (table == NULL)
return NULL;
write_lock_bh(&rt6_lock);
for (rt = fn->leaf; rt; rt=rt->u.next) {
write_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
if (dev == rt->rt6i_dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
@ -1455,7 +1545,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
}
if (rt)
dst_hold(&rt->u.dst);
write_unlock_bh(&rt6_lock);
write_unlock_bh(&table->tb6_lock);
return rt;
}
@ -1474,28 +1564,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
return rt6_get_dflt_router(gwaddr, dev);
}
void rt6_purge_dflt_routers(void)
{
struct rt6_info *rt;
struct fib6_table *table;
/* NOTE: Keep consistent with rt6_get_dflt_router */
table = fib6_get_table(RT6_TABLE_DFLT);
if (table == NULL)
return;
restart:
read_lock_bh(&rt6_lock);
for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
read_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
ip6_del_rt(rt, NULL, NULL, NULL);
goto restart;
}
}
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
}
int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
@ -1516,10 +1609,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
RT6_TABLE_MAIN);
break;
case SIOCDELRT:
err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
RT6_TABLE_MAIN);
break;
default:
err = -EINVAL;
@ -1593,6 +1688,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
atomic_set(&rt->u.dst.__refcnt, 1);
@ -1611,9 +1707,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
void rt6_ifdown(struct net_device *dev)
{
write_lock_bh(&rt6_lock);
fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
write_unlock_bh(&rt6_lock);
fib6_clean_all(fib6_ifdown, 0, dev);
}
struct rt6_mtu_change_arg
@ -1663,13 +1757,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
struct rt6_mtu_change_arg arg;
struct rt6_mtu_change_arg arg = {
.dev = dev,
.mtu = mtu,
};
arg.dev = dev;
arg.mtu = mtu;
read_lock_bh(&rt6_lock);
fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
read_unlock_bh(&rt6_lock);
fib6_clean_all(rt6_mtu_change_route, 0, &arg);
}
static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
@ -1719,7 +1812,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@ -1729,7 +1822,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
}
struct rt6_rtnl_dump_arg
@ -1761,6 +1854,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
rtm->rtm_dst_len = rt->rt6i_dst.plen;
rtm->rtm_src_len = rt->rt6i_src.plen;
rtm->rtm_tos = 0;
if (rt->rt6i_table)
rtm->rtm_table = rt->rt6i_table->tb6_id;
else
rtm->rtm_table = RT6_TABLE_UNSPEC;
rtm->rtm_table = RT_TABLE_MAIN;
if (rt->rt6i_flags&RTF_REJECT)
rtm->rtm_type = RTN_UNREACHABLE;
@ -1868,7 +1965,6 @@ static void fib6_dump_end(struct netlink_callback *cb)
if (w) {
cb->args[0] = 0;
fib6_walker_unlink(w);
kfree(w);
}
cb->done = (void*)cb->args[1];
@ -1883,13 +1979,20 @@ static int fib6_dump_done(struct netlink_callback *cb)
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
struct fib6_table *table;
struct rt6_rtnl_dump_arg arg;
struct fib6_walker_t *w;
int res;
int i, res = 0;
arg.skb = skb;
arg.cb = cb;
/*
* cb->args[0] = pointer to walker structure
* cb->args[1] = saved cb->done() pointer
* cb->args[2] = current table being dumped
*/
w = (void*)cb->args[0];
if (w == NULL) {
/* New dump:
@ -1905,24 +2008,48 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (w == NULL)
return -ENOMEM;
RT6_TRACE("dump<%p", w);
w->root = &ip6_routing_table;
w->func = fib6_dump_node;
w->args = &arg;
cb->args[0] = (long)w;
read_lock_bh(&rt6_lock);
res = fib6_walk(w);
read_unlock_bh(&rt6_lock);
cb->args[2] = FIB6_TABLE_MIN;
} else {
w->args = &arg;
read_lock_bh(&rt6_lock);
i = cb->args[2];
if (i > FIB6_TABLE_MAX)
goto end;
table = fib6_get_table(i);
if (table != NULL) {
read_lock_bh(&table->tb6_lock);
w->root = &table->tb6_root;
res = fib6_walk_continue(w);
read_unlock_bh(&rt6_lock);
read_unlock_bh(&table->tb6_lock);
if (res != 0) {
if (res < 0)
fib6_walker_unlink(w);
goto end;
}
#if RT6_DEBUG >= 3
if (res <= 0 && skb->len == 0)
RT6_TRACE("%p>dump end\n", w);
#endif
}
fib6_walker_unlink(w);
cb->args[2] = ++i;
}
for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
table = fib6_get_table(i);
if (table == NULL)
continue;
read_lock_bh(&table->tb6_lock);
w->root = &table->tb6_root;
res = fib6_walk(w);
read_unlock_bh(&table->tb6_lock);
if (res)
break;
}
end:
cb->args[2] = i;
res = res < 0 ? res : skb->len;
/* res < 0 is an error. (really, impossible)
res == 0 means that dump is complete, but skb still can contain data.
@ -2102,16 +2229,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
struct rt6_proc_arg arg;
arg.buffer = buffer;
arg.offset = offset;
arg.length = length;
arg.skip = 0;
arg.len = 0;
struct rt6_proc_arg arg = {
.buffer = buffer,
.offset = offset,
.length = length,
};
read_lock_bh(&rt6_lock);
fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
read_unlock_bh(&rt6_lock);
fib6_clean_all(rt6_info_route, 0, &arg);
*start = buffer;
if (offset)