X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=net%2Fipv4%2Froute.c;h=278f46f5011beb2ab85747543f84dfd3ce7c6d1c;hb=59ed6eecff4aa00c5c5d18ffd180acac108d596e;hp=28205e5bfa9b703a30baed0afe83c24575080574;hpb=cf5046323ea254be72535648a9d090b18b8510f3;p=linux-2.6-block.git diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 28205e5bfa9b..278f46f5011b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -131,8 +131,8 @@ static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static int rt_chain_length_max __read_mostly = 20; -static void rt_worker_func(struct work_struct *work); -static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; /* * Interface to generic destination cache. @@ -787,9 +787,12 @@ static void rt_check_expire(void) struct rtable *rth, *aux, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; + unsigned long delta; u64 mult; - mult = ((u64)ip_rt_gc_interval) << rt_hash_log; + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; if (ip_rt_gc_timeout > 1) do_div(mult, ip_rt_gc_timeout); goal = (unsigned int)mult; @@ -1064,7 +1067,8 @@ work_done: out: return 0; } -static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) +static int rt_intern_hash(unsigned hash, struct rtable *rt, + struct rtable **rp, struct sk_buff *skb) { struct rtable *rth, **rthp; unsigned long now; @@ -1081,8 +1085,35 @@ restart: now = jiffies; if (!rt_caching(dev_net(rt->u.dst.dev))) { - rt_drop(rt); - return 0; + /* + * If we're not caching, just tell the caller we + * were successful and don't touch the route. The + * caller hold the sole reference to the cache entry, and + * it will be released when the caller is done with it. + * If we drop it here, the callers have no way to resolve routes + * when we're not caching. Instead, just point *rp at rt, so + * the caller gets a single use out of the route + * Note that we do rt_free on this new route entry, so that + * once its refcount hits zero, we are still able to reap it + * (Thanks Alexey) + * Note also the rt_free uses call_rcu. We don't actually + * need rcu protection here, this is just our path to get + * on the route gc list. + */ + + if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + int err = arp_bind_neighbour(&rt->u.dst); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table failure & not caching routes.\n"); + rt_drop(rt); + return err; + } + } + + rt_free(rt); + goto skip_hashing; } rthp = &rt_hash_table[hash].chain; @@ -1114,7 +1145,10 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); - *rp = rth; + if (rp) + *rp = rth; + else + skb_dst_set(skb, &rth->u.dst); return 0; } @@ -1196,7 +1230,8 @@ restart: #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", + hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); @@ -1210,7 +1245,12 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); - *rp = rt; + +skip_hashing: + if (rp) + *rp = rt; + else + skb_dst_set(skb, &rt->u.dst); return 0; } @@ -1407,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, &netevent); rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt)) + if (!rt_intern_hash(hash, rt, &rt, NULL)) ip_rt_put(rt); goto do_next; } @@ -1473,7 +1513,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) void ip_rt_send_redirect(struct sk_buff *skb) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct in_device *in_dev = in_dev_get(rt->u.dst.dev); if (!in_dev) @@ -1521,7 +1561,7 @@ out: static int ip_error(struct sk_buff *skb) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); unsigned long now; int code; @@ -1698,7 +1738,7 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); - rt = skb->rtable; + rt = skb_rtable(skb); if (rt) dst_set_expires(&rt->u.dst, 0); } @@ -1858,7 +1898,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, &skb->rtable); + return rt_intern_hash(hash, rth, NULL, skb); e_nobufs: in_dev_put(in_dev); @@ -2019,7 +2059,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, rt_genid(dev_net(rth->u.dst.dev))); - return rt_intern_hash(hash, rth, &skb->rtable); + return rt_intern_hash(hash, rth, NULL, skb); } /* @@ -2175,7 +2215,7 @@ local_input: } rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, &skb->rtable); + err = rt_intern_hash(hash, rth, NULL, skb); goto done; no_route: @@ -2244,7 +2284,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); - skb->rtable = rth; + skb_dst_set(skb, &rth->u.dst); return 0; } RT_CACHE_STAT_INC(in_hlist_search); @@ -2420,7 +2460,7 @@ static int ip_mkroute_output(struct rtable **rp, if (err == 0) { hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp); + err = rt_intern_hash(hash, rth, rp, NULL); } return err; @@ -2763,7 +2803,7 @@ static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; long expires; @@ -2907,7 +2947,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = skb->rtable; + rt = skb_rtable(skb); if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { @@ -2927,7 +2967,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb->rtable = rt; + skb_dst_set(skb, &rt->u.dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -2968,15 +3008,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (rt_is_expired(rt)) continue; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; } - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); } rcu_read_unlock_bh(); } @@ -3390,6 +3430,8 @@ int __init ip_rt_init(void) /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval);