Commit | Line | Data |
---|---|---|
457c8996 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
1da177e4 LT |
2 | /* |
3 | * net/core/dst.c Protocol independent destination cache. | |
4 | * | |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | |
6 | * | |
7 | */ | |
8 | ||
9 | #include <linux/bitops.h> | |
10 | #include <linux/errno.h> | |
11 | #include <linux/init.h> | |
12 | #include <linux/kernel.h> | |
86bba269 | 13 | #include <linux/workqueue.h> |
1da177e4 LT |
14 | #include <linux/mm.h> |
15 | #include <linux/module.h> | |
5a0e3ad6 | 16 | #include <linux/slab.h> |
1da177e4 | 17 | #include <linux/netdevice.h> |
1da177e4 LT |
18 | #include <linux/skbuff.h> |
19 | #include <linux/string.h> | |
20 | #include <linux/types.h> | |
e9dc8653 | 21 | #include <net/net_namespace.h> |
2fc1b5dd | 22 | #include <linux/sched.h> |
268bb0ce | 23 | #include <linux/prefetch.h> |
61adedf3 | 24 | #include <net/lwtunnel.h> |
b6ca8bd5 | 25 | #include <net/xfrm.h> |
1da177e4 LT |
26 | |
27 | #include <net/dst.h> | |
f38a9eb1 | 28 | #include <net/dst_metadata.h> |
1da177e4 | 29 | |
/* Output handler that discards the packet.
 *
 * Passes @skb to kfree_skb() and returns 0.  @net and @sk are not used.
 * dst_init() and dst_dev_put() install this as dst->output.
 */
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL(dst_discard_out);
1da177e4 | 36 | |
3fb07daf | 37 | const struct dst_metrics dst_default_metrics = { |
a37e6e34 ED |
38 | /* This initializer is needed to force linker to place this variable |
39 | * into const section. Otherwise it might end into bss section. | |
40 | * We really want to avoid false sharing on this variable, and catch | |
41 | * any writes on it. | |
42 | */ | |
9620fef2 | 43 | .refcnt = REFCOUNT_INIT(1), |
a37e6e34 | 44 | }; |
d4ead6b3 | 45 | EXPORT_SYMBOL(dst_default_metrics); |
a37e6e34 | 46 | |
f38a9eb1 TG |
47 | void dst_init(struct dst_entry *dst, struct dst_ops *ops, |
48 | struct net_device *dev, int initial_ref, int initial_obsolete, | |
49 | unsigned short flags) | |
1da177e4 | 50 | { |
5c1e6aa3 | 51 | dst->dev = dev; |
d62607c3 | 52 | netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); |
1da177e4 | 53 | dst->ops = ops; |
3fb07daf | 54 | dst_init_metrics(dst, dst_default_metrics.metrics, true); |
cf911662 | 55 | dst->expires = 0UL; |
cf911662 DM |
56 | #ifdef CONFIG_XFRM |
57 | dst->xfrm = NULL; | |
58 | #endif | |
5c1e6aa3 | 59 | dst->input = dst_discard; |
ede2059d | 60 | dst->output = dst_discard_out; |
cf911662 | 61 | dst->error = 0; |
5c1e6aa3 | 62 | dst->obsolete = initial_obsolete; |
cf911662 DM |
63 | dst->header_len = 0; |
64 | dst->trailer_len = 0; | |
65 | #ifdef CONFIG_IP_ROUTE_CLASSID | |
66 | dst->tclassid = 0; | |
1da177e4 | 67 | #endif |
61adedf3 | 68 | dst->lwtstate = NULL; |
5c1e6aa3 | 69 | atomic_set(&dst->__refcnt, initial_ref); |
cf911662 | 70 | dst->__use = 0; |
5c1e6aa3 DM |
71 | dst->lastuse = jiffies; |
72 | dst->flags = flags; | |
957c665f DM |
73 | if (!(flags & DST_NOCOUNT)) |
74 | dst_entries_add(ops, 1); | |
f38a9eb1 TG |
75 | } |
76 | EXPORT_SYMBOL(dst_init); | |
77 | ||
78 | void *dst_alloc(struct dst_ops *ops, struct net_device *dev, | |
79 | int initial_ref, int initial_obsolete, unsigned short flags) | |
80 | { | |
81 | struct dst_entry *dst; | |
82 | ||
cf86a086 ED |
83 | if (ops->gc && |
84 | !(flags & DST_NOCOUNT) && | |
85 | dst_entries_get_fast(ops) > ops->gc_thresh) { | |
22c2ad61 | 86 | if (ops->gc(ops)) { |
cf86a086 | 87 | pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n"); |
f38a9eb1 | 88 | return NULL; |
22c2ad61 | 89 | } |
f38a9eb1 TG |
90 | } |
91 | ||
92 | dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); | |
93 | if (!dst) | |
94 | return NULL; | |
95 | ||
96 | dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); | |
97 | ||
1da177e4 LT |
98 | return dst; |
99 | } | |
598ed936 | 100 | EXPORT_SYMBOL(dst_alloc); |
1da177e4 | 101 | |
1da177e4 LT |
102 | struct dst_entry *dst_destroy(struct dst_entry * dst) |
103 | { | |
b92cf4aa | 104 | struct dst_entry *child = NULL; |
1da177e4 LT |
105 | |
106 | smp_rmb(); | |
107 | ||
b92cf4aa | 108 | #ifdef CONFIG_XFRM |
b6ca8bd5 DM |
109 | if (dst->xfrm) { |
110 | struct xfrm_dst *xdst = (struct xfrm_dst *) dst; | |
111 | ||
112 | child = xdst->child; | |
113 | } | |
b92cf4aa | 114 | #endif |
957c665f DM |
115 | if (!(dst->flags & DST_NOCOUNT)) |
116 | dst_entries_add(dst->ops, -1); | |
1da177e4 LT |
117 | |
118 | if (dst->ops->destroy) | |
119 | dst->ops->destroy(dst); | |
d62607c3 | 120 | netdev_put(dst->dev, &dst->dev_tracker); |
f38a9eb1 | 121 | |
e252b3d1 WC |
122 | lwtstate_put(dst->lwtstate); |
123 | ||
f38a9eb1 | 124 | if (dst->flags & DST_METADATA) |
d71785ff | 125 | metadata_dst_free((struct metadata_dst *)dst); |
f38a9eb1 TG |
126 | else |
127 | kmem_cache_free(dst->ops->kmem_cachep, dst); | |
1da177e4 LT |
128 | |
129 | dst = child; | |
52df157f WW |
130 | if (dst) |
131 | dst_release_immediate(dst); | |
1da177e4 LT |
132 | return NULL; |
133 | } | |
598ed936 | 134 | EXPORT_SYMBOL(dst_destroy); |
1da177e4 | 135 | |
f8864972 ED |
136 | static void dst_destroy_rcu(struct rcu_head *head) |
137 | { | |
138 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | |
139 | ||
140 | dst = dst_destroy(dst); | |
f8864972 ED |
141 | } |
142 | ||
4a6ce2b6 WW |
143 | /* Operations to mark dst as DEAD and clean up the net device referenced |
144 | * by dst: | |
1be107de | 145 | * 1. put the dst under blackhole interface and discard all tx/rx packets |
4a6ce2b6 WW |
146 | * on this route. |
147 | * 2. release the net_device | |
148 | * This function should be called when removing routes from the fib tree | |
149 | * in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to | |
150 | * make the next dst_ops->check() fail. | |
151 | */ | |
152 | void dst_dev_put(struct dst_entry *dst) | |
153 | { | |
154 | struct net_device *dev = dst->dev; | |
155 | ||
156 | dst->obsolete = DST_OBSOLETE_DEAD; | |
157 | if (dst->ops->ifdown) | |
158 | dst->ops->ifdown(dst, dev, true); | |
159 | dst->input = dst_discard; | |
160 | dst->output = dst_discard_out; | |
8d7017fd | 161 | dst->dev = blackhole_netdev; |
d62607c3 JK |
162 | netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, |
163 | GFP_ATOMIC); | |
4a6ce2b6 WW |
164 | } |
165 | EXPORT_SYMBOL(dst_dev_put); | |
166 | ||
8d330868 IJ |
167 | void dst_release(struct dst_entry *dst) |
168 | { | |
169 | if (dst) { | |
598ed936 | 170 | int newrefcnt; |
ef711cf1 | 171 | |
598ed936 | 172 | newrefcnt = atomic_dec_return(&dst->__refcnt); |
adecda5b | 173 | if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) |
8bf4ada2 KK |
174 | net_warn_ratelimited("%s: dst:%p refcnt:%d\n", |
175 | __func__, dst, newrefcnt); | |
b2a9c0ed | 176 | if (!newrefcnt) |
f8864972 | 177 | call_rcu(&dst->rcu_head, dst_destroy_rcu); |
8d330868 IJ |
178 | } |
179 | } | |
180 | EXPORT_SYMBOL(dst_release); | |
181 | ||
5f56f409 WW |
182 | void dst_release_immediate(struct dst_entry *dst) |
183 | { | |
184 | if (dst) { | |
185 | int newrefcnt; | |
186 | ||
187 | newrefcnt = atomic_dec_return(&dst->__refcnt); | |
adecda5b | 188 | if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) |
5f56f409 WW |
189 | net_warn_ratelimited("%s: dst:%p refcnt:%d\n", |
190 | __func__, dst, newrefcnt); | |
191 | if (!newrefcnt) | |
192 | dst_destroy(dst); | |
193 | } | |
194 | } | |
195 | EXPORT_SYMBOL(dst_release_immediate); | |
196 | ||
62fa8a84 DM |
197 | u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) |
198 | { | |
3fb07daf | 199 | struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); |
62fa8a84 DM |
200 | |
201 | if (p) { | |
3fb07daf | 202 | struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); |
62fa8a84 DM |
203 | unsigned long prev, new; |
204 | ||
9620fef2 | 205 | refcount_set(&p->refcnt, 1); |
3fb07daf | 206 | memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); |
62fa8a84 DM |
207 | |
208 | new = (unsigned long) p; | |
209 | prev = cmpxchg(&dst->_metrics, old, new); | |
210 | ||
211 | if (prev != old) { | |
212 | kfree(p); | |
3fb07daf | 213 | p = (struct dst_metrics *)__DST_METRICS_PTR(prev); |
62fa8a84 DM |
214 | if (prev & DST_METRICS_READ_ONLY) |
215 | p = NULL; | |
3fb07daf | 216 | } else if (prev & DST_METRICS_REFCOUNTED) { |
9620fef2 | 217 | if (refcount_dec_and_test(&old_p->refcnt)) |
3fb07daf | 218 | kfree(old_p); |
62fa8a84 DM |
219 | } |
220 | } | |
3fb07daf ED |
221 | BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); |
222 | return (u32 *)p; | |
62fa8a84 DM |
223 | } |
224 | EXPORT_SYMBOL(dst_cow_metrics_generic); | |
225 | ||
226 | /* Caller asserts that dst_metrics_read_only(dst) is false. */ | |
227 | void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) | |
228 | { | |
229 | unsigned long prev, new; | |
230 | ||
3fb07daf | 231 | new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; |
62fa8a84 DM |
232 | prev = cmpxchg(&dst->_metrics, old, new); |
233 | if (prev == old) | |
234 | kfree(__DST_METRICS_PTR(old)); | |
235 | } | |
236 | EXPORT_SYMBOL(__dst_destroy_metrics_generic); | |
237 | ||
c4c877b2 DB |
238 | struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie) |
239 | { | |
240 | return NULL; | |
241 | } | |
242 | ||
243 | u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old) | |
244 | { | |
245 | return NULL; | |
246 | } | |
247 | ||
248 | struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, | |
249 | struct sk_buff *skb, | |
250 | const void *daddr) | |
251 | { | |
252 | return NULL; | |
253 | } | |
254 | ||
255 | void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, | |
256 | struct sk_buff *skb, u32 mtu, | |
257 | bool confirm_neigh) | |
258 | { | |
259 | } | |
260 | EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu); | |
261 | ||
/* .redirect hook of dst_blackhole_ops: intentionally does nothing. */
void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb)
{
}
EXPORT_SYMBOL_GPL(dst_blackhole_redirect);
267 | ||
268 | unsigned int dst_blackhole_mtu(const struct dst_entry *dst) | |
269 | { | |
270 | unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); | |
271 | ||
272 | return mtu ? : dst->dev->mtu; | |
273 | } | |
274 | EXPORT_SYMBOL_GPL(dst_blackhole_mtu); | |
275 | ||
a188bb56 DB |
276 | static struct dst_ops dst_blackhole_ops = { |
277 | .family = AF_UNSPEC, | |
278 | .neigh_lookup = dst_blackhole_neigh_lookup, | |
279 | .check = dst_blackhole_check, | |
280 | .cow_metrics = dst_blackhole_cow_metrics, | |
281 | .update_pmtu = dst_blackhole_update_pmtu, | |
282 | .redirect = dst_blackhole_redirect, | |
283 | .mtu = dst_blackhole_mtu, | |
f38a9eb1 TG |
284 | }; |
285 | ||
3fcece12 JK |
286 | static void __metadata_dst_init(struct metadata_dst *md_dst, |
287 | enum metadata_type type, u8 optslen) | |
f38a9eb1 | 288 | { |
f38a9eb1 TG |
289 | struct dst_entry *dst; |
290 | ||
f38a9eb1 | 291 | dst = &md_dst->dst; |
a188bb56 | 292 | dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, |
a4c2fd7f | 293 | DST_METADATA | DST_NOCOUNT); |
f38a9eb1 | 294 | memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); |
3fcece12 | 295 | md_dst->type = type; |
d3aa45ce AS |
296 | } |
297 | ||
3fcece12 JK |
298 | struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, |
299 | gfp_t flags) | |
d3aa45ce AS |
300 | { |
301 | struct metadata_dst *md_dst; | |
302 | ||
303 | md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); | |
304 | if (!md_dst) | |
305 | return NULL; | |
306 | ||
3fcece12 | 307 | __metadata_dst_init(md_dst, type, optslen); |
f38a9eb1 TG |
308 | |
309 | return md_dst; | |
310 | } | |
311 | EXPORT_SYMBOL_GPL(metadata_dst_alloc); | |
312 | ||
/* Free a metadata_dst obtained from kmalloc().
 *
 * With CONFIG_DST_CACHE, an object of type METADATA_IP_TUNNEL first has its
 * tunnel-info dst cache destroyed.
 */
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
	if (md_dst->type == METADATA_IP_TUNNEL)
		dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
	kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
d71785ff | 322 | |
3fcece12 JK |
323 | struct metadata_dst __percpu * |
324 | metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) | |
d3aa45ce AS |
325 | { |
326 | int cpu; | |
327 | struct metadata_dst __percpu *md_dst; | |
328 | ||
329 | md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen, | |
330 | __alignof__(struct metadata_dst), flags); | |
331 | if (!md_dst) | |
332 | return NULL; | |
333 | ||
334 | for_each_possible_cpu(cpu) | |
3fcece12 | 335 | __metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen); |
d3aa45ce AS |
336 | |
337 | return md_dst; | |
338 | } | |
339 | EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); | |
d66f2b91 JK |
340 | |
341 | void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) | |
342 | { | |
833e0e2f | 343 | #ifdef CONFIG_DST_CACHE |
d66f2b91 JK |
344 | int cpu; |
345 | ||
d66f2b91 JK |
346 | for_each_possible_cpu(cpu) { |
347 | struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); | |
348 | ||
349 | if (one_md_dst->type == METADATA_IP_TUNNEL) | |
350 | dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); | |
351 | } | |
352 | #endif | |
353 | free_percpu(md_dst); | |
354 | } | |
355 | EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); |