Commit | Line | Data |
---|---|---|
ab84be7e DA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Generic nexthop implementation | |
3 | * | |
4 | * Copyright (c) 2017-19 Cumulus Networks | |
5 | * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> | |
6 | */ | |
7 | ||
8 | #include <linux/nexthop.h> | |
9 | #include <linux/rtnetlink.h> | |
10 | #include <linux/slab.h> | |
b6459415 | 11 | #include <linux/vmalloc.h> |
430a0491 | 12 | #include <net/arp.h> |
53010f99 | 13 | #include <net/ipv6_stubs.h> |
b513bd03 | 14 | #include <net/lwtunnel.h> |
430a0491 | 15 | #include <net/ndisc.h> |
ab84be7e | 16 | #include <net/nexthop.h> |
597cfe4f | 17 | #include <net/route.h> |
ab84be7e DA |
18 | #include <net/sock.h> |
19 | ||
a2601e2b PM |
20 | #define NH_RES_DEFAULT_IDLE_TIMER (120 * HZ) |
21 | #define NH_RES_DEFAULT_UNBALANCED_TIMER 0 /* No forced rebalancing. */ | |
22 | ||
430a0491 DA |
23 | static void remove_nexthop(struct net *net, struct nexthop *nh, |
24 | struct nl_info *nlinfo); | |
25 | ||
597cfe4f DA |
26 | #define NH_DEV_HASHBITS 8 |
27 | #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) | |
28 | ||
643d0878 | 29 | static const struct nla_policy rtm_nh_policy_new[] = { |
ab84be7e DA |
30 | [NHA_ID] = { .type = NLA_U32 }, |
31 | [NHA_GROUP] = { .type = NLA_BINARY }, | |
32 | [NHA_GROUP_TYPE] = { .type = NLA_U16 }, | |
33 | [NHA_BLACKHOLE] = { .type = NLA_FLAG }, | |
34 | [NHA_OIF] = { .type = NLA_U32 }, | |
35 | [NHA_GATEWAY] = { .type = NLA_BINARY }, | |
36 | [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, | |
37 | [NHA_ENCAP] = { .type = NLA_NESTED }, | |
38428d68 | 38 | [NHA_FDB] = { .type = NLA_FLAG }, |
a2601e2b | 39 | [NHA_RES_GROUP] = { .type = NLA_NESTED }, |
ab84be7e DA |
40 | }; |
41 | ||
60f5ad5e PM |
42 | static const struct nla_policy rtm_nh_policy_get[] = { |
43 | [NHA_ID] = { .type = NLA_U32 }, | |
44 | }; | |
45 | ||
44551bff PM |
46 | static const struct nla_policy rtm_nh_policy_dump[] = { |
47 | [NHA_OIF] = { .type = NLA_U32 }, | |
48 | [NHA_GROUPS] = { .type = NLA_FLAG }, | |
49 | [NHA_MASTER] = { .type = NLA_U32 }, | |
50 | [NHA_FDB] = { .type = NLA_FLAG }, | |
51 | }; | |
52 | ||
a2601e2b PM |
53 | static const struct nla_policy rtm_nh_res_policy_new[] = { |
54 | [NHA_RES_GROUP_BUCKETS] = { .type = NLA_U16 }, | |
55 | [NHA_RES_GROUP_IDLE_TIMER] = { .type = NLA_U32 }, | |
56 | [NHA_RES_GROUP_UNBALANCED_TIMER] = { .type = NLA_U32 }, | |
57 | }; | |
58 | ||
8a1bbabb PM |
59 | static const struct nla_policy rtm_nh_policy_dump_bucket[] = { |
60 | [NHA_ID] = { .type = NLA_U32 }, | |
61 | [NHA_OIF] = { .type = NLA_U32 }, | |
62 | [NHA_MASTER] = { .type = NLA_U32 }, | |
63 | [NHA_RES_BUCKET] = { .type = NLA_NESTED }, | |
64 | }; | |
65 | ||
66 | static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = { | |
67 | [NHA_RES_BUCKET_NH_ID] = { .type = NLA_U32 }, | |
68 | }; | |
69 | ||
187d4c6b PM |
70 | static const struct nla_policy rtm_nh_policy_get_bucket[] = { |
71 | [NHA_ID] = { .type = NLA_U32 }, | |
72 | [NHA_RES_BUCKET] = { .type = NLA_NESTED }, | |
73 | }; | |
74 | ||
75 | static const struct nla_policy rtm_nh_res_bucket_policy_get[] = { | |
76 | [NHA_RES_BUCKET_INDEX] = { .type = NLA_U16 }, | |
77 | }; | |
78 | ||
5ca474f2 IS |
79 | static bool nexthop_notifiers_is_empty(struct net *net) |
80 | { | |
81 | return !net->nexthop.notifier_chain.head; | |
82 | } | |
83 | ||
84 | static void | |
85 | __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, | |
96a85625 | 86 | const struct nh_info *nhi) |
5ca474f2 | 87 | { |
5ca474f2 IS |
88 | nh_info->dev = nhi->fib_nhc.nhc_dev; |
89 | nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; | |
90 | if (nh_info->gw_family == AF_INET) | |
91 | nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; | |
92 | else if (nh_info->gw_family == AF_INET6) | |
93 | nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; | |
94 | ||
95 | nh_info->is_reject = nhi->reject_nh; | |
96 | nh_info->is_fdb = nhi->fdb_nh; | |
97 | nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; | |
98 | } | |
99 | ||
100 | static int nh_notifier_single_info_init(struct nh_notifier_info *info, | |
101 | const struct nexthop *nh) | |
102 | { | |
96a85625 PM |
103 | struct nh_info *nhi = rtnl_dereference(nh->nh_info); |
104 | ||
09ad6bec | 105 | info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; |
5ca474f2 IS |
106 | info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); |
107 | if (!info->nh) | |
108 | return -ENOMEM; | |
109 | ||
96a85625 | 110 | __nh_notifier_single_info_init(info->nh, nhi); |
5ca474f2 IS |
111 | |
112 | return 0; | |
113 | } | |
114 | ||
115 | static void nh_notifier_single_info_fini(struct nh_notifier_info *info) | |
116 | { | |
117 | kfree(info->nh); | |
118 | } | |
119 | ||
de1d1ee3 PM |
120 | static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, |
121 | struct nh_group *nhg) | |
5ca474f2 | 122 | { |
5ca474f2 IS |
123 | u16 num_nh = nhg->num_nh; |
124 | int i; | |
125 | ||
09ad6bec | 126 | info->type = NH_NOTIFIER_INFO_TYPE_GRP; |
5ca474f2 IS |
127 | info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), |
128 | GFP_KERNEL); | |
129 | if (!info->nh_grp) | |
130 | return -ENOMEM; | |
131 | ||
132 | info->nh_grp->num_nh = num_nh; | |
133 | info->nh_grp->is_fdb = nhg->fdb_nh; | |
134 | ||
135 | for (i = 0; i < num_nh; i++) { | |
136 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
96a85625 | 137 | struct nh_info *nhi; |
5ca474f2 | 138 | |
96a85625 | 139 | nhi = rtnl_dereference(nhge->nh->nh_info); |
5ca474f2 IS |
140 | info->nh_grp->nh_entries[i].id = nhge->nh->id; |
141 | info->nh_grp->nh_entries[i].weight = nhge->weight; | |
142 | __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, | |
96a85625 | 143 | nhi); |
5ca474f2 IS |
144 | } |
145 | ||
146 | return 0; | |
147 | } | |
148 | ||
7c37c7e0 PM |
149 | static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, |
150 | struct nh_group *nhg) | |
151 | { | |
152 | struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); | |
153 | u16 num_nh_buckets = res_table->num_nh_buckets; | |
154 | unsigned long size; | |
155 | u16 i; | |
156 | ||
157 | info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE; | |
158 | size = struct_size(info->nh_res_table, nhs, num_nh_buckets); | |
159 | info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | | |
160 | __GFP_NOWARN); | |
161 | if (!info->nh_res_table) | |
162 | return -ENOMEM; | |
163 | ||
164 | info->nh_res_table->num_nh_buckets = num_nh_buckets; | |
165 | ||
166 | for (i = 0; i < num_nh_buckets; i++) { | |
167 | struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; | |
168 | struct nh_grp_entry *nhge; | |
169 | struct nh_info *nhi; | |
170 | ||
171 | nhge = rtnl_dereference(bucket->nh_entry); | |
172 | nhi = rtnl_dereference(nhge->nh->nh_info); | |
173 | __nh_notifier_single_info_init(&info->nh_res_table->nhs[i], | |
174 | nhi); | |
175 | } | |
176 | ||
177 | return 0; | |
178 | } | |
179 | ||
da230501 PM |
180 | static int nh_notifier_grp_info_init(struct nh_notifier_info *info, |
181 | const struct nexthop *nh) | |
182 | { | |
183 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); | |
184 | ||
de1d1ee3 PM |
185 | if (nhg->hash_threshold) |
186 | return nh_notifier_mpath_info_init(info, nhg); | |
7c37c7e0 PM |
187 | else if (nhg->resilient) |
188 | return nh_notifier_res_table_info_init(info, nhg); | |
da230501 PM |
189 | return -EINVAL; |
190 | } | |
191 | ||
192 | static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, | |
193 | const struct nexthop *nh) | |
5ca474f2 | 194 | { |
da230501 PM |
195 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); |
196 | ||
de1d1ee3 | 197 | if (nhg->hash_threshold) |
da230501 | 198 | kfree(info->nh_grp); |
7c37c7e0 PM |
199 | else if (nhg->resilient) |
200 | vfree(info->nh_res_table); | |
5ca474f2 IS |
201 | } |
202 | ||
203 | static int nh_notifier_info_init(struct nh_notifier_info *info, | |
204 | const struct nexthop *nh) | |
205 | { | |
206 | info->id = nh->id; | |
5ca474f2 | 207 | |
09ad6bec | 208 | if (nh->is_group) |
5ca474f2 IS |
209 | return nh_notifier_grp_info_init(info, nh); |
210 | else | |
211 | return nh_notifier_single_info_init(info, nh); | |
212 | } | |
213 | ||
09ad6bec IS |
214 | static void nh_notifier_info_fini(struct nh_notifier_info *info, |
215 | const struct nexthop *nh) | |
5ca474f2 | 216 | { |
09ad6bec | 217 | if (nh->is_group) |
da230501 | 218 | nh_notifier_grp_info_fini(info, nh); |
5ca474f2 IS |
219 | else |
220 | nh_notifier_single_info_fini(info); | |
221 | } | |
222 | ||
8590ceed | 223 | static int call_nexthop_notifiers(struct net *net, |
d8e79f1d | 224 | enum nexthop_event_type event_type, |
3578d53d IS |
225 | struct nexthop *nh, |
226 | struct netlink_ext_ack *extack) | |
8590ceed | 227 | { |
5ca474f2 IS |
228 | struct nh_notifier_info info = { |
229 | .net = net, | |
230 | .extack = extack, | |
231 | }; | |
8590ceed RP |
232 | int err; |
233 | ||
5ca474f2 IS |
234 | ASSERT_RTNL(); |
235 | ||
236 | if (nexthop_notifiers_is_empty(net)) | |
237 | return 0; | |
238 | ||
239 | err = nh_notifier_info_init(&info, nh); | |
240 | if (err) { | |
241 | NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); | |
242 | return err; | |
243 | } | |
244 | ||
80690ec6 | 245 | err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, |
1ec69d18 | 246 | event_type, &info); |
09ad6bec | 247 | nh_notifier_info_fini(&info, nh); |
5ca474f2 | 248 | |
8590ceed RP |
249 | return notifier_to_errno(err); |
250 | } | |
251 | ||
7c37c7e0 PM |
252 | static int |
253 | nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info, | |
254 | bool force, unsigned int *p_idle_timer_ms) | |
255 | { | |
256 | struct nh_res_table *res_table; | |
257 | struct nh_group *nhg; | |
258 | struct nexthop *nh; | |
259 | int err = 0; | |
260 | ||
261 | /* When 'force' is false, nexthop bucket replacement is performed | |
262 | * because the bucket was deemed to be idle. In this case, capable | |
263 | * listeners can choose to perform an atomic replacement: The bucket is | |
264 | * only replaced if it is inactive. However, if the idle timer interval | |
265 | * is smaller than the interval in which a listener is querying | |
266 | * buckets' activity from the device, then atomic replacement should | |
267 | * not be tried. Pass the idle timer value to listeners, so that they | |
268 | * could determine which type of replacement to perform. | |
269 | */ | |
270 | if (force) { | |
271 | *p_idle_timer_ms = 0; | |
272 | return 0; | |
273 | } | |
274 | ||
275 | rcu_read_lock(); | |
276 | ||
277 | nh = nexthop_find_by_id(info->net, info->id); | |
278 | if (!nh) { | |
279 | err = -EINVAL; | |
280 | goto out; | |
281 | } | |
282 | ||
283 | nhg = rcu_dereference(nh->nh_grp); | |
284 | res_table = rcu_dereference(nhg->res_table); | |
285 | *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer); | |
286 | ||
287 | out: | |
288 | rcu_read_unlock(); | |
289 | ||
290 | return err; | |
291 | } | |
292 | ||
293 | static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info, | |
294 | u16 bucket_index, bool force, | |
295 | struct nh_info *oldi, | |
296 | struct nh_info *newi) | |
297 | { | |
298 | unsigned int idle_timer_ms; | |
299 | int err; | |
300 | ||
301 | err = nh_notifier_res_bucket_idle_timer_get(info, force, | |
302 | &idle_timer_ms); | |
303 | if (err) | |
304 | return err; | |
305 | ||
306 | info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET; | |
307 | info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket), | |
308 | GFP_KERNEL); | |
309 | if (!info->nh_res_bucket) | |
310 | return -ENOMEM; | |
311 | ||
312 | info->nh_res_bucket->bucket_index = bucket_index; | |
313 | info->nh_res_bucket->idle_timer_ms = idle_timer_ms; | |
314 | info->nh_res_bucket->force = force; | |
315 | __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi); | |
316 | __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi); | |
317 | return 0; | |
318 | } | |
319 | ||
320 | static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info) | |
321 | { | |
322 | kfree(info->nh_res_bucket); | |
323 | } | |
324 | ||
325 | static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, | |
326 | u16 bucket_index, bool force, | |
327 | struct nh_info *oldi, | |
328 | struct nh_info *newi, | |
329 | struct netlink_ext_ack *extack) | |
330 | { | |
331 | struct nh_notifier_info info = { | |
332 | .net = net, | |
333 | .extack = extack, | |
334 | .id = nhg_id, | |
335 | }; | |
336 | int err; | |
337 | ||
338 | if (nexthop_notifiers_is_empty(net)) | |
339 | return 0; | |
340 | ||
341 | err = nh_notifier_res_bucket_info_init(&info, bucket_index, force, | |
342 | oldi, newi); | |
343 | if (err) | |
344 | return err; | |
345 | ||
346 | err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, | |
347 | NEXTHOP_EVENT_BUCKET_REPLACE, &info); | |
348 | nh_notifier_res_bucket_info_fini(&info); | |
349 | ||
350 | return notifier_to_errno(err); | |
351 | } | |
352 | ||
283a72a5 PM |
353 | /* There are three users of RES_TABLE, and NHs etc. referenced from there: |
354 | * | |
355 | * 1) a collection of callbacks for NH maintenance. This operates under | |
356 | * RTNL, | |
357 | * 2) the delayed work that gradually balances the resilient table, | |
358 | * 3) and nexthop_select_path(), operating under RCU. | |
359 | * | |
360 | * Both the delayed work and the RTNL block are writers, and need to | |
361 | * maintain mutual exclusion. Since there are only two and well-known | |
362 | * writers for each table, the RTNL code can make sure it has exclusive | |
363 | * access thus: | |
364 | * | |
365 | * - Have the DW operate without locking; | |
366 | * - synchronously cancel the DW; | |
367 | * - do the writing; | |
368 | * - if the write was not actually a delete, call upkeep, which schedules | |
369 | * DW again if necessary. | |
370 | * | |
371 | * The functions that are always called from the RTNL context use | |
372 | * rtnl_dereference(). The functions that can also be called from the DW do | |
373 | * a raw dereference and rely on the above mutual exclusion scheme. | |
374 | */ | |
375 | #define nh_res_dereference(p) (rcu_dereference_raw(p)) | |
376 | ||
7c37c7e0 PM |
377 | static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, |
378 | u16 bucket_index, bool force, | |
379 | struct nexthop *old_nh, | |
380 | struct nexthop *new_nh, | |
381 | struct netlink_ext_ack *extack) | |
382 | { | |
383 | struct nh_info *oldi = nh_res_dereference(old_nh->nh_info); | |
384 | struct nh_info *newi = nh_res_dereference(new_nh->nh_info); | |
385 | ||
386 | return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index, | |
387 | force, oldi, newi, extack); | |
388 | } | |
389 | ||
390 | static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, | |
391 | struct netlink_ext_ack *extack) | |
392 | { | |
393 | struct nh_notifier_info info = { | |
394 | .net = net, | |
395 | .extack = extack, | |
396 | }; | |
397 | struct nh_group *nhg; | |
398 | int err; | |
399 | ||
400 | ASSERT_RTNL(); | |
401 | ||
402 | if (nexthop_notifiers_is_empty(net)) | |
403 | return 0; | |
404 | ||
405 | /* At this point, the nexthop buckets are still not populated. Only | |
406 | * emit a notification with the logical nexthops, so that a listener | |
407 | * could potentially veto it in case of unsupported configuration. | |
408 | */ | |
409 | nhg = rtnl_dereference(nh->nh_grp); | |
de1d1ee3 | 410 | err = nh_notifier_mpath_info_init(&info, nhg); |
7c37c7e0 PM |
411 | if (err) { |
412 | NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); | |
413 | return err; | |
414 | } | |
415 | ||
416 | err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, | |
417 | NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, | |
418 | &info); | |
419 | kfree(info.nh_grp); | |
420 | ||
421 | return notifier_to_errno(err); | |
422 | } | |
423 | ||
975ff7f3 IS |
424 | static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, |
425 | enum nexthop_event_type event_type, | |
426 | struct nexthop *nh, | |
427 | struct netlink_ext_ack *extack) | |
428 | { | |
429 | struct nh_notifier_info info = { | |
430 | .net = net, | |
431 | .extack = extack, | |
432 | }; | |
433 | int err; | |
434 | ||
435 | err = nh_notifier_info_init(&info, nh); | |
436 | if (err) | |
437 | return err; | |
438 | ||
439 | err = nb->notifier_call(nb, event_type, &info); | |
09ad6bec | 440 | nh_notifier_info_fini(&info, nh); |
975ff7f3 IS |
441 | |
442 | return notifier_to_errno(err); | |
443 | } | |
444 | ||
597cfe4f DA |
445 | static unsigned int nh_dev_hashfn(unsigned int val) |
446 | { | |
447 | unsigned int mask = NH_DEV_HASHSIZE - 1; | |
448 | ||
449 | return (val ^ | |
450 | (val >> NH_DEV_HASHBITS) ^ | |
451 | (val >> (NH_DEV_HASHBITS * 2))) & mask; | |
452 | } | |
453 | ||
454 | static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) | |
455 | { | |
456 | struct net_device *dev = nhi->fib_nhc.nhc_dev; | |
457 | struct hlist_head *head; | |
458 | unsigned int hash; | |
459 | ||
460 | WARN_ON(!dev); | |
461 | ||
462 | hash = nh_dev_hashfn(dev->ifindex); | |
463 | head = &net->nexthop.devhash[hash]; | |
464 | hlist_add_head(&nhi->dev_hash, head); | |
465 | } | |
466 | ||
5d1f0f09 | 467 | static void nexthop_free_group(struct nexthop *nh) |
430a0491 DA |
468 | { |
469 | struct nh_group *nhg; | |
470 | int i; | |
471 | ||
472 | nhg = rcu_dereference_raw(nh->nh_grp); | |
90f33bff NA |
473 | for (i = 0; i < nhg->num_nh; ++i) { |
474 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
430a0491 | 475 | |
90f33bff NA |
476 | WARN_ON(!list_empty(&nhge->nh_list)); |
477 | nexthop_put(nhge->nh); | |
478 | } | |
479 | ||
480 | WARN_ON(nhg->spare == nhg); | |
430a0491 | 481 | |
283a72a5 PM |
482 | if (nhg->resilient) |
483 | vfree(rcu_dereference_raw(nhg->res_table)); | |
484 | ||
90f33bff | 485 | kfree(nhg->spare); |
430a0491 DA |
486 | kfree(nhg); |
487 | } | |
488 | ||
489 | static void nexthop_free_single(struct nexthop *nh) | |
ab84be7e | 490 | { |
ab84be7e DA |
491 | struct nh_info *nhi; |
492 | ||
493 | nhi = rcu_dereference_raw(nh->nh_info); | |
597cfe4f DA |
494 | switch (nhi->family) { |
495 | case AF_INET: | |
496 | fib_nh_release(nh->net, &nhi->fib_nh); | |
497 | break; | |
53010f99 DA |
498 | case AF_INET6: |
499 | ipv6_stub->fib6_nh_release(&nhi->fib6_nh); | |
500 | break; | |
597cfe4f | 501 | } |
ab84be7e | 502 | kfree(nhi); |
430a0491 DA |
503 | } |
504 | ||
505 | void nexthop_free_rcu(struct rcu_head *head) | |
506 | { | |
507 | struct nexthop *nh = container_of(head, struct nexthop, rcu); | |
508 | ||
509 | if (nh->is_group) | |
5d1f0f09 | 510 | nexthop_free_group(nh); |
430a0491 DA |
511 | else |
512 | nexthop_free_single(nh); | |
ab84be7e DA |
513 | |
514 | kfree(nh); | |
515 | } | |
516 | EXPORT_SYMBOL_GPL(nexthop_free_rcu); | |
517 | ||
518 | static struct nexthop *nexthop_alloc(void) | |
519 | { | |
520 | struct nexthop *nh; | |
521 | ||
522 | nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); | |
430a0491 | 523 | if (nh) { |
4c7e8084 | 524 | INIT_LIST_HEAD(&nh->fi_list); |
f88d8ea6 | 525 | INIT_LIST_HEAD(&nh->f6i_list); |
430a0491 | 526 | INIT_LIST_HEAD(&nh->grp_list); |
38428d68 | 527 | INIT_LIST_HEAD(&nh->fdb_list); |
430a0491 | 528 | } |
ab84be7e DA |
529 | return nh; |
530 | } | |
531 | ||
430a0491 DA |
532 | static struct nh_group *nexthop_grp_alloc(u16 num_nh) |
533 | { | |
430a0491 DA |
534 | struct nh_group *nhg; |
535 | ||
d7d49dc7 | 536 | nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL); |
430a0491 DA |
537 | if (nhg) |
538 | nhg->num_nh = num_nh; | |
539 | ||
540 | return nhg; | |
541 | } | |
542 | ||
283a72a5 PM |
543 | static void nh_res_table_upkeep_dw(struct work_struct *work); |
544 | ||
545 | static struct nh_res_table * | |
546 | nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg) | |
547 | { | |
548 | const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets; | |
549 | struct nh_res_table *res_table; | |
550 | unsigned long size; | |
551 | ||
552 | size = struct_size(res_table, nh_buckets, num_nh_buckets); | |
553 | res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); | |
554 | if (!res_table) | |
555 | return NULL; | |
556 | ||
557 | res_table->net = net; | |
558 | res_table->nhg_id = nhg_id; | |
559 | INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw); | |
560 | INIT_LIST_HEAD(&res_table->uw_nh_entries); | |
561 | res_table->idle_timer = cfg->nh_grp_res_idle_timer; | |
562 | res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer; | |
563 | res_table->num_nh_buckets = num_nh_buckets; | |
564 | return res_table; | |
565 | } | |
566 | ||
ab84be7e DA |
567 | static void nh_base_seq_inc(struct net *net) |
568 | { | |
569 | while (++net->nexthop.seq == 0) | |
570 | ; | |
571 | } | |
572 | ||
573 | /* no reference taken; rcu lock or rtnl must be held */ | |
574 | struct nexthop *nexthop_find_by_id(struct net *net, u32 id) | |
575 | { | |
576 | struct rb_node **pp, *parent = NULL, *next; | |
577 | ||
578 | pp = &net->nexthop.rb_root.rb_node; | |
579 | while (1) { | |
580 | struct nexthop *nh; | |
581 | ||
582 | next = rcu_dereference_raw(*pp); | |
583 | if (!next) | |
584 | break; | |
585 | parent = next; | |
586 | ||
587 | nh = rb_entry(parent, struct nexthop, rb_node); | |
588 | if (id < nh->id) | |
589 | pp = &next->rb_left; | |
590 | else if (id > nh->id) | |
591 | pp = &next->rb_right; | |
592 | else | |
593 | return nh; | |
594 | } | |
595 | return NULL; | |
596 | } | |
597 | EXPORT_SYMBOL_GPL(nexthop_find_by_id); | |
598 | ||
599 | /* used for auto id allocation; called with rtnl held */ | |
600 | static u32 nh_find_unused_id(struct net *net) | |
601 | { | |
602 | u32 id_start = net->nexthop.last_id_allocated; | |
603 | ||
604 | while (1) { | |
605 | net->nexthop.last_id_allocated++; | |
606 | if (net->nexthop.last_id_allocated == id_start) | |
607 | break; | |
608 | ||
609 | if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) | |
610 | return net->nexthop.last_id_allocated; | |
611 | } | |
612 | return 0; | |
613 | } | |
614 | ||
283a72a5 PM |
/* Pull *@deadline in to @next_time if @next_time comes earlier. */
static void nh_res_time_set_deadline(unsigned long next_time,
				     unsigned long *deadline)
{
	if (!time_before(next_time, *deadline))
		return;

	*deadline = next_time;
}
621 | ||
a2601e2b PM |
622 | static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table) |
623 | { | |
624 | if (list_empty(&res_table->uw_nh_entries)) | |
625 | return 0; | |
626 | return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since); | |
627 | } | |
628 | ||
629 | static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) | |
630 | { | |
631 | struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); | |
632 | struct nlattr *nest; | |
633 | ||
634 | nest = nla_nest_start(skb, NHA_RES_GROUP); | |
635 | if (!nest) | |
636 | return -EMSGSIZE; | |
637 | ||
638 | if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS, | |
639 | res_table->num_nh_buckets) || | |
640 | nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER, | |
641 | jiffies_to_clock_t(res_table->idle_timer)) || | |
642 | nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER, | |
643 | jiffies_to_clock_t(res_table->unbalanced_timer)) || | |
644 | nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME, | |
645 | nh_res_table_unbalanced_time(res_table), | |
646 | NHA_RES_GROUP_PAD)) | |
647 | goto nla_put_failure; | |
648 | ||
649 | nla_nest_end(skb, nest); | |
650 | return 0; | |
651 | ||
652 | nla_put_failure: | |
653 | nla_nest_cancel(skb, nest); | |
654 | return -EMSGSIZE; | |
655 | } | |
656 | ||
430a0491 DA |
657 | static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) |
658 | { | |
659 | struct nexthop_grp *p; | |
660 | size_t len = nhg->num_nh * sizeof(*p); | |
661 | struct nlattr *nla; | |
662 | u16 group_type = 0; | |
663 | int i; | |
664 | ||
de1d1ee3 | 665 | if (nhg->hash_threshold) |
430a0491 | 666 | group_type = NEXTHOP_GRP_TYPE_MPATH; |
a2601e2b PM |
667 | else if (nhg->resilient) |
668 | group_type = NEXTHOP_GRP_TYPE_RES; | |
430a0491 DA |
669 | |
670 | if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) | |
671 | goto nla_put_failure; | |
672 | ||
673 | nla = nla_reserve(skb, NHA_GROUP, len); | |
674 | if (!nla) | |
675 | goto nla_put_failure; | |
676 | ||
677 | p = nla_data(nla); | |
678 | for (i = 0; i < nhg->num_nh; ++i) { | |
679 | p->id = nhg->nh_entries[i].nh->id; | |
680 | p->weight = nhg->nh_entries[i].weight - 1; | |
681 | p += 1; | |
682 | } | |
683 | ||
a2601e2b PM |
684 | if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) |
685 | goto nla_put_failure; | |
686 | ||
430a0491 DA |
687 | return 0; |
688 | ||
689 | nla_put_failure: | |
690 | return -EMSGSIZE; | |
691 | } | |
692 | ||
ab84be7e DA |
693 | static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, |
694 | int event, u32 portid, u32 seq, unsigned int nlflags) | |
695 | { | |
53010f99 | 696 | struct fib6_nh *fib6_nh; |
597cfe4f | 697 | struct fib_nh *fib_nh; |
ab84be7e DA |
698 | struct nlmsghdr *nlh; |
699 | struct nh_info *nhi; | |
700 | struct nhmsg *nhm; | |
701 | ||
702 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); | |
703 | if (!nlh) | |
704 | return -EMSGSIZE; | |
705 | ||
706 | nhm = nlmsg_data(nlh); | |
707 | nhm->nh_family = AF_UNSPEC; | |
708 | nhm->nh_flags = nh->nh_flags; | |
709 | nhm->nh_protocol = nh->protocol; | |
710 | nhm->nh_scope = 0; | |
711 | nhm->resvd = 0; | |
712 | ||
713 | if (nla_put_u32(skb, NHA_ID, nh->id)) | |
714 | goto nla_put_failure; | |
715 | ||
430a0491 DA |
716 | if (nh->is_group) { |
717 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); | |
718 | ||
ce9ac056 DA |
719 | if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) |
720 | goto nla_put_failure; | |
430a0491 DA |
721 | if (nla_put_nh_group(skb, nhg)) |
722 | goto nla_put_failure; | |
723 | goto out; | |
724 | } | |
725 | ||
ab84be7e DA |
726 | nhi = rtnl_dereference(nh->nh_info); |
727 | nhm->nh_family = nhi->family; | |
728 | if (nhi->reject_nh) { | |
729 | if (nla_put_flag(skb, NHA_BLACKHOLE)) | |
730 | goto nla_put_failure; | |
731 | goto out; | |
ce9ac056 DA |
732 | } else if (nhi->fdb_nh) { |
733 | if (nla_put_flag(skb, NHA_FDB)) | |
734 | goto nla_put_failure; | |
735 | } else { | |
597cfe4f DA |
736 | const struct net_device *dev; |
737 | ||
738 | dev = nhi->fib_nhc.nhc_dev; | |
739 | if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) | |
740 | goto nla_put_failure; | |
741 | } | |
742 | ||
743 | nhm->nh_scope = nhi->fib_nhc.nhc_scope; | |
744 | switch (nhi->family) { | |
745 | case AF_INET: | |
746 | fib_nh = &nhi->fib_nh; | |
747 | if (fib_nh->fib_nh_gw_family && | |
33d80996 | 748 | nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) |
597cfe4f DA |
749 | goto nla_put_failure; |
750 | break; | |
53010f99 DA |
751 | |
752 | case AF_INET6: | |
753 | fib6_nh = &nhi->fib6_nh; | |
754 | if (fib6_nh->fib_nh_gw_family && | |
755 | nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) | |
756 | goto nla_put_failure; | |
757 | break; | |
ab84be7e DA |
758 | } |
759 | ||
b513bd03 DA |
760 | if (nhi->fib_nhc.nhc_lwtstate && |
761 | lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, | |
762 | NHA_ENCAP, NHA_ENCAP_TYPE) < 0) | |
763 | goto nla_put_failure; | |
764 | ||
ab84be7e DA |
765 | out: |
766 | nlmsg_end(skb, nlh); | |
767 | return 0; | |
768 | ||
769 | nla_put_failure: | |
d69100b8 | 770 | nlmsg_cancel(skb, nlh); |
ab84be7e DA |
771 | return -EMSGSIZE; |
772 | } | |
773 | ||
a2601e2b PM |
774 | static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg) |
775 | { | |
776 | return nla_total_size(0) + /* NHA_RES_GROUP */ | |
777 | nla_total_size(2) + /* NHA_RES_GROUP_BUCKETS */ | |
778 | nla_total_size(4) + /* NHA_RES_GROUP_IDLE_TIMER */ | |
779 | nla_total_size(4) + /* NHA_RES_GROUP_UNBALANCED_TIMER */ | |
780 | nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */ | |
781 | } | |
782 | ||
430a0491 DA |
783 | static size_t nh_nlmsg_size_grp(struct nexthop *nh) |
784 | { | |
785 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); | |
786 | size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; | |
a2601e2b PM |
787 | size_t tot = nla_total_size(sz) + |
788 | nla_total_size(2); /* NHA_GROUP_TYPE */ | |
789 | ||
790 | if (nhg->resilient) | |
791 | tot += nh_nlmsg_size_grp_res(nhg); | |
430a0491 | 792 | |
a2601e2b | 793 | return tot; |
430a0491 DA |
794 | } |
795 | ||
796 | static size_t nh_nlmsg_size_single(struct nexthop *nh) | |
ab84be7e | 797 | { |
597cfe4f | 798 | struct nh_info *nhi = rtnl_dereference(nh->nh_info); |
430a0491 | 799 | size_t sz; |
ab84be7e DA |
800 | |
801 | /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE | |
802 | * are mutually exclusive | |
803 | */ | |
430a0491 | 804 | sz = nla_total_size(4); /* NHA_OIF */ |
ab84be7e | 805 | |
597cfe4f DA |
806 | switch (nhi->family) { |
807 | case AF_INET: | |
808 | if (nhi->fib_nh.fib_nh_gw_family) | |
809 | sz += nla_total_size(4); /* NHA_GATEWAY */ | |
810 | break; | |
53010f99 DA |
811 | |
812 | case AF_INET6: | |
813 | /* NHA_GATEWAY */ | |
814 | if (nhi->fib6_nh.fib_nh_gw_family) | |
815 | sz += nla_total_size(sizeof(const struct in6_addr)); | |
816 | break; | |
597cfe4f DA |
817 | } |
818 | ||
b513bd03 DA |
819 | if (nhi->fib_nhc.nhc_lwtstate) { |
820 | sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); | |
821 | sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ | |
822 | } | |
823 | ||
ab84be7e DA |
824 | return sz; |
825 | } | |
826 | ||
430a0491 DA |
827 | static size_t nh_nlmsg_size(struct nexthop *nh) |
828 | { | |
f9e95555 SW |
829 | size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); |
830 | ||
831 | sz += nla_total_size(4); /* NHA_ID */ | |
430a0491 DA |
832 | |
833 | if (nh->is_group) | |
834 | sz += nh_nlmsg_size_grp(nh); | |
835 | else | |
836 | sz += nh_nlmsg_size_single(nh); | |
837 | ||
838 | return sz; | |
839 | } | |
840 | ||
ab84be7e DA |
841 | static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) |
842 | { | |
843 | unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; | |
844 | u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; | |
845 | struct sk_buff *skb; | |
846 | int err = -ENOBUFS; | |
847 | ||
848 | skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); | |
849 | if (!skb) | |
850 | goto errout; | |
851 | ||
852 | err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); | |
853 | if (err < 0) { | |
854 | /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ | |
855 | WARN_ON(err == -EMSGSIZE); | |
856 | kfree_skb(skb); | |
857 | goto errout; | |
858 | } | |
859 | ||
860 | rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, | |
861 | info->nlh, gfp_any()); | |
862 | return; | |
863 | errout: | |
864 | if (err < 0) | |
865 | rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); | |
866 | } | |
867 | ||
283a72a5 PM |
868 | static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket) |
869 | { | |
870 | return (unsigned long)atomic_long_read(&bucket->used_time); | |
871 | } | |
872 | ||
873 | static unsigned long | |
874 | nh_res_bucket_idle_point(const struct nh_res_table *res_table, | |
875 | const struct nh_res_bucket *bucket, | |
876 | unsigned long now) | |
877 | { | |
878 | unsigned long time = nh_res_bucket_used_time(bucket); | |
879 | ||
880 | /* Bucket was not used since it was migrated. The idle time is now. */ | |
881 | if (time == bucket->migrated_time) | |
882 | return now; | |
883 | ||
884 | return time + res_table->idle_timer; | |
885 | } | |
886 | ||
887 | static unsigned long | |
888 | nh_res_table_unb_point(const struct nh_res_table *res_table) | |
889 | { | |
890 | return res_table->unbalanced_since + res_table->unbalanced_timer; | |
891 | } | |
892 | ||
893 | static void nh_res_bucket_set_idle(const struct nh_res_table *res_table, | |
894 | struct nh_res_bucket *bucket) | |
895 | { | |
896 | unsigned long now = jiffies; | |
897 | ||
898 | atomic_long_set(&bucket->used_time, (long)now); | |
899 | bucket->migrated_time = now; | |
900 | } | |
901 | ||
902 | static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket) | |
903 | { | |
904 | atomic_long_set(&bucket->used_time, (long)jiffies); | |
905 | } | |
906 | ||
8a1bbabb PM |
907 | static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket) |
908 | { | |
909 | unsigned long used_time = nh_res_bucket_used_time(bucket); | |
910 | ||
911 | return jiffies_delta_to_clock_t(jiffies - used_time); | |
912 | } | |
913 | ||
914 | static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh, | |
915 | struct nh_res_bucket *bucket, u16 bucket_index, | |
916 | int event, u32 portid, u32 seq, | |
917 | unsigned int nlflags, | |
918 | struct netlink_ext_ack *extack) | |
919 | { | |
920 | struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); | |
921 | struct nlmsghdr *nlh; | |
922 | struct nlattr *nest; | |
923 | struct nhmsg *nhm; | |
924 | ||
925 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); | |
926 | if (!nlh) | |
927 | return -EMSGSIZE; | |
928 | ||
929 | nhm = nlmsg_data(nlh); | |
930 | nhm->nh_family = AF_UNSPEC; | |
931 | nhm->nh_flags = bucket->nh_flags; | |
932 | nhm->nh_protocol = nh->protocol; | |
933 | nhm->nh_scope = 0; | |
934 | nhm->resvd = 0; | |
935 | ||
936 | if (nla_put_u32(skb, NHA_ID, nh->id)) | |
937 | goto nla_put_failure; | |
938 | ||
939 | nest = nla_nest_start(skb, NHA_RES_BUCKET); | |
940 | if (!nest) | |
941 | goto nla_put_failure; | |
942 | ||
943 | if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) || | |
944 | nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) || | |
945 | nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME, | |
946 | nh_res_bucket_idle_time(bucket), | |
947 | NHA_RES_BUCKET_PAD)) | |
948 | goto nla_put_failure_nest; | |
949 | ||
950 | nla_nest_end(skb, nest); | |
951 | nlmsg_end(skb, nlh); | |
952 | return 0; | |
953 | ||
954 | nla_put_failure_nest: | |
955 | nla_nest_cancel(skb, nest); | |
956 | nla_put_failure: | |
957 | nlmsg_cancel(skb, nlh); | |
958 | return -EMSGSIZE; | |
959 | } | |
960 | ||
0b4818aa PM |
961 | static void nexthop_bucket_notify(struct nh_res_table *res_table, |
962 | u16 bucket_index) | |
963 | { | |
964 | struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; | |
965 | struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); | |
966 | struct nexthop *nh = nhge->nh_parent; | |
967 | struct sk_buff *skb; | |
968 | int err = -ENOBUFS; | |
969 | ||
970 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | |
971 | if (!skb) | |
972 | goto errout; | |
973 | ||
974 | err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, | |
975 | RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE, | |
976 | NULL); | |
977 | if (err < 0) { | |
978 | kfree_skb(skb); | |
979 | goto errout; | |
980 | } | |
981 | ||
982 | rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL); | |
983 | return; | |
984 | errout: | |
985 | if (err < 0) | |
986 | rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err); | |
987 | } | |
988 | ||
430a0491 | 989 | static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, |
ce9ac056 | 990 | bool *is_fdb, struct netlink_ext_ack *extack) |
597cfe4f | 991 | { |
430a0491 DA |
992 | if (nh->is_group) { |
993 | struct nh_group *nhg = rtnl_dereference(nh->nh_grp); | |
597cfe4f | 994 | |
283a72a5 | 995 | /* Nesting groups within groups is not supported. */ |
de1d1ee3 | 996 | if (nhg->hash_threshold) { |
430a0491 | 997 | NL_SET_ERR_MSG(extack, |
de1d1ee3 | 998 | "Hash-threshold group can not be a nexthop within a group"); |
430a0491 DA |
999 | return false; |
1000 | } | |
283a72a5 PM |
1001 | if (nhg->resilient) { |
1002 | NL_SET_ERR_MSG(extack, | |
1003 | "Resilient group can not be a nexthop within a group"); | |
1004 | return false; | |
1005 | } | |
ce9ac056 | 1006 | *is_fdb = nhg->fdb_nh; |
430a0491 DA |
1007 | } else { |
1008 | struct nh_info *nhi = rtnl_dereference(nh->nh_info); | |
1009 | ||
1010 | if (nhi->reject_nh && npaths > 1) { | |
1011 | NL_SET_ERR_MSG(extack, | |
1012 | "Blackhole nexthop can not be used in a group with more than 1 path"); | |
1013 | return false; | |
1014 | } | |
ce9ac056 | 1015 | *is_fdb = nhi->fdb_nh; |
430a0491 DA |
1016 | } |
1017 | ||
1018 | return true; | |
1019 | } | |
1020 | ||
38428d68 RP |
1021 | static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, |
1022 | struct netlink_ext_ack *extack) | |
1023 | { | |
1024 | struct nh_info *nhi; | |
1025 | ||
ce9ac056 DA |
1026 | nhi = rtnl_dereference(nh->nh_info); |
1027 | ||
1028 | if (!nhi->fdb_nh) { | |
38428d68 RP |
1029 | NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); |
1030 | return -EINVAL; | |
1031 | } | |
1032 | ||
38428d68 RP |
1033 | if (*nh_family == AF_UNSPEC) { |
1034 | *nh_family = nhi->family; | |
1035 | } else if (*nh_family != nhi->family) { | |
1036 | NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); | |
1037 | return -EINVAL; | |
1038 | } | |
1039 | ||
1040 | return 0; | |
1041 | } | |
1042 | ||
643d0878 PM |
1043 | static int nh_check_attr_group(struct net *net, |
1044 | struct nlattr *tb[], size_t tb_size, | |
a2601e2b | 1045 | u16 nh_grp_type, struct netlink_ext_ack *extack) |
430a0491 DA |
1046 | { |
1047 | unsigned int len = nla_len(tb[NHA_GROUP]); | |
38428d68 | 1048 | u8 nh_family = AF_UNSPEC; |
430a0491 DA |
1049 | struct nexthop_grp *nhg; |
1050 | unsigned int i, j; | |
38428d68 | 1051 | u8 nhg_fdb = 0; |
430a0491 | 1052 | |
eeaac363 | 1053 | if (!len || len & (sizeof(struct nexthop_grp) - 1)) { |
430a0491 DA |
1054 | NL_SET_ERR_MSG(extack, |
1055 | "Invalid length for nexthop group attribute"); | |
1056 | return -EINVAL; | |
1057 | } | |
1058 | ||
1059 | /* convert len to number of nexthop ids */ | |
1060 | len /= sizeof(*nhg); | |
1061 | ||
1062 | nhg = nla_data(tb[NHA_GROUP]); | |
1063 | for (i = 0; i < len; ++i) { | |
1064 | if (nhg[i].resvd1 || nhg[i].resvd2) { | |
1065 | NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); | |
1066 | return -EINVAL; | |
1067 | } | |
1068 | if (nhg[i].weight > 254) { | |
1069 | NL_SET_ERR_MSG(extack, "Invalid value for weight"); | |
1070 | return -EINVAL; | |
1071 | } | |
1072 | for (j = i + 1; j < len; ++j) { | |
1073 | if (nhg[i].id == nhg[j].id) { | |
1074 | NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); | |
1075 | return -EINVAL; | |
1076 | } | |
1077 | } | |
1078 | } | |
1079 | ||
38428d68 RP |
1080 | if (tb[NHA_FDB]) |
1081 | nhg_fdb = 1; | |
430a0491 DA |
1082 | nhg = nla_data(tb[NHA_GROUP]); |
1083 | for (i = 0; i < len; ++i) { | |
1084 | struct nexthop *nh; | |
ce9ac056 | 1085 | bool is_fdb_nh; |
430a0491 DA |
1086 | |
1087 | nh = nexthop_find_by_id(net, nhg[i].id); | |
1088 | if (!nh) { | |
1089 | NL_SET_ERR_MSG(extack, "Invalid nexthop id"); | |
1090 | return -EINVAL; | |
1091 | } | |
ce9ac056 | 1092 | if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) |
430a0491 | 1093 | return -EINVAL; |
38428d68 RP |
1094 | |
1095 | if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) | |
1096 | return -EINVAL; | |
1097 | ||
ce9ac056 | 1098 | if (!nhg_fdb && is_fdb_nh) { |
38428d68 RP |
1099 | NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); |
1100 | return -EINVAL; | |
1101 | } | |
430a0491 | 1102 | } |
643d0878 | 1103 | for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { |
430a0491 DA |
1104 | if (!tb[i]) |
1105 | continue; | |
a2601e2b PM |
1106 | switch (i) { |
1107 | case NHA_FDB: | |
38428d68 | 1108 | continue; |
a2601e2b PM |
1109 | case NHA_RES_GROUP: |
1110 | if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) | |
1111 | continue; | |
1112 | break; | |
1113 | } | |
430a0491 DA |
1114 | NL_SET_ERR_MSG(extack, |
1115 | "No other attributes can be set in nexthop groups"); | |
1116 | return -EINVAL; | |
1117 | } | |
1118 | ||
1119 | return 0; | |
1120 | } | |
1121 | ||
1122 | static bool ipv6_good_nh(const struct fib6_nh *nh) | |
1123 | { | |
1124 | int state = NUD_REACHABLE; | |
1125 | struct neighbour *n; | |
1126 | ||
1127 | rcu_read_lock_bh(); | |
1128 | ||
1129 | n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); | |
1130 | if (n) | |
1131 | state = n->nud_state; | |
1132 | ||
1133 | rcu_read_unlock_bh(); | |
1134 | ||
1135 | return !!(state & NUD_VALID); | |
1136 | } | |
1137 | ||
1138 | static bool ipv4_good_nh(const struct fib_nh *nh) | |
1139 | { | |
1140 | int state = NUD_REACHABLE; | |
1141 | struct neighbour *n; | |
1142 | ||
1143 | rcu_read_lock_bh(); | |
1144 | ||
1145 | n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, | |
1146 | (__force u32)nh->fib_nh_gw4); | |
1147 | if (n) | |
1148 | state = n->nud_state; | |
1149 | ||
1150 | rcu_read_unlock_bh(); | |
1151 | ||
1152 | return !!(state & NUD_VALID); | |
1153 | } | |
1154 | ||
de1d1ee3 | 1155 | static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) |
430a0491 DA |
1156 | { |
1157 | struct nexthop *rc = NULL; | |
430a0491 DA |
1158 | int i; |
1159 | ||
430a0491 DA |
1160 | for (i = 0; i < nhg->num_nh; ++i) { |
1161 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1162 | struct nh_info *nhi; | |
1163 | ||
de1d1ee3 | 1164 | if (hash > atomic_read(&nhge->hthr.upper_bound)) |
430a0491 DA |
1165 | continue; |
1166 | ||
ce9ac056 DA |
1167 | nhi = rcu_dereference(nhge->nh->nh_info); |
1168 | if (nhi->fdb_nh) | |
38428d68 RP |
1169 | return nhge->nh; |
1170 | ||
430a0491 DA |
1171 | /* nexthops always check if it is good and does |
1172 | * not rely on a sysctl for this behavior | |
1173 | */ | |
430a0491 DA |
1174 | switch (nhi->family) { |
1175 | case AF_INET: | |
1176 | if (ipv4_good_nh(&nhi->fib_nh)) | |
1177 | return nhge->nh; | |
1178 | break; | |
1179 | case AF_INET6: | |
1180 | if (ipv6_good_nh(&nhi->fib6_nh)) | |
1181 | return nhge->nh; | |
1182 | break; | |
1183 | } | |
1184 | ||
1185 | if (!rc) | |
1186 | rc = nhge->nh; | |
1187 | } | |
1188 | ||
1189 | return rc; | |
1190 | } | |
79bc55e3 | 1191 | |
283a72a5 PM |
1192 | static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) |
1193 | { | |
1194 | struct nh_res_table *res_table = rcu_dereference(nhg->res_table); | |
1195 | u16 bucket_index = hash % res_table->num_nh_buckets; | |
1196 | struct nh_res_bucket *bucket; | |
1197 | struct nh_grp_entry *nhge; | |
1198 | ||
1199 | /* nexthop_select_path() is expected to return a non-NULL value, so | |
1200 | * skip protocol validation and just hand out whatever there is. | |
1201 | */ | |
1202 | bucket = &res_table->nh_buckets[bucket_index]; | |
1203 | nh_res_bucket_set_busy(bucket); | |
1204 | nhge = rcu_dereference(bucket->nh_entry); | |
1205 | return nhge->nh; | |
1206 | } | |
1207 | ||
79bc55e3 PM |
1208 | struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) |
1209 | { | |
1210 | struct nh_group *nhg; | |
1211 | ||
1212 | if (!nh->is_group) | |
1213 | return nh; | |
1214 | ||
1215 | nhg = rcu_dereference(nh->nh_grp); | |
de1d1ee3 PM |
1216 | if (nhg->hash_threshold) |
1217 | return nexthop_select_path_hthr(nhg, hash); | |
283a72a5 PM |
1218 | else if (nhg->resilient) |
1219 | return nexthop_select_path_res(nhg, hash); | |
79bc55e3 PM |
1220 | |
1221 | /* Unreachable. */ | |
1222 | return NULL; | |
1223 | } | |
430a0491 DA |
1224 | EXPORT_SYMBOL_GPL(nexthop_select_path); |
1225 | ||
f88c9aa1 DA |
1226 | int nexthop_for_each_fib6_nh(struct nexthop *nh, |
1227 | int (*cb)(struct fib6_nh *nh, void *arg), | |
1228 | void *arg) | |
1229 | { | |
1230 | struct nh_info *nhi; | |
1231 | int err; | |
1232 | ||
1233 | if (nh->is_group) { | |
1234 | struct nh_group *nhg; | |
1235 | int i; | |
1236 | ||
1237 | nhg = rcu_dereference_rtnl(nh->nh_grp); | |
1238 | for (i = 0; i < nhg->num_nh; i++) { | |
1239 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1240 | ||
1241 | nhi = rcu_dereference_rtnl(nhge->nh->nh_info); | |
1242 | err = cb(&nhi->fib6_nh, arg); | |
1243 | if (err) | |
1244 | return err; | |
1245 | } | |
1246 | } else { | |
1247 | nhi = rcu_dereference_rtnl(nh->nh_info); | |
1248 | err = cb(&nhi->fib6_nh, arg); | |
1249 | if (err) | |
1250 | return err; | |
1251 | } | |
1252 | ||
1253 | return 0; | |
1254 | } | |
1255 | EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); | |
1256 | ||
7bf4796d DA |
1257 | static int check_src_addr(const struct in6_addr *saddr, |
1258 | struct netlink_ext_ack *extack) | |
1259 | { | |
1260 | if (!ipv6_addr_any(saddr)) { | |
1261 | NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); | |
1262 | return -EINVAL; | |
1263 | } | |
1264 | return 0; | |
1265 | } | |
1266 | ||
f88d8ea6 DA |
1267 | int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, |
1268 | struct netlink_ext_ack *extack) | |
1269 | { | |
1270 | struct nh_info *nhi; | |
ce9ac056 | 1271 | bool is_fdb_nh; |
38428d68 | 1272 | |
f88d8ea6 DA |
1273 | /* fib6_src is unique to a fib6_info and limits the ability to cache |
1274 | * routes in fib6_nh within a nexthop that is potentially shared | |
1275 | * across multiple fib entries. If the config wants to use source | |
1276 | * routing it can not use nexthop objects. mlxsw also does not allow | |
1277 | * fib6_src on routes. | |
1278 | */ | |
7bf4796d | 1279 | if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) |
f88d8ea6 | 1280 | return -EINVAL; |
f88d8ea6 DA |
1281 | |
1282 | if (nh->is_group) { | |
1283 | struct nh_group *nhg; | |
1284 | ||
1285 | nhg = rtnl_dereference(nh->nh_grp); | |
1286 | if (nhg->has_v4) | |
1287 | goto no_v4_nh; | |
ce9ac056 | 1288 | is_fdb_nh = nhg->fdb_nh; |
f88d8ea6 DA |
1289 | } else { |
1290 | nhi = rtnl_dereference(nh->nh_info); | |
1291 | if (nhi->family == AF_INET) | |
1292 | goto no_v4_nh; | |
ce9ac056 DA |
1293 | is_fdb_nh = nhi->fdb_nh; |
1294 | } | |
1295 | ||
1296 | if (is_fdb_nh) { | |
1297 | NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); | |
1298 | return -EINVAL; | |
f88d8ea6 DA |
1299 | } |
1300 | ||
1301 | return 0; | |
1302 | no_v4_nh: | |
1303 | NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); | |
1304 | return -EINVAL; | |
1305 | } | |
1306 | EXPORT_SYMBOL_GPL(fib6_check_nexthop); | |
1307 | ||
7bf4796d DA |
1308 | /* if existing nexthop has ipv6 routes linked to it, need |
1309 | * to verify this new spec works with ipv6 | |
1310 | */ | |
1311 | static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, | |
1312 | struct netlink_ext_ack *extack) | |
1313 | { | |
1314 | struct fib6_info *f6i; | |
1315 | ||
1316 | if (list_empty(&old->f6i_list)) | |
1317 | return 0; | |
1318 | ||
1319 | list_for_each_entry(f6i, &old->f6i_list, nh_list) { | |
1320 | if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) | |
1321 | return -EINVAL; | |
1322 | } | |
1323 | ||
1324 | return fib6_check_nexthop(new, NULL, extack); | |
1325 | } | |
1326 | ||
ce9ac056 | 1327 | static int nexthop_check_scope(struct nh_info *nhi, u8 scope, |
4c7e8084 DA |
1328 | struct netlink_ext_ack *extack) |
1329 | { | |
4c7e8084 DA |
1330 | if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { |
1331 | NL_SET_ERR_MSG(extack, | |
1332 | "Route with host scope can not have a gateway"); | |
1333 | return -EINVAL; | |
1334 | } | |
1335 | ||
1336 | if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { | |
1337 | NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); | |
1338 | return -EINVAL; | |
1339 | } | |
1340 | ||
1341 | return 0; | |
1342 | } | |
1343 | ||
1344 | /* Invoked by fib add code to verify nexthop by id is ok with | |
1345 | * config for prefix; parts of fib_check_nh not done when nexthop | |
1346 | * object is used. | |
1347 | */ | |
1348 | int fib_check_nexthop(struct nexthop *nh, u8 scope, | |
1349 | struct netlink_ext_ack *extack) | |
1350 | { | |
ce9ac056 | 1351 | struct nh_info *nhi; |
4c7e8084 DA |
1352 | int err = 0; |
1353 | ||
1354 | if (nh->is_group) { | |
1355 | struct nh_group *nhg; | |
1356 | ||
ce9ac056 DA |
1357 | nhg = rtnl_dereference(nh->nh_grp); |
1358 | if (nhg->fdb_nh) { | |
1359 | NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); | |
1360 | err = -EINVAL; | |
1361 | goto out; | |
1362 | } | |
1363 | ||
4c7e8084 DA |
1364 | if (scope == RT_SCOPE_HOST) { |
1365 | NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); | |
1366 | err = -EINVAL; | |
1367 | goto out; | |
1368 | } | |
1369 | ||
4c7e8084 | 1370 | /* all nexthops in a group have the same scope */ |
ce9ac056 DA |
1371 | nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); |
1372 | err = nexthop_check_scope(nhi, scope, extack); | |
4c7e8084 | 1373 | } else { |
ce9ac056 DA |
1374 | nhi = rtnl_dereference(nh->nh_info); |
1375 | if (nhi->fdb_nh) { | |
1376 | NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); | |
1377 | err = -EINVAL; | |
1378 | goto out; | |
1379 | } | |
1380 | err = nexthop_check_scope(nhi, scope, extack); | |
4c7e8084 | 1381 | } |
ce9ac056 | 1382 | |
4c7e8084 DA |
1383 | out: |
1384 | return err; | |
1385 | } | |
1386 | ||
7bf4796d DA |
1387 | static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, |
1388 | struct netlink_ext_ack *extack) | |
1389 | { | |
1390 | struct fib_info *fi; | |
1391 | ||
1392 | list_for_each_entry(fi, &old->fi_list, nh_list) { | |
1393 | int err; | |
1394 | ||
1395 | err = fib_check_nexthop(new, fi->fib_scope, extack); | |
1396 | if (err) | |
1397 | return err; | |
1398 | } | |
1399 | return 0; | |
1400 | } | |
1401 | ||
283a72a5 PM |
1402 | static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge) |
1403 | { | |
1404 | return nhge->res.count_buckets == nhge->res.wants_buckets; | |
1405 | } | |
1406 | ||
1407 | static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge) | |
1408 | { | |
1409 | return nhge->res.count_buckets > nhge->res.wants_buckets; | |
1410 | } | |
1411 | ||
1412 | static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge) | |
1413 | { | |
1414 | return nhge->res.count_buckets < nhge->res.wants_buckets; | |
1415 | } | |
1416 | ||
1417 | static bool nh_res_table_is_balanced(const struct nh_res_table *res_table) | |
1418 | { | |
1419 | return list_empty(&res_table->uw_nh_entries); | |
1420 | } | |
1421 | ||
1422 | static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket) | |
1423 | { | |
1424 | struct nh_grp_entry *nhge; | |
1425 | ||
1426 | if (bucket->occupied) { | |
1427 | nhge = nh_res_dereference(bucket->nh_entry); | |
1428 | nhge->res.count_buckets--; | |
1429 | bucket->occupied = false; | |
1430 | } | |
1431 | } | |
1432 | ||
1433 | static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket, | |
1434 | struct nh_grp_entry *nhge) | |
1435 | { | |
1436 | nh_res_bucket_unset_nh(bucket); | |
1437 | ||
1438 | bucket->occupied = true; | |
1439 | rcu_assign_pointer(bucket->nh_entry, nhge); | |
1440 | nhge->res.count_buckets++; | |
1441 | } | |
1442 | ||
1443 | static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table, | |
1444 | struct nh_res_bucket *bucket, | |
1445 | unsigned long *deadline, bool *force) | |
1446 | { | |
1447 | unsigned long now = jiffies; | |
1448 | struct nh_grp_entry *nhge; | |
1449 | unsigned long idle_point; | |
1450 | ||
1451 | if (!bucket->occupied) { | |
1452 | /* The bucket is not occupied, its NHGE pointer is either | |
1453 | * NULL or obsolete. We _have to_ migrate: set force. | |
1454 | */ | |
1455 | *force = true; | |
1456 | return true; | |
1457 | } | |
1458 | ||
1459 | nhge = nh_res_dereference(bucket->nh_entry); | |
1460 | ||
1461 | /* If the bucket is populated by an underweight or balanced | |
1462 | * nexthop, do not migrate. | |
1463 | */ | |
1464 | if (!nh_res_nhge_is_ow(nhge)) | |
1465 | return false; | |
1466 | ||
1467 | /* At this point we know that the bucket is populated with an | |
1468 | * overweight nexthop. It needs to be migrated to a new nexthop if | |
1469 | * the idle timer of unbalanced timer expired. | |
1470 | */ | |
1471 | ||
1472 | idle_point = nh_res_bucket_idle_point(res_table, bucket, now); | |
1473 | if (time_after_eq(now, idle_point)) { | |
1474 | /* The bucket is idle. We _can_ migrate: unset force. */ | |
1475 | *force = false; | |
1476 | return true; | |
1477 | } | |
1478 | ||
1479 | /* Unbalanced timer of 0 means "never force". */ | |
1480 | if (res_table->unbalanced_timer) { | |
1481 | unsigned long unb_point; | |
1482 | ||
1483 | unb_point = nh_res_table_unb_point(res_table); | |
1484 | if (time_after(now, unb_point)) { | |
1485 | /* The bucket is not idle, but the unbalanced timer | |
1486 | * expired. We _can_ migrate, but set force anyway, | |
1487 | * so that drivers know to ignore activity reports | |
1488 | * from the HW. | |
1489 | */ | |
1490 | *force = true; | |
1491 | return true; | |
1492 | } | |
1493 | ||
1494 | nh_res_time_set_deadline(unb_point, deadline); | |
1495 | } | |
1496 | ||
1497 | nh_res_time_set_deadline(idle_point, deadline); | |
1498 | return false; | |
1499 | } | |
1500 | ||
1501 | static bool nh_res_bucket_migrate(struct nh_res_table *res_table, | |
0b4818aa PM |
1502 | u16 bucket_index, bool notify, |
1503 | bool notify_nl, bool force) | |
283a72a5 PM |
1504 | { |
1505 | struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; | |
1506 | struct nh_grp_entry *new_nhge; | |
7c37c7e0 PM |
1507 | struct netlink_ext_ack extack; |
1508 | int err; | |
283a72a5 PM |
1509 | |
1510 | new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries, | |
1511 | struct nh_grp_entry, | |
1512 | res.uw_nh_entry); | |
1513 | if (WARN_ON_ONCE(!new_nhge)) | |
1514 | /* If this function is called, "bucket" is either not | |
1515 | * occupied, or it belongs to a next hop that is | |
1516 | * overweight. In either case, there ought to be a | |
1517 | * corresponding underweight next hop. | |
1518 | */ | |
1519 | return false; | |
1520 | ||
7c37c7e0 PM |
1521 | if (notify) { |
1522 | struct nh_grp_entry *old_nhge; | |
1523 | ||
1524 | old_nhge = nh_res_dereference(bucket->nh_entry); | |
1525 | err = call_nexthop_res_bucket_notifiers(res_table->net, | |
1526 | res_table->nhg_id, | |
1527 | bucket_index, force, | |
1528 | old_nhge->nh, | |
1529 | new_nhge->nh, &extack); | |
1530 | if (err) { | |
1531 | pr_err_ratelimited("%s\n", extack._msg); | |
1532 | if (!force) | |
1533 | return false; | |
1534 | /* It is not possible to veto a forced replacement, so | |
1535 | * just clear the hardware flags from the nexthop | |
1536 | * bucket to indicate to user space that this bucket is | |
1537 | * not correctly populated in hardware. | |
1538 | */ | |
1539 | bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); | |
1540 | } | |
1541 | } | |
1542 | ||
283a72a5 PM |
1543 | nh_res_bucket_set_nh(bucket, new_nhge); |
1544 | nh_res_bucket_set_idle(res_table, bucket); | |
1545 | ||
0b4818aa PM |
1546 | if (notify_nl) |
1547 | nexthop_bucket_notify(res_table, bucket_index); | |
1548 | ||
283a72a5 PM |
1549 | if (nh_res_nhge_is_balanced(new_nhge)) |
1550 | list_del(&new_nhge->res.uw_nh_entry); | |
1551 | return true; | |
1552 | } | |
1553 | ||
1554 | #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2) | |
1555 | ||
0b4818aa PM |
1556 | static void nh_res_table_upkeep(struct nh_res_table *res_table, |
1557 | bool notify, bool notify_nl) | |
283a72a5 PM |
1558 | { |
1559 | unsigned long now = jiffies; | |
1560 | unsigned long deadline; | |
1561 | u16 i; | |
1562 | ||
1563 | /* Deadline is the next time that upkeep should be run. It is the | |
1564 | * earliest time at which one of the buckets might be migrated. | |
1565 | * Start at the most pessimistic estimate: either unbalanced_timer | |
1566 | * from now, or if there is none, idle_timer from now. For each | |
1567 | * encountered time point, call nh_res_time_set_deadline() to | |
1568 | * refine the estimate. | |
1569 | */ | |
1570 | if (res_table->unbalanced_timer) | |
1571 | deadline = now + res_table->unbalanced_timer; | |
1572 | else | |
1573 | deadline = now + res_table->idle_timer; | |
1574 | ||
1575 | for (i = 0; i < res_table->num_nh_buckets; i++) { | |
1576 | struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; | |
1577 | bool force; | |
1578 | ||
1579 | if (nh_res_bucket_should_migrate(res_table, bucket, | |
1580 | &deadline, &force)) { | |
7c37c7e0 | 1581 | if (!nh_res_bucket_migrate(res_table, i, notify, |
0b4818aa | 1582 | notify_nl, force)) { |
283a72a5 PM |
1583 | unsigned long idle_point; |
1584 | ||
1585 | /* A driver can override the migration | |
1586 | * decision if the HW reports that the | |
1587 | * bucket is actually not idle. Therefore | |
1588 | * remark the bucket as busy again and | |
1589 | * update the deadline. | |
1590 | */ | |
1591 | nh_res_bucket_set_busy(bucket); | |
1592 | idle_point = nh_res_bucket_idle_point(res_table, | |
1593 | bucket, | |
1594 | now); | |
1595 | nh_res_time_set_deadline(idle_point, &deadline); | |
1596 | } | |
1597 | } | |
1598 | } | |
1599 | ||
1600 | /* If the group is still unbalanced, schedule the next upkeep to | |
1601 | * either the deadline computed above, or the minimum deadline, | |
1602 | * whichever comes later. | |
1603 | */ | |
1604 | if (!nh_res_table_is_balanced(res_table)) { | |
1605 | unsigned long now = jiffies; | |
1606 | unsigned long min_deadline; | |
1607 | ||
1608 | min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL; | |
1609 | if (time_before(deadline, min_deadline)) | |
1610 | deadline = min_deadline; | |
1611 | ||
1612 | queue_delayed_work(system_power_efficient_wq, | |
1613 | &res_table->upkeep_dw, deadline - now); | |
1614 | } | |
1615 | } | |
1616 | ||
1617 | static void nh_res_table_upkeep_dw(struct work_struct *work) | |
1618 | { | |
1619 | struct delayed_work *dw = to_delayed_work(work); | |
1620 | struct nh_res_table *res_table; | |
1621 | ||
1622 | res_table = container_of(dw, struct nh_res_table, upkeep_dw); | |
0b4818aa | 1623 | nh_res_table_upkeep(res_table, true, true); |
283a72a5 PM |
1624 | } |
1625 | ||
1626 | static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table) | |
1627 | { | |
1628 | cancel_delayed_work_sync(&res_table->upkeep_dw); | |
1629 | } | |
1630 | ||
1631 | static void nh_res_group_rebalance(struct nh_group *nhg, | |
1632 | struct nh_res_table *res_table) | |
1633 | { | |
1634 | int prev_upper_bound = 0; | |
1635 | int total = 0; | |
1636 | int w = 0; | |
1637 | int i; | |
1638 | ||
1639 | INIT_LIST_HEAD(&res_table->uw_nh_entries); | |
1640 | ||
1641 | for (i = 0; i < nhg->num_nh; ++i) | |
1642 | total += nhg->nh_entries[i].weight; | |
1643 | ||
1644 | for (i = 0; i < nhg->num_nh; ++i) { | |
1645 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1646 | int upper_bound; | |
1647 | ||
1648 | w += nhge->weight; | |
1649 | upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w, | |
1650 | total); | |
1651 | nhge->res.wants_buckets = upper_bound - prev_upper_bound; | |
1652 | prev_upper_bound = upper_bound; | |
1653 | ||
1654 | if (nh_res_nhge_is_uw(nhge)) { | |
1655 | if (list_empty(&res_table->uw_nh_entries)) | |
1656 | res_table->unbalanced_since = jiffies; | |
1657 | list_add(&nhge->res.uw_nh_entry, | |
1658 | &res_table->uw_nh_entries); | |
1659 | } | |
1660 | } | |
1661 | } | |
1662 | ||
1663 | /* Migrate buckets in res_table so that they reference NHGE's from NHG with | |
1664 | * the right NH ID. Set those buckets that do not have a corresponding NHGE | |
1665 | * entry in NHG as not occupied. | |
1666 | */ | |
1667 | static void nh_res_table_migrate_buckets(struct nh_res_table *res_table, | |
1668 | struct nh_group *nhg) | |
1669 | { | |
1670 | u16 i; | |
1671 | ||
1672 | for (i = 0; i < res_table->num_nh_buckets; i++) { | |
1673 | struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; | |
1674 | u32 id = rtnl_dereference(bucket->nh_entry)->nh->id; | |
1675 | bool found = false; | |
1676 | int j; | |
1677 | ||
1678 | for (j = 0; j < nhg->num_nh; j++) { | |
1679 | struct nh_grp_entry *nhge = &nhg->nh_entries[j]; | |
1680 | ||
1681 | if (nhge->nh->id == id) { | |
1682 | nh_res_bucket_set_nh(bucket, nhge); | |
1683 | found = true; | |
1684 | break; | |
1685 | } | |
1686 | } | |
1687 | ||
1688 | if (!found) | |
1689 | nh_res_bucket_unset_nh(bucket); | |
1690 | } | |
1691 | } | |
1692 | ||
1693 | static void replace_nexthop_grp_res(struct nh_group *oldg, | |
1694 | struct nh_group *newg) | |
1695 | { | |
1696 | /* For NH group replacement, the new NHG might only have a stub | |
1697 | * hash table with 0 buckets, because the number of buckets was not | |
1698 | * specified. For NH removal, oldg and newg both reference the same | |
1699 | * res_table. So in any case, in the following, we want to work | |
1700 | * with oldg->res_table. | |
1701 | */ | |
1702 | struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table); | |
1703 | unsigned long prev_unbalanced_since = old_res_table->unbalanced_since; | |
1704 | bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries); | |
1705 | ||
1706 | nh_res_table_cancel_upkeep(old_res_table); | |
1707 | nh_res_table_migrate_buckets(old_res_table, newg); | |
1708 | nh_res_group_rebalance(newg, old_res_table); | |
1709 | if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries)) | |
1710 | old_res_table->unbalanced_since = prev_unbalanced_since; | |
0b4818aa | 1711 | nh_res_table_upkeep(old_res_table, true, false); |
283a72a5 PM |
1712 | } |
1713 | ||
de1d1ee3 | 1714 | static void nh_hthr_group_rebalance(struct nh_group *nhg) |
430a0491 DA |
1715 | { |
1716 | int total = 0; | |
1717 | int w = 0; | |
1718 | int i; | |
1719 | ||
1720 | for (i = 0; i < nhg->num_nh; ++i) | |
1721 | total += nhg->nh_entries[i].weight; | |
1722 | ||
1723 | for (i = 0; i < nhg->num_nh; ++i) { | |
1724 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1725 | int upper_bound; | |
1726 | ||
1727 | w += nhge->weight; | |
1728 | upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; | |
de1d1ee3 | 1729 | atomic_set(&nhge->hthr.upper_bound, upper_bound); |
430a0491 DA |
1730 | } |
1731 | } | |
1732 | ||
ac21753a | 1733 | static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, |
430a0491 DA |
1734 | struct nl_info *nlinfo) |
1735 | { | |
90f33bff | 1736 | struct nh_grp_entry *nhges, *new_nhges; |
ac21753a | 1737 | struct nexthop *nhp = nhge->nh_parent; |
833a1065 | 1738 | struct netlink_ext_ack extack; |
430a0491 | 1739 | struct nexthop *nh = nhge->nh; |
90f33bff | 1740 | struct nh_group *nhg, *newg; |
833a1065 | 1741 | int i, j, err; |
430a0491 DA |
1742 | |
1743 | WARN_ON(!nh); | |
1744 | ||
ac21753a | 1745 | nhg = rtnl_dereference(nhp->nh_grp); |
90f33bff | 1746 | newg = nhg->spare; |
430a0491 | 1747 | |
90f33bff NA |
1748 | /* last entry, keep it visible and remove the parent */ |
1749 | if (nhg->num_nh == 1) { | |
1750 | remove_nexthop(net, nhp, nlinfo); | |
430a0491 | 1751 | return; |
90f33bff | 1752 | } |
430a0491 | 1753 | |
863b2558 | 1754 | newg->has_v4 = false; |
90e1a9e2 | 1755 | newg->is_multipath = nhg->is_multipath; |
de1d1ee3 | 1756 | newg->hash_threshold = nhg->hash_threshold; |
283a72a5 | 1757 | newg->resilient = nhg->resilient; |
ce9ac056 | 1758 | newg->fdb_nh = nhg->fdb_nh; |
90f33bff | 1759 | newg->num_nh = nhg->num_nh; |
430a0491 | 1760 | |
90f33bff NA |
1761 | /* copy old entries to new except the one getting removed */ |
1762 | nhges = nhg->nh_entries; | |
1763 | new_nhges = newg->nh_entries; | |
1764 | for (i = 0, j = 0; i < nhg->num_nh; ++i) { | |
863b2558 IS |
1765 | struct nh_info *nhi; |
1766 | ||
90f33bff NA |
1767 | /* current nexthop getting removed */ |
1768 | if (nhg->nh_entries[i].nh == nh) { | |
1769 | newg->num_nh--; | |
1770 | continue; | |
1771 | } | |
430a0491 | 1772 | |
863b2558 IS |
1773 | nhi = rtnl_dereference(nhges[i].nh->nh_info); |
1774 | if (nhi->family == AF_INET) | |
1775 | newg->has_v4 = true; | |
1776 | ||
90f33bff NA |
1777 | list_del(&nhges[i].nh_list); |
1778 | new_nhges[j].nh_parent = nhges[i].nh_parent; | |
1779 | new_nhges[j].nh = nhges[i].nh; | |
1780 | new_nhges[j].weight = nhges[i].weight; | |
1781 | list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); | |
1782 | j++; | |
1783 | } | |
430a0491 | 1784 | |
de1d1ee3 PM |
1785 | if (newg->hash_threshold) |
1786 | nh_hthr_group_rebalance(newg); | |
283a72a5 PM |
1787 | else if (newg->resilient) |
1788 | replace_nexthop_grp_res(nhg, newg); | |
1789 | ||
90f33bff NA |
1790 | rcu_assign_pointer(nhp->nh_grp, newg); |
1791 | ||
1792 | list_del(&nhge->nh_list); | |
1793 | nexthop_put(nhge->nh); | |
430a0491 | 1794 | |
7c37c7e0 PM |
1795 | /* Removal of a NH from a resilient group is notified through |
1796 | * bucket notifications. | |
1797 | */ | |
de1d1ee3 | 1798 | if (newg->hash_threshold) { |
7c37c7e0 PM |
1799 | err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, |
1800 | &extack); | |
1801 | if (err) | |
1802 | pr_err("%s\n", extack._msg); | |
1803 | } | |
833a1065 | 1804 | |
430a0491 | 1805 | if (nlinfo) |
ac21753a | 1806 | nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); |
430a0491 DA |
1807 | } |
1808 | ||
1809 | static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, | |
1810 | struct nl_info *nlinfo) | |
1811 | { | |
1812 | struct nh_grp_entry *nhge, *tmp; | |
1813 | ||
ac21753a DA |
1814 | list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) |
1815 | remove_nh_grp_entry(net, nhge, nlinfo); | |
430a0491 | 1816 | |
90f33bff | 1817 | /* make sure all see the newly published array before releasing rtnl */ |
df6afe2f | 1818 | synchronize_net(); |
430a0491 DA |
1819 | } |
1820 | ||
1821 | static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) | |
1822 | { | |
1823 | struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); | |
283a72a5 | 1824 | struct nh_res_table *res_table; |
430a0491 DA |
1825 | int i, num_nh = nhg->num_nh; |
1826 | ||
1827 | for (i = 0; i < num_nh; ++i) { | |
1828 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1829 | ||
1830 | if (WARN_ON(!nhge->nh)) | |
1831 | continue; | |
1832 | ||
90f33bff | 1833 | list_del_init(&nhge->nh_list); |
430a0491 | 1834 | } |
283a72a5 PM |
1835 | |
1836 | if (nhg->resilient) { | |
1837 | res_table = rtnl_dereference(nhg->res_table); | |
1838 | nh_res_table_cancel_upkeep(res_table); | |
1839 | } | |
430a0491 DA |
1840 | } |
1841 | ||
7bf4796d | 1842 | /* not called for nexthop replace */ |
4c7e8084 DA |
1843 | static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) |
1844 | { | |
f88d8ea6 | 1845 | struct fib6_info *f6i, *tmp; |
4c7e8084 DA |
1846 | bool do_flush = false; |
1847 | struct fib_info *fi; | |
1848 | ||
1849 | list_for_each_entry(fi, &nh->fi_list, nh_list) { | |
1850 | fi->fib_flags |= RTNH_F_DEAD; | |
1851 | do_flush = true; | |
1852 | } | |
1853 | if (do_flush) | |
1854 | fib_flush(net); | |
f88d8ea6 DA |
1855 | |
1856 | /* ip6_del_rt removes the entry from this list hence the _safe */ | |
1857 | list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { | |
1858 | /* __ip6_del_rt does a release, so do a hold here */ | |
1859 | fib6_info_hold(f6i); | |
4f80116d RP |
1860 | ipv6_stub->ip6_del_rt(net, f6i, |
1861 | !net->ipv4.sysctl_nexthop_compat_mode); | |
f88d8ea6 | 1862 | } |
4c7e8084 DA |
1863 | } |
1864 | ||
430a0491 DA |
1865 | static void __remove_nexthop(struct net *net, struct nexthop *nh, |
1866 | struct nl_info *nlinfo) | |
1867 | { | |
4c7e8084 DA |
1868 | __remove_nexthop_fib(net, nh); |
1869 | ||
430a0491 DA |
1870 | if (nh->is_group) { |
1871 | remove_nexthop_group(nh, nlinfo); | |
1872 | } else { | |
1873 | struct nh_info *nhi; | |
1874 | ||
1875 | nhi = rtnl_dereference(nh->nh_info); | |
1876 | if (nhi->fib_nhc.nhc_dev) | |
1877 | hlist_del(&nhi->dev_hash); | |
1878 | ||
1879 | remove_nexthop_from_groups(net, nh, nlinfo); | |
1880 | } | |
597cfe4f DA |
1881 | } |
1882 | ||
ab84be7e | 1883 | static void remove_nexthop(struct net *net, struct nexthop *nh, |
430a0491 | 1884 | struct nl_info *nlinfo) |
ab84be7e | 1885 | { |
3578d53d | 1886 | call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); |
0695564b | 1887 | |
ab84be7e DA |
1888 | /* remove from the tree */ |
1889 | rb_erase(&nh->rb_node, &net->nexthop.rb_root); | |
1890 | ||
1891 | if (nlinfo) | |
1892 | nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); | |
1893 | ||
430a0491 | 1894 | __remove_nexthop(net, nh, nlinfo); |
ab84be7e DA |
1895 | nh_base_seq_inc(net); |
1896 | ||
1897 | nexthop_put(nh); | |
1898 | } | |
1899 | ||
7bf4796d DA |
1900 | /* if any FIB entries reference this nexthop, any dst entries |
1901 | * need to be regenerated | |
1902 | */ | |
1005f19b NA |
1903 | static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, |
1904 | struct nexthop *replaced_nh) | |
7bf4796d DA |
1905 | { |
1906 | struct fib6_info *f6i; | |
1005f19b NA |
1907 | struct nh_group *nhg; |
1908 | int i; | |
7bf4796d DA |
1909 | |
1910 | if (!list_empty(&nh->fi_list)) | |
1911 | rt_cache_flush(net); | |
1912 | ||
1913 | list_for_each_entry(f6i, &nh->f6i_list, nh_list) | |
1914 | ipv6_stub->fib6_update_sernum(net, f6i); | |
1005f19b NA |
1915 | |
1916 | /* if an IPv6 group was replaced, we have to release all old | |
1917 | * dsts to make sure all refcounts are released | |
1918 | */ | |
1919 | if (!replaced_nh->is_group) | |
1920 | return; | |
1921 | ||
1005f19b NA |
1922 | nhg = rtnl_dereference(replaced_nh->nh_grp); |
1923 | for (i = 0; i < nhg->num_nh; i++) { | |
1924 | struct nh_grp_entry *nhge = &nhg->nh_entries[i]; | |
1925 | struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); | |
1926 | ||
1927 | if (nhi->family == AF_INET6) | |
1928 | ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); | |
1929 | } | |
7bf4796d DA |
1930 | } |
1931 | ||
1932 | static int replace_nexthop_grp(struct net *net, struct nexthop *old, | |
597f48e4 | 1933 | struct nexthop *new, const struct nh_config *cfg, |
7bf4796d DA |
1934 | struct netlink_ext_ack *extack) |
1935 | { | |
283a72a5 PM |
1936 | struct nh_res_table *tmp_table = NULL; |
1937 | struct nh_res_table *new_res_table; | |
1938 | struct nh_res_table *old_res_table; | |
7bf4796d | 1939 | struct nh_group *oldg, *newg; |
d144cc5f | 1940 | int i, err; |
7bf4796d DA |
1941 | |
1942 | if (!new->is_group) { | |
1943 | NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); | |
1944 | return -EINVAL; | |
1945 | } | |
1946 | ||
1947 | oldg = rtnl_dereference(old->nh_grp); | |
1948 | newg = rtnl_dereference(new->nh_grp); | |
1949 | ||
de1d1ee3 | 1950 | if (newg->hash_threshold != oldg->hash_threshold) { |
283a72a5 PM |
1951 | NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); |
1952 | return -EINVAL; | |
1953 | } | |
1954 | ||
de1d1ee3 | 1955 | if (newg->hash_threshold) { |
283a72a5 PM |
1956 | err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, |
1957 | extack); | |
1958 | if (err) | |
1959 | return err; | |
1960 | } else if (newg->resilient) { | |
1961 | new_res_table = rtnl_dereference(newg->res_table); | |
1962 | old_res_table = rtnl_dereference(oldg->res_table); | |
1963 | ||
1964 | /* Accept if num_nh_buckets was not given, but if it was | |
1965 | * given, demand that the value be correct. | |
1966 | */ | |
1967 | if (cfg->nh_grp_res_has_num_buckets && | |
1968 | cfg->nh_grp_res_num_buckets != | |
1969 | old_res_table->num_nh_buckets) { | |
1970 | NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group."); | |
1971 | return -EINVAL; | |
1972 | } | |
1973 | ||
7c37c7e0 PM |
1974 | /* Emit a pre-replace notification so that listeners could veto |
1975 | * a potentially unsupported configuration. Otherwise, | |
1976 | * individual bucket replacement notifications would need to be | |
1977 | * vetoed, which is something that should only happen if the | |
1978 | * bucket is currently active. | |
1979 | */ | |
1980 | err = call_nexthop_res_table_notifiers(net, new, extack); | |
1981 | if (err) | |
1982 | return err; | |
1983 | ||
283a72a5 PM |
1984 | if (cfg->nh_grp_res_has_idle_timer) |
1985 | old_res_table->idle_timer = cfg->nh_grp_res_idle_timer; | |
1986 | if (cfg->nh_grp_res_has_unbalanced_timer) | |
1987 | old_res_table->unbalanced_timer = | |
1988 | cfg->nh_grp_res_unbalanced_timer; | |
1989 | ||
1990 | replace_nexthop_grp_res(oldg, newg); | |
1991 | ||
1992 | tmp_table = new_res_table; | |
1993 | rcu_assign_pointer(newg->res_table, old_res_table); | |
1994 | rcu_assign_pointer(newg->spare->res_table, old_res_table); | |
1995 | } | |
1996 | ||
7bf4796d DA |
1997 | /* update parents - used by nexthop code for cleanup */ |
1998 | for (i = 0; i < newg->num_nh; i++) | |
1999 | newg->nh_entries[i].nh_parent = old; | |
2000 | ||
2001 | rcu_assign_pointer(old->nh_grp, newg); | |
2002 | ||
7709efa6 NA |
2003 | /* Make sure concurrent readers are not using 'oldg' anymore. */ |
2004 | synchronize_net(); | |
2005 | ||
283a72a5 PM |
2006 | if (newg->resilient) { |
2007 | rcu_assign_pointer(oldg->res_table, tmp_table); | |
2008 | rcu_assign_pointer(oldg->spare->res_table, tmp_table); | |
2009 | } | |
2010 | ||
7bf4796d DA |
2011 | for (i = 0; i < oldg->num_nh; i++) |
2012 | oldg->nh_entries[i].nh_parent = new; | |
2013 | ||
2014 | rcu_assign_pointer(new->nh_grp, oldg); | |
2015 | ||
2016 | return 0; | |
2017 | } | |
2018 | ||
885a3b15 IS |
2019 | static void nh_group_v4_update(struct nh_group *nhg) |
2020 | { | |
2021 | struct nh_grp_entry *nhges; | |
2022 | bool has_v4 = false; | |
2023 | int i; | |
2024 | ||
2025 | nhges = nhg->nh_entries; | |
2026 | for (i = 0; i < nhg->num_nh; i++) { | |
2027 | struct nh_info *nhi; | |
2028 | ||
2029 | nhi = rtnl_dereference(nhges[i].nh->nh_info); | |
2030 | if (nhi->family == AF_INET) | |
2031 | has_v4 = true; | |
2032 | } | |
2033 | nhg->has_v4 = has_v4; | |
2034 | } | |
2035 | ||
7c37c7e0 PM |
2036 | static int replace_nexthop_single_notify_res(struct net *net, |
2037 | struct nh_res_table *res_table, | |
2038 | struct nexthop *old, | |
2039 | struct nh_info *oldi, | |
2040 | struct nh_info *newi, | |
2041 | struct netlink_ext_ack *extack) | |
2042 | { | |
2043 | u32 nhg_id = res_table->nhg_id; | |
2044 | int err; | |
2045 | u16 i; | |
2046 | ||
2047 | for (i = 0; i < res_table->num_nh_buckets; i++) { | |
2048 | struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; | |
2049 | struct nh_grp_entry *nhge; | |
2050 | ||
2051 | nhge = rtnl_dereference(bucket->nh_entry); | |
2052 | if (nhge->nh == old) { | |
2053 | err = __call_nexthop_res_bucket_notifiers(net, nhg_id, | |
2054 | i, true, | |
2055 | oldi, newi, | |
2056 | extack); | |
2057 | if (err) | |
2058 | goto err_notify; | |
2059 | } | |
2060 | } | |
2061 | ||
2062 | return 0; | |
2063 | ||
2064 | err_notify: | |
2065 | while (i-- > 0) { | |
2066 | struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; | |
2067 | struct nh_grp_entry *nhge; | |
2068 | ||
2069 | nhge = rtnl_dereference(bucket->nh_entry); | |
2070 | if (nhge->nh == old) | |
2071 | __call_nexthop_res_bucket_notifiers(net, nhg_id, i, | |
2072 | true, newi, oldi, | |
2073 | extack); | |
2074 | } | |
2075 | return err; | |
2076 | } | |
2077 | ||
2078 | static int replace_nexthop_single_notify(struct net *net, | |
2079 | struct nexthop *group_nh, | |
2080 | struct nexthop *old, | |
2081 | struct nh_info *oldi, | |
2082 | struct nh_info *newi, | |
2083 | struct netlink_ext_ack *extack) | |
2084 | { | |
2085 | struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); | |
2086 | struct nh_res_table *res_table; | |
2087 | ||
de1d1ee3 | 2088 | if (nhg->hash_threshold) { |
7c37c7e0 PM |
2089 | return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, |
2090 | group_nh, extack); | |
2091 | } else if (nhg->resilient) { | |
2092 | res_table = rtnl_dereference(nhg->res_table); | |
2093 | return replace_nexthop_single_notify_res(net, res_table, | |
2094 | old, oldi, newi, | |
2095 | extack); | |
2096 | } | |
2097 | ||
2098 | return -EINVAL; | |
2099 | } | |
2100 | ||
7bf4796d DA |
2101 | static int replace_nexthop_single(struct net *net, struct nexthop *old, |
2102 | struct nexthop *new, | |
2103 | struct netlink_ext_ack *extack) | |
2104 | { | |
f17bc33d | 2105 | u8 old_protocol, old_nh_flags; |
7bf4796d | 2106 | struct nh_info *oldi, *newi; |
f17bc33d | 2107 | struct nh_grp_entry *nhge; |
8c09c9f9 | 2108 | int err; |
7bf4796d DA |
2109 | |
2110 | if (new->is_group) { | |
2111 | NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); | |
2112 | return -EINVAL; | |
2113 | } | |
2114 | ||
8c09c9f9 IS |
2115 | err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); |
2116 | if (err) | |
2117 | return err; | |
2118 | ||
2119 | /* Hardware flags were set on 'old' as 'new' is not in the red-black | |
2120 | * tree. Therefore, inherit the flags from 'old' to 'new'. | |
2121 | */ | |
2122 | new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); | |
2123 | ||
7bf4796d DA |
2124 | oldi = rtnl_dereference(old->nh_info); |
2125 | newi = rtnl_dereference(new->nh_info); | |
2126 | ||
2127 | newi->nh_parent = old; | |
2128 | oldi->nh_parent = new; | |
2129 | ||
f17bc33d IS |
2130 | old_protocol = old->protocol; |
2131 | old_nh_flags = old->nh_flags; | |
2132 | ||
7bf4796d DA |
2133 | old->protocol = new->protocol; |
2134 | old->nh_flags = new->nh_flags; | |
2135 | ||
2136 | rcu_assign_pointer(old->nh_info, newi); | |
2137 | rcu_assign_pointer(new->nh_info, oldi); | |
2138 | ||
f17bc33d IS |
2139 | /* Send a replace notification for all the groups using the nexthop. */ |
2140 | list_for_each_entry(nhge, &old->grp_list, nh_list) { | |
2141 | struct nexthop *nhp = nhge->nh_parent; | |
2142 | ||
7c37c7e0 PM |
2143 | err = replace_nexthop_single_notify(net, nhp, old, oldi, newi, |
2144 | extack); | |
f17bc33d IS |
2145 | if (err) |
2146 | goto err_notify; | |
2147 | } | |
2148 | ||
885a3b15 IS |
2149 | /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially |
2150 | * update IPv4 indication in all the groups using the nexthop. | |
2151 | */ | |
2152 | if (oldi->family == AF_INET && newi->family == AF_INET6) { | |
885a3b15 IS |
2153 | list_for_each_entry(nhge, &old->grp_list, nh_list) { |
2154 | struct nexthop *nhp = nhge->nh_parent; | |
2155 | struct nh_group *nhg; | |
2156 | ||
2157 | nhg = rtnl_dereference(nhp->nh_grp); | |
2158 | nh_group_v4_update(nhg); | |
2159 | } | |
2160 | } | |
2161 | ||
7bf4796d | 2162 | return 0; |
f17bc33d IS |
2163 | |
2164 | err_notify: | |
2165 | rcu_assign_pointer(new->nh_info, newi); | |
2166 | rcu_assign_pointer(old->nh_info, oldi); | |
2167 | old->nh_flags = old_nh_flags; | |
2168 | old->protocol = old_protocol; | |
2169 | oldi->nh_parent = old; | |
2170 | newi->nh_parent = new; | |
2171 | list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { | |
2172 | struct nexthop *nhp = nhge->nh_parent; | |
2173 | ||
7c37c7e0 | 2174 | replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL); |
f17bc33d IS |
2175 | } |
2176 | call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); | |
2177 | return err; | |
7bf4796d DA |
2178 | } |
2179 | ||
2180 | static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, | |
2181 | struct nl_info *info) | |
2182 | { | |
2183 | struct fib6_info *f6i; | |
2184 | ||
2185 | if (!list_empty(&nh->fi_list)) { | |
2186 | struct fib_info *fi; | |
2187 | ||
2188 | /* expectation is a few fib_info per nexthop and then | |
2189 | * a lot of routes per fib_info. So mark the fib_info | |
2190 | * and then walk the fib tables once | |
2191 | */ | |
2192 | list_for_each_entry(fi, &nh->fi_list, nh_list) | |
2193 | fi->nh_updated = true; | |
2194 | ||
2195 | fib_info_notify_update(net, info); | |
2196 | ||
2197 | list_for_each_entry(fi, &nh->fi_list, nh_list) | |
2198 | fi->nh_updated = false; | |
2199 | } | |
2200 | ||
2201 | list_for_each_entry(f6i, &nh->f6i_list, nh_list) | |
2202 | ipv6_stub->fib6_rt_update(net, f6i, info); | |
2203 | } | |
2204 | ||
2205 | /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries | |
2206 | * linked to this nexthop and for all groups that the nexthop | |
2207 | * is a member of | |
2208 | */ | |
2209 | static void nexthop_replace_notify(struct net *net, struct nexthop *nh, | |
2210 | struct nl_info *info) | |
2211 | { | |
2212 | struct nh_grp_entry *nhge; | |
2213 | ||
2214 | __nexthop_replace_notify(net, nh, info); | |
2215 | ||
2216 | list_for_each_entry(nhge, &nh->grp_list, nh_list) | |
2217 | __nexthop_replace_notify(net, nhge->nh_parent, info); | |
2218 | } | |
2219 | ||
ab84be7e | 2220 | static int replace_nexthop(struct net *net, struct nexthop *old, |
597f48e4 PM |
2221 | struct nexthop *new, const struct nh_config *cfg, |
2222 | struct netlink_ext_ack *extack) | |
ab84be7e | 2223 | { |
7bf4796d DA |
2224 | bool new_is_reject = false; |
2225 | struct nh_grp_entry *nhge; | |
2226 | int err; | |
2227 | ||
2228 | /* check that existing FIB entries are ok with the | |
2229 | * new nexthop definition | |
2230 | */ | |
2231 | err = fib_check_nh_list(old, new, extack); | |
2232 | if (err) | |
2233 | return err; | |
2234 | ||
2235 | err = fib6_check_nh_list(old, new, extack); | |
2236 | if (err) | |
2237 | return err; | |
2238 | ||
2239 | if (!new->is_group) { | |
2240 | struct nh_info *nhi = rtnl_dereference(new->nh_info); | |
2241 | ||
2242 | new_is_reject = nhi->reject_nh; | |
2243 | } | |
2244 | ||
2245 | list_for_each_entry(nhge, &old->grp_list, nh_list) { | |
2246 | /* if new nexthop is a blackhole, any groups using this | |
2247 | * nexthop cannot have more than 1 path | |
2248 | */ | |
2249 | if (new_is_reject && | |
2250 | nexthop_num_path(nhge->nh_parent) > 1) { | |
2251 | NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); | |
2252 | return -EINVAL; | |
2253 | } | |
2254 | ||
2255 | err = fib_check_nh_list(nhge->nh_parent, new, extack); | |
2256 | if (err) | |
2257 | return err; | |
2258 | ||
2259 | err = fib6_check_nh_list(nhge->nh_parent, new, extack); | |
2260 | if (err) | |
2261 | return err; | |
2262 | } | |
2263 | ||
2264 | if (old->is_group) | |
597f48e4 | 2265 | err = replace_nexthop_grp(net, old, new, cfg, extack); |
7bf4796d DA |
2266 | else |
2267 | err = replace_nexthop_single(net, old, new, extack); | |
2268 | ||
2269 | if (!err) { | |
1005f19b | 2270 | nh_rt_cache_flush(net, old, new); |
7bf4796d DA |
2271 | |
2272 | __remove_nexthop(net, new, NULL); | |
2273 | nexthop_put(new); | |
2274 | } | |
2275 | ||
2276 | return err; | |
ab84be7e DA |
2277 | } |
2278 | ||
2279 | /* called with rtnl_lock held */ | |
2280 | static int insert_nexthop(struct net *net, struct nexthop *new_nh, | |
2281 | struct nh_config *cfg, struct netlink_ext_ack *extack) | |
2282 | { | |
2283 | struct rb_node **pp, *parent = NULL, *next; | |
2284 | struct rb_root *root = &net->nexthop.rb_root; | |
2285 | bool replace = !!(cfg->nlflags & NLM_F_REPLACE); | |
2286 | bool create = !!(cfg->nlflags & NLM_F_CREATE); | |
2287 | u32 new_id = new_nh->id; | |
7bf4796d | 2288 | int replace_notify = 0; |
ab84be7e DA |
2289 | int rc = -EEXIST; |
2290 | ||
2291 | pp = &root->rb_node; | |
2292 | while (1) { | |
2293 | struct nexthop *nh; | |
2294 | ||
233c6378 | 2295 | next = *pp; |
ab84be7e DA |
2296 | if (!next) |
2297 | break; | |
2298 | ||
2299 | parent = next; | |
2300 | ||
2301 | nh = rb_entry(parent, struct nexthop, rb_node); | |
2302 | if (new_id < nh->id) { | |
2303 | pp = &next->rb_left; | |
2304 | } else if (new_id > nh->id) { | |
2305 | pp = &next->rb_right; | |
2306 | } else if (replace) { | |
597f48e4 | 2307 | rc = replace_nexthop(net, nh, new_nh, cfg, extack); |
7bf4796d | 2308 | if (!rc) { |
ab84be7e | 2309 | new_nh = nh; /* send notification with old nh */ |
7bf4796d DA |
2310 | replace_notify = 1; |
2311 | } | |
ab84be7e DA |
2312 | goto out; |
2313 | } else { | |
2314 | /* id already exists and not a replace */ | |
2315 | goto out; | |
2316 | } | |
2317 | } | |
2318 | ||
2319 | if (replace && !create) { | |
2320 | NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); | |
2321 | rc = -ENOENT; | |
2322 | goto out; | |
2323 | } | |
2324 | ||
283a72a5 PM |
2325 | if (new_nh->is_group) { |
2326 | struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp); | |
2327 | struct nh_res_table *res_table; | |
2328 | ||
2329 | if (nhg->resilient) { | |
2330 | res_table = rtnl_dereference(nhg->res_table); | |
2331 | ||
2332 | /* Not passing the number of buckets is OK when | |
2333 | * replacing, but not when creating a new group. | |
2334 | */ | |
2335 | if (!cfg->nh_grp_res_has_num_buckets) { | |
2336 | NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion"); | |
2337 | rc = -EINVAL; | |
2338 | goto out; | |
2339 | } | |
2340 | ||
2341 | nh_res_group_rebalance(nhg, res_table); | |
7c37c7e0 PM |
2342 | |
2343 | /* Do not send bucket notifications, we do full | |
2344 | * notification below. | |
2345 | */ | |
0b4818aa | 2346 | nh_res_table_upkeep(res_table, false, false); |
283a72a5 PM |
2347 | } |
2348 | } | |
2349 | ||
ab84be7e DA |
2350 | rb_link_node_rcu(&new_nh->rb_node, parent, pp); |
2351 | rb_insert_color(&new_nh->rb_node, root); | |
732d167b | 2352 | |
de1d1ee3 PM |
2353 | /* The initial insertion is a full notification for hash-threshold as |
2354 | * well as resilient groups. | |
7c37c7e0 | 2355 | */ |
732d167b IS |
2356 | rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); |
2357 | if (rc) | |
2358 | rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); | |
2359 | ||
ab84be7e DA |
2360 | out: |
2361 | if (!rc) { | |
2362 | nh_base_seq_inc(net); | |
2363 | nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); | |
4f80116d | 2364 | if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) |
7bf4796d | 2365 | nexthop_replace_notify(net, new_nh, &cfg->nlinfo); |
ab84be7e DA |
2366 | } |
2367 | ||
2368 | return rc; | |
2369 | } | |
2370 | ||
597cfe4f DA |
2371 | /* rtnl */ |
2372 | /* remove all nexthops tied to a device being deleted */ | |
76c03bf8 | 2373 | static void nexthop_flush_dev(struct net_device *dev, unsigned long event) |
597cfe4f DA |
2374 | { |
2375 | unsigned int hash = nh_dev_hashfn(dev->ifindex); | |
2376 | struct net *net = dev_net(dev); | |
2377 | struct hlist_head *head = &net->nexthop.devhash[hash]; | |
2378 | struct hlist_node *n; | |
2379 | struct nh_info *nhi; | |
2380 | ||
2381 | hlist_for_each_entry_safe(nhi, n, head, dev_hash) { | |
2382 | if (nhi->fib_nhc.nhc_dev != dev) | |
2383 | continue; | |
2384 | ||
76c03bf8 IS |
2385 | if (nhi->reject_nh && |
2386 | (event == NETDEV_DOWN || event == NETDEV_CHANGE)) | |
2387 | continue; | |
2388 | ||
430a0491 | 2389 | remove_nexthop(net, nhi->nh_parent, NULL); |
597cfe4f DA |
2390 | } |
2391 | } | |
2392 | ||
ab84be7e DA |
2393 | /* rtnl; called when net namespace is deleted */ |
2394 | static void flush_all_nexthops(struct net *net) | |
2395 | { | |
2396 | struct rb_root *root = &net->nexthop.rb_root; | |
2397 | struct rb_node *node; | |
2398 | struct nexthop *nh; | |
2399 | ||
2400 | while ((node = rb_first(root))) { | |
2401 | nh = rb_entry(node, struct nexthop, rb_node); | |
430a0491 | 2402 | remove_nexthop(net, nh, NULL); |
ab84be7e DA |
2403 | cond_resched(); |
2404 | } | |
2405 | } | |
2406 | ||
430a0491 DA |
2407 | static struct nexthop *nexthop_create_group(struct net *net, |
2408 | struct nh_config *cfg) | |
2409 | { | |
2410 | struct nlattr *grps_attr = cfg->nh_grp; | |
2411 | struct nexthop_grp *entry = nla_data(grps_attr); | |
90f33bff | 2412 | u16 num_nh = nla_len(grps_attr) / sizeof(*entry); |
430a0491 DA |
2413 | struct nh_group *nhg; |
2414 | struct nexthop *nh; | |
283a72a5 | 2415 | int err; |
430a0491 DA |
2416 | int i; |
2417 | ||
eeaac363 NA |
2418 | if (WARN_ON(!num_nh)) |
2419 | return ERR_PTR(-EINVAL); | |
2420 | ||
430a0491 DA |
2421 | nh = nexthop_alloc(); |
2422 | if (!nh) | |
2423 | return ERR_PTR(-ENOMEM); | |
2424 | ||
2425 | nh->is_group = 1; | |
2426 | ||
90f33bff | 2427 | nhg = nexthop_grp_alloc(num_nh); |
430a0491 DA |
2428 | if (!nhg) { |
2429 | kfree(nh); | |
2430 | return ERR_PTR(-ENOMEM); | |
2431 | } | |
2432 | ||
90f33bff NA |
2433 | /* spare group used for removals */ |
2434 | nhg->spare = nexthop_grp_alloc(num_nh); | |
dafe2078 | 2435 | if (!nhg->spare) { |
90f33bff NA |
2436 | kfree(nhg); |
2437 | kfree(nh); | |
dafe2078 | 2438 | return ERR_PTR(-ENOMEM); |
90f33bff NA |
2439 | } |
2440 | nhg->spare->spare = nhg; | |
2441 | ||
430a0491 DA |
2442 | for (i = 0; i < nhg->num_nh; ++i) { |
2443 | struct nexthop *nhe; | |
2444 | struct nh_info *nhi; | |
2445 | ||
2446 | nhe = nexthop_find_by_id(net, entry[i].id); | |
283a72a5 PM |
2447 | if (!nexthop_get(nhe)) { |
2448 | err = -ENOENT; | |
430a0491 | 2449 | goto out_no_nh; |
283a72a5 | 2450 | } |
430a0491 DA |
2451 | |
2452 | nhi = rtnl_dereference(nhe->nh_info); | |
2453 | if (nhi->family == AF_INET) | |
2454 | nhg->has_v4 = true; | |
2455 | ||
2456 | nhg->nh_entries[i].nh = nhe; | |
2457 | nhg->nh_entries[i].weight = entry[i].weight + 1; | |
2458 | list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); | |
2459 | nhg->nh_entries[i].nh_parent = nh; | |
2460 | } | |
2461 | ||
90e1a9e2 | 2462 | if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { |
de1d1ee3 | 2463 | nhg->hash_threshold = 1; |
90e1a9e2 | 2464 | nhg->is_multipath = true; |
710ec562 | 2465 | } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { |
283a72a5 PM |
2466 | struct nh_res_table *res_table; |
2467 | ||
283a72a5 PM |
2468 | res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg); |
2469 | if (!res_table) { | |
2470 | err = -ENOMEM; | |
2471 | goto out_no_nh; | |
2472 | } | |
2473 | ||
2474 | rcu_assign_pointer(nhg->spare->res_table, res_table); | |
2475 | rcu_assign_pointer(nhg->res_table, res_table); | |
2476 | nhg->resilient = true; | |
2477 | nhg->is_multipath = true; | |
90e1a9e2 | 2478 | } |
720ccd9a | 2479 | |
de1d1ee3 | 2480 | WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1); |
720ccd9a | 2481 | |
de1d1ee3 PM |
2482 | if (nhg->hash_threshold) |
2483 | nh_hthr_group_rebalance(nhg); | |
430a0491 | 2484 | |
38428d68 | 2485 | if (cfg->nh_fdb) |
ce9ac056 | 2486 | nhg->fdb_nh = 1; |
38428d68 | 2487 | |
430a0491 DA |
2488 | rcu_assign_pointer(nh->nh_grp, nhg); |
2489 | ||
2490 | return nh; | |
2491 | ||
2492 | out_no_nh: | |
7b01e53e IS |
2493 | for (i--; i >= 0; --i) { |
2494 | list_del(&nhg->nh_entries[i].nh_list); | |
430a0491 | 2495 | nexthop_put(nhg->nh_entries[i].nh); |
7b01e53e | 2496 | } |
430a0491 | 2497 | |
90f33bff | 2498 | kfree(nhg->spare); |
430a0491 DA |
2499 | kfree(nhg); |
2500 | kfree(nh); | |
2501 | ||
283a72a5 | 2502 | return ERR_PTR(err); |
430a0491 DA |
2503 | } |
2504 | ||
597cfe4f DA |
2505 | static int nh_create_ipv4(struct net *net, struct nexthop *nh, |
2506 | struct nh_info *nhi, struct nh_config *cfg, | |
2507 | struct netlink_ext_ack *extack) | |
2508 | { | |
2509 | struct fib_nh *fib_nh = &nhi->fib_nh; | |
2510 | struct fib_config fib_cfg = { | |
2511 | .fc_oif = cfg->nh_ifindex, | |
2512 | .fc_gw4 = cfg->gw.ipv4, | |
2513 | .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, | |
2514 | .fc_flags = cfg->nh_flags, | |
9aca491e | 2515 | .fc_nlinfo = cfg->nlinfo, |
b513bd03 DA |
2516 | .fc_encap = cfg->nh_encap, |
2517 | .fc_encap_type = cfg->nh_encap_type, | |
597cfe4f | 2518 | }; |
38428d68 | 2519 | u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); |
c76c9925 | 2520 | int err; |
597cfe4f DA |
2521 | |
2522 | err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); | |
2523 | if (err) { | |
2524 | fib_nh_release(net, fib_nh); | |
2525 | goto out; | |
2526 | } | |
2527 | ||
ce9ac056 | 2528 | if (nhi->fdb_nh) |
38428d68 RP |
2529 | goto out; |
2530 | ||
597cfe4f DA |
2531 | /* sets nh_dev if successful */ |
2532 | err = fib_check_nh(net, fib_nh, tb_id, 0, extack); | |
2533 | if (!err) { | |
2534 | nh->nh_flags = fib_nh->fib_nh_flags; | |
dcb1ecb5 DA |
2535 | fib_info_update_nhc_saddr(net, &fib_nh->nh_common, |
2536 | fib_nh->fib_nh_scope); | |
597cfe4f DA |
2537 | } else { |
2538 | fib_nh_release(net, fib_nh); | |
2539 | } | |
2540 | out: | |
2541 | return err; | |
2542 | } | |
2543 | ||
53010f99 DA |
2544 | static int nh_create_ipv6(struct net *net, struct nexthop *nh, |
2545 | struct nh_info *nhi, struct nh_config *cfg, | |
2546 | struct netlink_ext_ack *extack) | |
2547 | { | |
2548 | struct fib6_nh *fib6_nh = &nhi->fib6_nh; | |
2549 | struct fib6_config fib6_cfg = { | |
2550 | .fc_table = l3mdev_fib_table(cfg->dev), | |
2551 | .fc_ifindex = cfg->nh_ifindex, | |
2552 | .fc_gateway = cfg->gw.ipv6, | |
2553 | .fc_flags = cfg->nh_flags, | |
9aca491e | 2554 | .fc_nlinfo = cfg->nlinfo, |
b513bd03 DA |
2555 | .fc_encap = cfg->nh_encap, |
2556 | .fc_encap_type = cfg->nh_encap_type, | |
38428d68 | 2557 | .fc_is_fdb = cfg->nh_fdb, |
53010f99 | 2558 | }; |
6f43e525 | 2559 | int err; |
53010f99 DA |
2560 | |
2561 | if (!ipv6_addr_any(&cfg->gw.ipv6)) | |
2562 | fib6_cfg.fc_flags |= RTF_GATEWAY; | |
2563 | ||
2564 | /* sets nh_dev if successful */ | |
2565 | err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, | |
2566 | extack); | |
1c743127 NA |
2567 | if (err) { |
2568 | /* IPv6 is not enabled, don't call fib6_nh_release */ | |
2569 | if (err == -EAFNOSUPPORT) | |
2570 | goto out; | |
53010f99 | 2571 | ipv6_stub->fib6_nh_release(fib6_nh); |
1c743127 | 2572 | } else { |
53010f99 | 2573 | nh->nh_flags = fib6_nh->fib_nh_flags; |
1c743127 NA |
2574 | } |
2575 | out: | |
53010f99 DA |
2576 | return err; |
2577 | } | |
2578 | ||
ab84be7e DA |
2579 | static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, |
2580 | struct netlink_ext_ack *extack) | |
2581 | { | |
2582 | struct nh_info *nhi; | |
2583 | struct nexthop *nh; | |
2584 | int err = 0; | |
2585 | ||
2586 | nh = nexthop_alloc(); | |
2587 | if (!nh) | |
2588 | return ERR_PTR(-ENOMEM); | |
2589 | ||
2590 | nhi = kzalloc(sizeof(*nhi), GFP_KERNEL); | |
2591 | if (!nhi) { | |
2592 | kfree(nh); | |
2593 | return ERR_PTR(-ENOMEM); | |
2594 | } | |
2595 | ||
2596 | nh->nh_flags = cfg->nh_flags; | |
2597 | nh->net = net; | |
2598 | ||
2599 | nhi->nh_parent = nh; | |
2600 | nhi->family = cfg->nh_family; | |
2601 | nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; | |
2602 | ||
38428d68 | 2603 | if (cfg->nh_fdb) |
ce9ac056 | 2604 | nhi->fdb_nh = 1; |
38428d68 | 2605 | |
ab84be7e DA |
2606 | if (cfg->nh_blackhole) { |
2607 | nhi->reject_nh = 1; | |
2608 | cfg->nh_ifindex = net->loopback_dev->ifindex; | |
2609 | } | |
2610 | ||
597cfe4f DA |
2611 | switch (cfg->nh_family) { |
2612 | case AF_INET: | |
2613 | err = nh_create_ipv4(net, nh, nhi, cfg, extack); | |
2614 | break; | |
53010f99 DA |
2615 | case AF_INET6: |
2616 | err = nh_create_ipv6(net, nh, nhi, cfg, extack); | |
2617 | break; | |
597cfe4f DA |
2618 | } |
2619 | ||
ab84be7e DA |
2620 | if (err) { |
2621 | kfree(nhi); | |
2622 | kfree(nh); | |
2623 | return ERR_PTR(err); | |
2624 | } | |
2625 | ||
597cfe4f | 2626 | /* add the entry to the device based hash */ |
ce9ac056 | 2627 | if (!nhi->fdb_nh) |
38428d68 | 2628 | nexthop_devhash_add(net, nhi); |
597cfe4f | 2629 | |
ab84be7e DA |
2630 | rcu_assign_pointer(nh->nh_info, nhi); |
2631 | ||
2632 | return nh; | |
2633 | } | |
2634 | ||
2635 | /* called with rtnl lock held */ | |
2636 | static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, | |
2637 | struct netlink_ext_ack *extack) | |
2638 | { | |
2639 | struct nexthop *nh; | |
2640 | int err; | |
2641 | ||
2642 | if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { | |
2643 | NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); | |
2644 | return ERR_PTR(-EINVAL); | |
2645 | } | |
2646 | ||
2647 | if (!cfg->nh_id) { | |
2648 | cfg->nh_id = nh_find_unused_id(net); | |
2649 | if (!cfg->nh_id) { | |
2650 | NL_SET_ERR_MSG(extack, "No unused id"); | |
2651 | return ERR_PTR(-EINVAL); | |
2652 | } | |
2653 | } | |
2654 | ||
430a0491 DA |
2655 | if (cfg->nh_grp) |
2656 | nh = nexthop_create_group(net, cfg); | |
2657 | else | |
2658 | nh = nexthop_create(net, cfg, extack); | |
2659 | ||
ab84be7e DA |
2660 | if (IS_ERR(nh)) |
2661 | return nh; | |
2662 | ||
2663 | refcount_set(&nh->refcnt, 1); | |
2664 | nh->id = cfg->nh_id; | |
2665 | nh->protocol = cfg->nh_protocol; | |
2666 | nh->net = net; | |
2667 | ||
2668 | err = insert_nexthop(net, nh, cfg, extack); | |
2669 | if (err) { | |
430a0491 | 2670 | __remove_nexthop(net, nh, NULL); |
ab84be7e DA |
2671 | nexthop_put(nh); |
2672 | nh = ERR_PTR(err); | |
2673 | } | |
2674 | ||
2675 | return nh; | |
2676 | } | |
2677 | ||
a2601e2b PM |
2678 | static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback, |
2679 | unsigned long *timer_p, bool *has_p, | |
2680 | struct netlink_ext_ack *extack) | |
2681 | { | |
2682 | unsigned long timer; | |
2683 | u32 value; | |
2684 | ||
2685 | if (!attr) { | |
2686 | *timer_p = fallback; | |
2687 | *has_p = false; | |
2688 | return 0; | |
2689 | } | |
2690 | ||
2691 | value = nla_get_u32(attr); | |
2692 | timer = clock_t_to_jiffies(value); | |
2693 | if (timer == ~0UL) { | |
2694 | NL_SET_ERR_MSG(extack, "Timer value too large"); | |
2695 | return -EINVAL; | |
2696 | } | |
2697 | ||
2698 | *timer_p = timer; | |
2699 | *has_p = true; | |
2700 | return 0; | |
2701 | } | |
2702 | ||
2703 | static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg, | |
2704 | struct netlink_ext_ack *extack) | |
2705 | { | |
2706 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {}; | |
2707 | int err; | |
2708 | ||
2709 | if (res) { | |
2710 | err = nla_parse_nested(tb, | |
2711 | ARRAY_SIZE(rtm_nh_res_policy_new) - 1, | |
2712 | res, rtm_nh_res_policy_new, extack); | |
2713 | if (err < 0) | |
2714 | return err; | |
2715 | } | |
2716 | ||
2717 | if (tb[NHA_RES_GROUP_BUCKETS]) { | |
2718 | cfg->nh_grp_res_num_buckets = | |
2719 | nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]); | |
2720 | cfg->nh_grp_res_has_num_buckets = true; | |
2721 | if (!cfg->nh_grp_res_num_buckets) { | |
2722 | NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0"); | |
2723 | return -EINVAL; | |
2724 | } | |
2725 | } | |
2726 | ||
2727 | err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER], | |
2728 | NH_RES_DEFAULT_IDLE_TIMER, | |
2729 | &cfg->nh_grp_res_idle_timer, | |
2730 | &cfg->nh_grp_res_has_idle_timer, | |
2731 | extack); | |
2732 | if (err) | |
2733 | return err; | |
2734 | ||
2735 | return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER], | |
2736 | NH_RES_DEFAULT_UNBALANCED_TIMER, | |
2737 | &cfg->nh_grp_res_unbalanced_timer, | |
2738 | &cfg->nh_grp_res_has_unbalanced_timer, | |
2739 | extack); | |
2740 | } | |
2741 | ||
ab84be7e DA |
2742 | static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, |
2743 | struct nlmsghdr *nlh, struct nh_config *cfg, | |
2744 | struct netlink_ext_ack *extack) | |
2745 | { | |
2746 | struct nhmsg *nhm = nlmsg_data(nlh); | |
643d0878 | 2747 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; |
ab84be7e DA |
2748 | int err; |
2749 | ||
643d0878 PM |
2750 | err = nlmsg_parse(nlh, sizeof(*nhm), tb, |
2751 | ARRAY_SIZE(rtm_nh_policy_new) - 1, | |
2752 | rtm_nh_policy_new, extack); | |
ab84be7e DA |
2753 | if (err < 0) |
2754 | return err; | |
2755 | ||
2756 | err = -EINVAL; | |
2757 | if (nhm->resvd || nhm->nh_scope) { | |
2758 | NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); | |
2759 | goto out; | |
2760 | } | |
2761 | if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) { | |
2762 | NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header"); | |
2763 | goto out; | |
2764 | } | |
2765 | ||
2766 | switch (nhm->nh_family) { | |
597cfe4f | 2767 | case AF_INET: |
53010f99 | 2768 | case AF_INET6: |
597cfe4f | 2769 | break; |
430a0491 DA |
2770 | case AF_UNSPEC: |
2771 | if (tb[NHA_GROUP]) | |
2772 | break; | |
a8eceea8 | 2773 | fallthrough; |
ab84be7e DA |
2774 | default: |
2775 | NL_SET_ERR_MSG(extack, "Invalid address family"); | |
2776 | goto out; | |
2777 | } | |
2778 | ||
ab84be7e DA |
2779 | memset(cfg, 0, sizeof(*cfg)); |
2780 | cfg->nlflags = nlh->nlmsg_flags; | |
2781 | cfg->nlinfo.portid = NETLINK_CB(skb).portid; | |
2782 | cfg->nlinfo.nlh = nlh; | |
2783 | cfg->nlinfo.nl_net = net; | |
2784 | ||
2785 | cfg->nh_family = nhm->nh_family; | |
2786 | cfg->nh_protocol = nhm->nh_protocol; | |
2787 | cfg->nh_flags = nhm->nh_flags; | |
2788 | ||
2789 | if (tb[NHA_ID]) | |
2790 | cfg->nh_id = nla_get_u32(tb[NHA_ID]); | |
2791 | ||
38428d68 RP |
2792 | if (tb[NHA_FDB]) { |
2793 | if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || | |
2794 | tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { | |
2795 | NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); | |
2796 | goto out; | |
2797 | } | |
2798 | if (nhm->nh_flags) { | |
2799 | NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); | |
2800 | goto out; | |
2801 | } | |
2802 | cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); | |
2803 | } | |
2804 | ||
430a0491 DA |
2805 | if (tb[NHA_GROUP]) { |
2806 | if (nhm->nh_family != AF_UNSPEC) { | |
2807 | NL_SET_ERR_MSG(extack, "Invalid family for group"); | |
2808 | goto out; | |
2809 | } | |
2810 | cfg->nh_grp = tb[NHA_GROUP]; | |
2811 | ||
2812 | cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH; | |
2813 | if (tb[NHA_GROUP_TYPE]) | |
2814 | cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]); | |
2815 | ||
2816 | if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) { | |
2817 | NL_SET_ERR_MSG(extack, "Invalid group type"); | |
2818 | goto out; | |
2819 | } | |
a2601e2b PM |
2820 | err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), |
2821 | cfg->nh_grp_type, extack); | |
2822 | if (err) | |
2823 | goto out; | |
2824 | ||
2825 | if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) | |
2826 | err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP], | |
2827 | cfg, extack); | |
430a0491 DA |
2828 | |
2829 | /* no other attributes should be set */ | |
2830 | goto out; | |
2831 | } | |
2832 | ||
ab84be7e | 2833 | if (tb[NHA_BLACKHOLE]) { |
b513bd03 | 2834 | if (tb[NHA_GATEWAY] || tb[NHA_OIF] || |
38428d68 RP |
2835 | tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { |
2836 | NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); | |
ab84be7e DA |
2837 | goto out; |
2838 | } | |
2839 | ||
2840 | cfg->nh_blackhole = 1; | |
2841 | err = 0; | |
2842 | goto out; | |
2843 | } | |
2844 | ||
38428d68 RP |
2845 | if (!cfg->nh_fdb && !tb[NHA_OIF]) { |
2846 | NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); | |
ab84be7e DA |
2847 | goto out; |
2848 | } | |
2849 | ||
38428d68 RP |
2850 | if (!cfg->nh_fdb && tb[NHA_OIF]) { |
2851 | cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); | |
2852 | if (cfg->nh_ifindex) | |
2853 | cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); | |
ab84be7e | 2854 | |
38428d68 RP |
2855 | if (!cfg->dev) { |
2856 | NL_SET_ERR_MSG(extack, "Invalid device index"); | |
2857 | goto out; | |
2858 | } else if (!(cfg->dev->flags & IFF_UP)) { | |
2859 | NL_SET_ERR_MSG(extack, "Nexthop device is not up"); | |
2860 | err = -ENETDOWN; | |
2861 | goto out; | |
2862 | } else if (!netif_carrier_ok(cfg->dev)) { | |
2863 | NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); | |
2864 | err = -ENETDOWN; | |
2865 | goto out; | |
2866 | } | |
ab84be7e DA |
2867 | } |
2868 | ||
597cfe4f DA |
2869 | err = -EINVAL; |
2870 | if (tb[NHA_GATEWAY]) { | |
2871 | struct nlattr *gwa = tb[NHA_GATEWAY]; | |
2872 | ||
2873 | switch (cfg->nh_family) { | |
2874 | case AF_INET: | |
2875 | if (nla_len(gwa) != sizeof(u32)) { | |
2876 | NL_SET_ERR_MSG(extack, "Invalid gateway"); | |
2877 | goto out; | |
2878 | } | |
2879 | cfg->gw.ipv4 = nla_get_be32(gwa); | |
2880 | break; | |
53010f99 DA |
2881 | case AF_INET6: |
2882 | if (nla_len(gwa) != sizeof(struct in6_addr)) { | |
2883 | NL_SET_ERR_MSG(extack, "Invalid gateway"); | |
2884 | goto out; | |
2885 | } | |
2886 | cfg->gw.ipv6 = nla_get_in6_addr(gwa); | |
2887 | break; | |
597cfe4f DA |
2888 | default: |
2889 | NL_SET_ERR_MSG(extack, | |
2890 | "Unknown address family for gateway"); | |
2891 | goto out; | |
2892 | } | |
2893 | } else { | |
2894 | /* device only nexthop (no gateway) */ | |
2895 | if (cfg->nh_flags & RTNH_F_ONLINK) { | |
2896 | NL_SET_ERR_MSG(extack, | |
2897 | "ONLINK flag can not be set for nexthop without a gateway"); | |
2898 | goto out; | |
2899 | } | |
2900 | } | |
2901 | ||
b513bd03 DA |
2902 | if (tb[NHA_ENCAP]) { |
2903 | cfg->nh_encap = tb[NHA_ENCAP]; | |
2904 | ||
2905 | if (!tb[NHA_ENCAP_TYPE]) { | |
2906 | NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing"); | |
2907 | goto out; | |
2908 | } | |
2909 | ||
2910 | cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); | |
2911 | err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); | |
2912 | if (err < 0) | |
2913 | goto out; | |
2914 | ||
2915 | } else if (tb[NHA_ENCAP_TYPE]) { | |
2916 | NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing"); | |
2917 | goto out; | |
2918 | } | |
2919 | ||
2920 | ||
ab84be7e DA |
2921 | err = 0; |
2922 | out: | |
2923 | return err; | |
2924 | } | |
2925 | ||
2926 | /* rtnl */ | |
2927 | static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, | |
2928 | struct netlink_ext_ack *extack) | |
2929 | { | |
2930 | struct net *net = sock_net(skb->sk); | |
2931 | struct nh_config cfg; | |
2932 | struct nexthop *nh; | |
2933 | int err; | |
2934 | ||
2935 | err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); | |
2936 | if (!err) { | |
2937 | nh = nexthop_add(net, &cfg, extack); | |
2938 | if (IS_ERR(nh)) | |
2939 | err = PTR_ERR(nh); | |
2940 | } | |
2941 | ||
2942 | return err; | |
2943 | } | |
2944 | ||
0bccf8ed PM |
2945 | static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, |
2946 | struct nlattr **tb, u32 *id, | |
2947 | struct netlink_ext_ack *extack) | |
ab84be7e DA |
2948 | { |
2949 | struct nhmsg *nhm = nlmsg_data(nlh); | |
ab84be7e | 2950 | |
ab84be7e DA |
2951 | if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { |
2952 | NL_SET_ERR_MSG(extack, "Invalid values in header"); | |
0bccf8ed | 2953 | return -EINVAL; |
ab84be7e DA |
2954 | } |
2955 | ||
2956 | if (!tb[NHA_ID]) { | |
2957 | NL_SET_ERR_MSG(extack, "Nexthop id is missing"); | |
0bccf8ed | 2958 | return -EINVAL; |
ab84be7e DA |
2959 | } |
2960 | ||
2961 | *id = nla_get_u32(tb[NHA_ID]); | |
0bccf8ed | 2962 | if (!(*id)) { |
ab84be7e | 2963 | NL_SET_ERR_MSG(extack, "Invalid nexthop id"); |
0bccf8ed PM |
2964 | return -EINVAL; |
2965 | } | |
2966 | ||
2967 | return 0; | |
2968 | } | |
2969 | ||
2970 | static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, | |
2971 | struct netlink_ext_ack *extack) | |
2972 | { | |
2973 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; | |
2974 | int err; | |
2975 | ||
2976 | err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, | |
2977 | ARRAY_SIZE(rtm_nh_policy_get) - 1, | |
2978 | rtm_nh_policy_get, extack); | |
2979 | if (err < 0) | |
2980 | return err; | |
2981 | ||
2982 | return __nh_valid_get_del_req(nlh, tb, id, extack); | |
ab84be7e DA |
2983 | } |
2984 | ||
2985 | /* rtnl */ | |
2986 | static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, | |
2987 | struct netlink_ext_ack *extack) | |
2988 | { | |
2989 | struct net *net = sock_net(skb->sk); | |
2990 | struct nl_info nlinfo = { | |
2991 | .nlh = nlh, | |
2992 | .nl_net = net, | |
2993 | .portid = NETLINK_CB(skb).portid, | |
2994 | }; | |
2995 | struct nexthop *nh; | |
2996 | int err; | |
2997 | u32 id; | |
2998 | ||
2999 | err = nh_valid_get_del_req(nlh, &id, extack); | |
3000 | if (err) | |
3001 | return err; | |
3002 | ||
3003 | nh = nexthop_find_by_id(net, id); | |
3004 | if (!nh) | |
3005 | return -ENOENT; | |
3006 | ||
430a0491 | 3007 | remove_nexthop(net, nh, &nlinfo); |
ab84be7e DA |
3008 | |
3009 | return 0; | |
3010 | } | |
3011 | ||
3012 | /* rtnl */ | |
3013 | static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, | |
3014 | struct netlink_ext_ack *extack) | |
3015 | { | |
3016 | struct net *net = sock_net(in_skb->sk); | |
3017 | struct sk_buff *skb = NULL; | |
3018 | struct nexthop *nh; | |
3019 | int err; | |
3020 | u32 id; | |
3021 | ||
3022 | err = nh_valid_get_del_req(nlh, &id, extack); | |
3023 | if (err) | |
3024 | return err; | |
3025 | ||
3026 | err = -ENOBUFS; | |
3027 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | |
3028 | if (!skb) | |
3029 | goto out; | |
3030 | ||
3031 | err = -ENOENT; | |
3032 | nh = nexthop_find_by_id(net, id); | |
3033 | if (!nh) | |
3034 | goto errout_free; | |
3035 | ||
3036 | err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, | |
3037 | nlh->nlmsg_seq, 0); | |
3038 | if (err < 0) { | |
3039 | WARN_ON(err == -EMSGSIZE); | |
3040 | goto errout_free; | |
3041 | } | |
3042 | ||
3043 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); | |
3044 | out: | |
3045 | return err; | |
3046 | errout_free: | |
3047 | kfree_skb(skb); | |
3048 | goto out; | |
3049 | } | |
3050 | ||
56450ec6 | 3051 | struct nh_dump_filter { |
8a1bbabb | 3052 | u32 nh_id; |
56450ec6 PM |
3053 | int dev_idx; |
3054 | int master_idx; | |
3055 | bool group_filter; | |
3056 | bool fdb_filter; | |
8a1bbabb | 3057 | u32 res_bucket_nh_id; |
56450ec6 PM |
3058 | }; |
3059 | ||
3060 | static bool nh_dump_filtered(struct nexthop *nh, | |
3061 | struct nh_dump_filter *filter, u8 family) | |
ab84be7e DA |
3062 | { |
3063 | const struct net_device *dev; | |
3064 | const struct nh_info *nhi; | |
3065 | ||
56450ec6 | 3066 | if (filter->group_filter && !nh->is_group) |
430a0491 DA |
3067 | return true; |
3068 | ||
56450ec6 | 3069 | if (!filter->dev_idx && !filter->master_idx && !family) |
ab84be7e DA |
3070 | return false; |
3071 | ||
430a0491 DA |
3072 | if (nh->is_group) |
3073 | return true; | |
3074 | ||
ab84be7e DA |
3075 | nhi = rtnl_dereference(nh->nh_info); |
3076 | if (family && nhi->family != family) | |
3077 | return true; | |
3078 | ||
3079 | dev = nhi->fib_nhc.nhc_dev; | |
56450ec6 | 3080 | if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) |
ab84be7e DA |
3081 | return true; |
3082 | ||
56450ec6 | 3083 | if (filter->master_idx) { |
ab84be7e DA |
3084 | struct net_device *master; |
3085 | ||
3086 | if (!dev) | |
3087 | return true; | |
3088 | ||
3089 | master = netdev_master_upper_dev_get((struct net_device *)dev); | |
56450ec6 | 3090 | if (!master || master->ifindex != filter->master_idx) |
ab84be7e DA |
3091 | return true; |
3092 | } | |
3093 | ||
3094 | return false; | |
3095 | } | |
3096 | ||
b9ebea12 PM |
3097 | static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, |
3098 | struct nh_dump_filter *filter, | |
3099 | struct netlink_ext_ack *extack) | |
ab84be7e | 3100 | { |
ab84be7e | 3101 | struct nhmsg *nhm; |
ab84be7e DA |
3102 | u32 idx; |
3103 | ||
44551bff PM |
3104 | if (tb[NHA_OIF]) { |
3105 | idx = nla_get_u32(tb[NHA_OIF]); | |
3106 | if (idx > INT_MAX) { | |
3107 | NL_SET_ERR_MSG(extack, "Invalid device index"); | |
3108 | return -EINVAL; | |
3109 | } | |
56450ec6 | 3110 | filter->dev_idx = idx; |
44551bff PM |
3111 | } |
3112 | if (tb[NHA_MASTER]) { | |
3113 | idx = nla_get_u32(tb[NHA_MASTER]); | |
3114 | if (idx > INT_MAX) { | |
3115 | NL_SET_ERR_MSG(extack, "Invalid master device index"); | |
ab84be7e DA |
3116 | return -EINVAL; |
3117 | } | |
56450ec6 | 3118 | filter->master_idx = idx; |
ab84be7e | 3119 | } |
56450ec6 PM |
3120 | filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); |
3121 | filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); | |
ab84be7e DA |
3122 | |
3123 | nhm = nlmsg_data(nlh); | |
3124 | if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { | |
3125 | NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request"); | |
3126 | return -EINVAL; | |
3127 | } | |
3128 | ||
3129 | return 0; | |
3130 | } | |
3131 | ||
b9ebea12 PM |
3132 | static int nh_valid_dump_req(const struct nlmsghdr *nlh, |
3133 | struct nh_dump_filter *filter, | |
3134 | struct netlink_callback *cb) | |
3135 | { | |
3136 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; | |
3137 | int err; | |
3138 | ||
3139 | err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, | |
3140 | ARRAY_SIZE(rtm_nh_policy_dump) - 1, | |
3141 | rtm_nh_policy_dump, cb->extack); | |
3142 | if (err < 0) | |
3143 | return err; | |
3144 | ||
3145 | return __nh_valid_dump_req(nlh, tb, filter, cb->extack); | |
3146 | } | |
3147 | ||
a6fbbaa6 PM |
3148 | struct rtm_dump_nh_ctx { |
3149 | u32 idx; | |
3150 | }; | |
3151 | ||
3152 | static struct rtm_dump_nh_ctx * | |
3153 | rtm_dump_nh_ctx(struct netlink_callback *cb) | |
3154 | { | |
3155 | struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; | |
3156 | ||
3157 | BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); | |
3158 | return ctx; | |
3159 | } | |
3160 | ||
cbee1807 PM |
3161 | static int rtm_dump_walk_nexthops(struct sk_buff *skb, |
3162 | struct netlink_callback *cb, | |
3163 | struct rb_root *root, | |
3164 | struct rtm_dump_nh_ctx *ctx, | |
e948217d PM |
3165 | int (*nh_cb)(struct sk_buff *skb, |
3166 | struct netlink_callback *cb, | |
3167 | struct nexthop *nh, void *data), | |
3168 | void *data) | |
ab84be7e | 3169 | { |
ab84be7e | 3170 | struct rb_node *node; |
9e46fb65 | 3171 | int s_idx; |
ab84be7e DA |
3172 | int err; |
3173 | ||
a6fbbaa6 | 3174 | s_idx = ctx->idx; |
ab84be7e DA |
3175 | for (node = rb_first(root); node; node = rb_next(node)) { |
3176 | struct nexthop *nh; | |
3177 | ||
ab84be7e | 3178 | nh = rb_entry(node, struct nexthop, rb_node); |
9e46fb65 IS |
3179 | if (nh->id < s_idx) |
3180 | continue; | |
3181 | ||
3182 | ctx->idx = nh->id; | |
e948217d PM |
3183 | err = nh_cb(skb, cb, nh, data); |
3184 | if (err) | |
cbee1807 | 3185 | return err; |
ab84be7e DA |
3186 | } |
3187 | ||
9e46fb65 | 3188 | ctx->idx++; |
cbee1807 PM |
3189 | return 0; |
3190 | } | |
3191 | ||
e948217d PM |
3192 | static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, |
3193 | struct nexthop *nh, void *data) | |
3194 | { | |
3195 | struct nhmsg *nhm = nlmsg_data(cb->nlh); | |
3196 | struct nh_dump_filter *filter = data; | |
3197 | ||
3198 | if (nh_dump_filtered(nh, filter, nhm->nh_family)) | |
3199 | return 0; | |
3200 | ||
3201 | return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, | |
3202 | NETLINK_CB(cb->skb).portid, | |
3203 | cb->nlh->nlmsg_seq, NLM_F_MULTI); | |
3204 | } | |
3205 | ||
cbee1807 PM |
3206 | /* rtnl */ |
3207 | static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) | |
3208 | { | |
3209 | struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); | |
3210 | struct net *net = sock_net(skb->sk); | |
3211 | struct rb_root *root = &net->nexthop.rb_root; | |
3212 | struct nh_dump_filter filter = {}; | |
3213 | int err; | |
3214 | ||
3215 | err = nh_valid_dump_req(cb->nlh, &filter, cb); | |
3216 | if (err < 0) | |
3217 | return err; | |
3218 | ||
e948217d PM |
3219 | err = rtm_dump_walk_nexthops(skb, cb, root, ctx, |
3220 | &rtm_dump_nexthop_cb, &filter); | |
cbee1807 PM |
3221 | if (err < 0) { |
3222 | if (likely(skb->len)) | |
3223 | goto out; | |
3224 | goto out_err; | |
3225 | } | |
3226 | ||
ab84be7e DA |
3227 | out: |
3228 | err = skb->len; | |
3229 | out_err: | |
ab84be7e DA |
3230 | cb->seq = net->nexthop.seq; |
3231 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); | |
ab84be7e DA |
3232 | return err; |
3233 | } | |
3234 | ||
8a1bbabb PM |
3235 | static struct nexthop * |
3236 | nexthop_find_group_resilient(struct net *net, u32 id, | |
3237 | struct netlink_ext_ack *extack) | |
3238 | { | |
3239 | struct nh_group *nhg; | |
3240 | struct nexthop *nh; | |
3241 | ||
3242 | nh = nexthop_find_by_id(net, id); | |
3243 | if (!nh) | |
3244 | return ERR_PTR(-ENOENT); | |
3245 | ||
3246 | if (!nh->is_group) { | |
3247 | NL_SET_ERR_MSG(extack, "Not a nexthop group"); | |
3248 | return ERR_PTR(-EINVAL); | |
3249 | } | |
3250 | ||
3251 | nhg = rtnl_dereference(nh->nh_grp); | |
3252 | if (!nhg->resilient) { | |
3253 | NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient"); | |
3254 | return ERR_PTR(-EINVAL); | |
3255 | } | |
3256 | ||
3257 | return nh; | |
3258 | } | |
3259 | ||
3260 | static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p, | |
3261 | struct netlink_ext_ack *extack) | |
3262 | { | |
3263 | u32 idx; | |
3264 | ||
3265 | if (attr) { | |
3266 | idx = nla_get_u32(attr); | |
3267 | if (!idx) { | |
3268 | NL_SET_ERR_MSG(extack, "Invalid nexthop id"); | |
3269 | return -EINVAL; | |
3270 | } | |
3271 | *nh_id_p = idx; | |
3272 | } else { | |
3273 | *nh_id_p = 0; | |
3274 | } | |
3275 | ||
3276 | return 0; | |
3277 | } | |
3278 | ||
3279 | static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, | |
3280 | struct nh_dump_filter *filter, | |
3281 | struct netlink_callback *cb) | |
3282 | { | |
3283 | struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; | |
3284 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)]; | |
3285 | int err; | |
3286 | ||
3287 | err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, | |
3288 | ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1, | |
3289 | rtm_nh_policy_dump_bucket, NULL); | |
3290 | if (err < 0) | |
3291 | return err; | |
3292 | ||
3293 | err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack); | |
3294 | if (err) | |
3295 | return err; | |
3296 | ||
3297 | if (tb[NHA_RES_BUCKET]) { | |
3298 | size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1; | |
3299 | ||
3300 | err = nla_parse_nested(res_tb, max, | |
3301 | tb[NHA_RES_BUCKET], | |
3302 | rtm_nh_res_bucket_policy_dump, | |
3303 | cb->extack); | |
3304 | if (err < 0) | |
3305 | return err; | |
3306 | ||
3307 | err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID], | |
3308 | &filter->res_bucket_nh_id, | |
3309 | cb->extack); | |
3310 | if (err) | |
3311 | return err; | |
3312 | } | |
3313 | ||
3314 | return __nh_valid_dump_req(nlh, tb, filter, cb->extack); | |
3315 | } | |
3316 | ||
3317 | struct rtm_dump_res_bucket_ctx { | |
3318 | struct rtm_dump_nh_ctx nh; | |
3319 | u16 bucket_index; | |
3320 | u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */ | |
3321 | }; | |
3322 | ||
3323 | static struct rtm_dump_res_bucket_ctx * | |
3324 | rtm_dump_res_bucket_ctx(struct netlink_callback *cb) | |
3325 | { | |
3326 | struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx; | |
3327 | ||
3328 | BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); | |
3329 | return ctx; | |
3330 | } | |
3331 | ||
3332 | struct rtm_dump_nexthop_bucket_data { | |
3333 | struct rtm_dump_res_bucket_ctx *ctx; | |
3334 | struct nh_dump_filter filter; | |
3335 | }; | |
3336 | ||
3337 | static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb, | |
3338 | struct netlink_callback *cb, | |
3339 | struct nexthop *nh, | |
3340 | struct rtm_dump_nexthop_bucket_data *dd) | |
3341 | { | |
3342 | u32 portid = NETLINK_CB(cb->skb).portid; | |
3343 | struct nhmsg *nhm = nlmsg_data(cb->nlh); | |
3344 | struct nh_res_table *res_table; | |
3345 | struct nh_group *nhg; | |
3346 | u16 bucket_index; | |
3347 | int err; | |
3348 | ||
3349 | if (dd->ctx->nh.idx < dd->ctx->done_nh_idx) | |
3350 | return 0; | |
3351 | ||
3352 | nhg = rtnl_dereference(nh->nh_grp); | |
3353 | res_table = rtnl_dereference(nhg->res_table); | |
3354 | for (bucket_index = dd->ctx->bucket_index; | |
3355 | bucket_index < res_table->num_nh_buckets; | |
3356 | bucket_index++) { | |
3357 | struct nh_res_bucket *bucket; | |
3358 | struct nh_grp_entry *nhge; | |
3359 | ||
3360 | bucket = &res_table->nh_buckets[bucket_index]; | |
3361 | nhge = rtnl_dereference(bucket->nh_entry); | |
3362 | if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family)) | |
3363 | continue; | |
3364 | ||
3365 | if (dd->filter.res_bucket_nh_id && | |
3366 | dd->filter.res_bucket_nh_id != nhge->nh->id) | |
3367 | continue; | |
3368 | ||
3369 | err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, | |
3370 | RTM_NEWNEXTHOPBUCKET, portid, | |
3371 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | |
3372 | cb->extack); | |
3373 | if (err < 0) { | |
3374 | if (likely(skb->len)) | |
3375 | goto out; | |
3376 | goto out_err; | |
3377 | } | |
3378 | } | |
3379 | ||
3380 | dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1; | |
3381 | bucket_index = 0; | |
3382 | ||
3383 | out: | |
3384 | err = skb->len; | |
3385 | out_err: | |
3386 | dd->ctx->bucket_index = bucket_index; | |
3387 | return err; | |
3388 | } | |
3389 | ||
3390 | static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb, | |
3391 | struct netlink_callback *cb, | |
3392 | struct nexthop *nh, void *data) | |
3393 | { | |
3394 | struct rtm_dump_nexthop_bucket_data *dd = data; | |
3395 | struct nh_group *nhg; | |
3396 | ||
3397 | if (!nh->is_group) | |
3398 | return 0; | |
3399 | ||
3400 | nhg = rtnl_dereference(nh->nh_grp); | |
3401 | if (!nhg->resilient) | |
3402 | return 0; | |
3403 | ||
3404 | return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd); | |
3405 | } | |
3406 | ||
3407 | /* rtnl */ | |
3408 | static int rtm_dump_nexthop_bucket(struct sk_buff *skb, | |
3409 | struct netlink_callback *cb) | |
3410 | { | |
3411 | struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb); | |
3412 | struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx }; | |
3413 | struct net *net = sock_net(skb->sk); | |
3414 | struct nexthop *nh; | |
3415 | int err; | |
3416 | ||
3417 | err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb); | |
3418 | if (err) | |
3419 | return err; | |
3420 | ||
3421 | if (dd.filter.nh_id) { | |
3422 | nh = nexthop_find_group_resilient(net, dd.filter.nh_id, | |
3423 | cb->extack); | |
3424 | if (IS_ERR(nh)) | |
3425 | return PTR_ERR(nh); | |
3426 | err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd); | |
3427 | } else { | |
3428 | struct rb_root *root = &net->nexthop.rb_root; | |
3429 | ||
3430 | err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh, | |
3431 | &rtm_dump_nexthop_bucket_cb, &dd); | |
3432 | } | |
3433 | ||
3434 | if (err < 0) { | |
3435 | if (likely(skb->len)) | |
3436 | goto out; | |
3437 | goto out_err; | |
3438 | } | |
3439 | ||
3440 | out: | |
3441 | err = skb->len; | |
3442 | out_err: | |
3443 | cb->seq = net->nexthop.seq; | |
3444 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); | |
3445 | return err; | |
3446 | } | |
3447 | ||
187d4c6b PM |
3448 | static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res, |
3449 | u16 *bucket_index, | |
3450 | struct netlink_ext_ack *extack) | |
3451 | { | |
3452 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)]; | |
3453 | int err; | |
3454 | ||
3455 | err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1, | |
3456 | res, rtm_nh_res_bucket_policy_get, extack); | |
3457 | if (err < 0) | |
3458 | return err; | |
3459 | ||
3460 | if (!tb[NHA_RES_BUCKET_INDEX]) { | |
3461 | NL_SET_ERR_MSG(extack, "Bucket index is missing"); | |
3462 | return -EINVAL; | |
3463 | } | |
3464 | ||
3465 | *bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]); | |
3466 | return 0; | |
3467 | } | |
3468 | ||
3469 | static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, | |
3470 | u32 *id, u16 *bucket_index, | |
3471 | struct netlink_ext_ack *extack) | |
3472 | { | |
3473 | struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)]; | |
3474 | int err; | |
3475 | ||
3476 | err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, | |
3477 | ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1, | |
3478 | rtm_nh_policy_get_bucket, extack); | |
3479 | if (err < 0) | |
3480 | return err; | |
3481 | ||
3482 | err = __nh_valid_get_del_req(nlh, tb, id, extack); | |
3483 | if (err) | |
3484 | return err; | |
3485 | ||
3486 | if (!tb[NHA_RES_BUCKET]) { | |
3487 | NL_SET_ERR_MSG(extack, "Bucket information is missing"); | |
3488 | return -EINVAL; | |
3489 | } | |
3490 | ||
3491 | err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET], | |
3492 | bucket_index, extack); | |
3493 | if (err) | |
3494 | return err; | |
3495 | ||
3496 | return 0; | |
3497 | } | |
3498 | ||
3499 | /* rtnl */ | |
3500 | static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh, | |
3501 | struct netlink_ext_ack *extack) | |
3502 | { | |
3503 | struct net *net = sock_net(in_skb->sk); | |
3504 | struct nh_res_table *res_table; | |
3505 | struct sk_buff *skb = NULL; | |
3506 | struct nh_group *nhg; | |
3507 | struct nexthop *nh; | |
3508 | u16 bucket_index; | |
3509 | int err; | |
3510 | u32 id; | |
3511 | ||
3512 | err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack); | |
3513 | if (err) | |
3514 | return err; | |
3515 | ||
3516 | nh = nexthop_find_group_resilient(net, id, extack); | |
3517 | if (IS_ERR(nh)) | |
3518 | return PTR_ERR(nh); | |
3519 | ||
3520 | nhg = rtnl_dereference(nh->nh_grp); | |
3521 | res_table = rtnl_dereference(nhg->res_table); | |
3522 | if (bucket_index >= res_table->num_nh_buckets) { | |
3523 | NL_SET_ERR_MSG(extack, "Bucket index out of bounds"); | |
3524 | return -ENOENT; | |
3525 | } | |
3526 | ||
3527 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | |
3528 | if (!skb) | |
3529 | return -ENOBUFS; | |
3530 | ||
3531 | err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index], | |
3532 | bucket_index, RTM_NEWNEXTHOPBUCKET, | |
3533 | NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, | |
3534 | 0, extack); | |
3535 | if (err < 0) { | |
3536 | WARN_ON(err == -EMSGSIZE); | |
3537 | goto errout_free; | |
3538 | } | |
3539 | ||
3540 | return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); | |
3541 | ||
3542 | errout_free: | |
3543 | kfree_skb(skb); | |
3544 | return err; | |
3545 | } | |
3546 | ||
597cfe4f DA |
3547 | static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu) |
3548 | { | |
3549 | unsigned int hash = nh_dev_hashfn(dev->ifindex); | |
3550 | struct net *net = dev_net(dev); | |
3551 | struct hlist_head *head = &net->nexthop.devhash[hash]; | |
3552 | struct hlist_node *n; | |
3553 | struct nh_info *nhi; | |
3554 | ||
3555 | hlist_for_each_entry_safe(nhi, n, head, dev_hash) { | |
3556 | if (nhi->fib_nhc.nhc_dev == dev) { | |
3557 | if (nhi->family == AF_INET) | |
3558 | fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, | |
3559 | orig_mtu); | |
3560 | } | |
3561 | } | |
3562 | } | |
3563 | ||
3564 | /* rtnl */ | |
3565 | static int nh_netdev_event(struct notifier_block *this, | |
3566 | unsigned long event, void *ptr) | |
3567 | { | |
3568 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
3569 | struct netdev_notifier_info_ext *info_ext; | |
3570 | ||
3571 | switch (event) { | |
3572 | case NETDEV_DOWN: | |
3573 | case NETDEV_UNREGISTER: | |
76c03bf8 | 3574 | nexthop_flush_dev(dev, event); |
597cfe4f DA |
3575 | break; |
3576 | case NETDEV_CHANGE: | |
3577 | if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) | |
76c03bf8 | 3578 | nexthop_flush_dev(dev, event); |
597cfe4f DA |
3579 | break; |
3580 | case NETDEV_CHANGEMTU: | |
3581 | info_ext = ptr; | |
3582 | nexthop_sync_mtu(dev, info_ext->ext.mtu); | |
3583 | rt_cache_flush(dev_net(dev)); | |
3584 | break; | |
3585 | } | |
3586 | return NOTIFY_DONE; | |
3587 | } | |
3588 | ||
3589 | static struct notifier_block nh_netdev_notifier = { | |
3590 | .notifier_call = nh_netdev_event, | |
3591 | }; | |
3592 | ||
975ff7f3 | 3593 | static int nexthops_dump(struct net *net, struct notifier_block *nb, |
3106a084 | 3594 | enum nexthop_event_type event_type, |
975ff7f3 IS |
3595 | struct netlink_ext_ack *extack) |
3596 | { | |
3597 | struct rb_root *root = &net->nexthop.rb_root; | |
3598 | struct rb_node *node; | |
3599 | int err = 0; | |
3600 | ||
3601 | for (node = rb_first(root); node; node = rb_next(node)) { | |
3602 | struct nexthop *nh; | |
3603 | ||
3604 | nh = rb_entry(node, struct nexthop, rb_node); | |
3106a084 | 3605 | err = call_nexthop_notifier(nb, net, event_type, nh, extack); |
975ff7f3 IS |
3606 | if (err) |
3607 | break; | |
3608 | } | |
3609 | ||
3610 | return err; | |
3611 | } | |
3612 | ||
ce7e9c8a IS |
3613 | int register_nexthop_notifier(struct net *net, struct notifier_block *nb, |
3614 | struct netlink_ext_ack *extack) | |
8590ceed | 3615 | { |
975ff7f3 IS |
3616 | int err; |
3617 | ||
3618 | rtnl_lock(); | |
3106a084 | 3619 | err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack); |
975ff7f3 IS |
3620 | if (err) |
3621 | goto unlock; | |
3622 | err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, | |
3623 | nb); | |
3624 | unlock: | |
3625 | rtnl_unlock(); | |
3626 | return err; | |
8590ceed RP |
3627 | } |
3628 | EXPORT_SYMBOL(register_nexthop_notifier); | |
3629 | ||
3630 | int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) | |
3631 | { | |
3106a084 IS |
3632 | int err; |
3633 | ||
3634 | rtnl_lock(); | |
3635 | err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, | |
3636 | nb); | |
3637 | if (err) | |
3638 | goto unlock; | |
3639 | nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); | |
3640 | unlock: | |
3641 | rtnl_unlock(); | |
3642 | return err; | |
8590ceed RP |
3643 | } |
3644 | EXPORT_SYMBOL(unregister_nexthop_notifier); | |
3645 | ||
e95f2592 IS |
3646 | void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) |
3647 | { | |
3648 | struct nexthop *nexthop; | |
3649 | ||
3650 | rcu_read_lock(); | |
3651 | ||
3652 | nexthop = nexthop_find_by_id(net, id); | |
3653 | if (!nexthop) | |
3654 | goto out; | |
3655 | ||
3656 | nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); | |
3657 | if (offload) | |
3658 | nexthop->nh_flags |= RTNH_F_OFFLOAD; | |
3659 | if (trap) | |
3660 | nexthop->nh_flags |= RTNH_F_TRAP; | |
3661 | ||
3662 | out: | |
3663 | rcu_read_unlock(); | |
3664 | } | |
3665 | EXPORT_SYMBOL(nexthop_set_hw_flags); | |
3666 | ||
56ad5ba3 IS |
3667 | void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, |
3668 | bool offload, bool trap) | |
3669 | { | |
3670 | struct nh_res_table *res_table; | |
3671 | struct nh_res_bucket *bucket; | |
3672 | struct nexthop *nexthop; | |
3673 | struct nh_group *nhg; | |
3674 | ||
3675 | rcu_read_lock(); | |
3676 | ||
3677 | nexthop = nexthop_find_by_id(net, id); | |
3678 | if (!nexthop || !nexthop->is_group) | |
3679 | goto out; | |
3680 | ||
3681 | nhg = rcu_dereference(nexthop->nh_grp); | |
3682 | if (!nhg->resilient) | |
3683 | goto out; | |
3684 | ||
3685 | if (bucket_index >= nhg->res_table->num_nh_buckets) | |
3686 | goto out; | |
3687 | ||
3688 | res_table = rcu_dereference(nhg->res_table); | |
3689 | bucket = &res_table->nh_buckets[bucket_index]; | |
3690 | bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); | |
3691 | if (offload) | |
3692 | bucket->nh_flags |= RTNH_F_OFFLOAD; | |
3693 | if (trap) | |
3694 | bucket->nh_flags |= RTNH_F_TRAP; | |
3695 | ||
3696 | out: | |
3697 | rcu_read_unlock(); | |
3698 | } | |
3699 | EXPORT_SYMBOL(nexthop_bucket_set_hw_flags); | |
3700 | ||
cfc15c1d IS |
3701 | void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, |
3702 | unsigned long *activity) | |
3703 | { | |
3704 | struct nh_res_table *res_table; | |
3705 | struct nexthop *nexthop; | |
3706 | struct nh_group *nhg; | |
3707 | u16 i; | |
3708 | ||
3709 | rcu_read_lock(); | |
3710 | ||
3711 | nexthop = nexthop_find_by_id(net, id); | |
3712 | if (!nexthop || !nexthop->is_group) | |
3713 | goto out; | |
3714 | ||
3715 | nhg = rcu_dereference(nexthop->nh_grp); | |
3716 | if (!nhg->resilient) | |
3717 | goto out; | |
3718 | ||
3719 | /* Instead of silently ignoring some buckets, demand that the sizes | |
3720 | * be the same. | |
3721 | */ | |
3722 | res_table = rcu_dereference(nhg->res_table); | |
3723 | if (num_buckets != res_table->num_nh_buckets) | |
3724 | goto out; | |
3725 | ||
3726 | for (i = 0; i < num_buckets; i++) { | |
3727 | if (test_bit(i, activity)) | |
3728 | nh_res_bucket_set_busy(&res_table->nh_buckets[i]); | |
3729 | } | |
3730 | ||
3731 | out: | |
3732 | rcu_read_unlock(); | |
3733 | } | |
3734 | EXPORT_SYMBOL(nexthop_res_grp_activity_update); | |
3735 | ||
fea7b201 | 3736 | static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) |
ab84be7e | 3737 | { |
fea7b201 ED |
3738 | struct net *net; |
3739 | ||
ab84be7e | 3740 | rtnl_lock(); |
fea7b201 ED |
3741 | list_for_each_entry(net, net_list, exit_list) { |
3742 | flush_all_nexthops(net); | |
3743 | kfree(net->nexthop.devhash); | |
3744 | } | |
ab84be7e DA |
3745 | rtnl_unlock(); |
3746 | } | |
3747 | ||
3748 | static int __net_init nexthop_net_init(struct net *net) | |
3749 | { | |
597cfe4f DA |
3750 | size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE; |
3751 | ||
ab84be7e | 3752 | net->nexthop.rb_root = RB_ROOT; |
597cfe4f DA |
3753 | net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); |
3754 | if (!net->nexthop.devhash) | |
3755 | return -ENOMEM; | |
80690ec6 | 3756 | BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); |
ab84be7e DA |
3757 | |
3758 | return 0; | |
3759 | } | |
3760 | ||
/* Per-network-namespace hooks, registered from nexthop_init():
 * nexthop_net_init() as the init hook and nexthop_net_exit_batch() as the
 * batched exit hook.
 */
static struct pernet_operations nexthop_net_ops = {
	.init = nexthop_net_init,
	.exit_batch = nexthop_net_exit_batch,
};
3765 | ||
3766 | static int __init nexthop_init(void) | |
3767 | { | |
3768 | register_pernet_subsys(&nexthop_net_ops); | |
3769 | ||
597cfe4f DA |
3770 | register_netdevice_notifier(&nh_netdev_notifier); |
3771 | ||
ab84be7e DA |
3772 | rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); |
3773 | rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); | |
3774 | rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, | |
3775 | rtm_dump_nexthop, 0); | |
3776 | ||
3777 | rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); | |
3778 | rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); | |
3779 | ||
3780 | rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); | |
3781 | rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); | |
3782 | ||
187d4c6b | 3783 | rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, |
8a1bbabb PM |
3784 | rtm_dump_nexthop_bucket, 0); |
3785 | ||
ab84be7e DA |
3786 | return 0; |
3787 | } | |
3788 | subsys_initcall(nexthop_init); |