mm: Don't pin ZERO_PAGE in pin_user_pages()
[linux-block.git] / include / net / nexthop.h
CommitLineData
ab84be7e
DA
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Generic nexthop implementation
4 *
5 * Copyright (c) 2017-19 Cumulus Networks
6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
7 */
8
9#ifndef __LINUX_NEXTHOP_H
10#define __LINUX_NEXTHOP_H
11
12#include <linux/netdevice.h>
8590ceed 13#include <linux/notifier.h>
f88d8ea6 14#include <linux/route.h>
ab84be7e
DA
15#include <linux/types.h>
16#include <net/ip_fib.h>
53010f99 17#include <net/ip6_fib.h>
ab84be7e
DA
18#include <net/netlink.h>
19
/* Mask named as the nexthop flags valid from users; it currently
 * expands to RTNH_F_ONLINK alone.
 */
#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

/* Forward declaration; the full layout is given further down. */
struct nexthop;
23
24struct nh_config {
25 u32 nh_id;
26
27 u8 nh_family;
28 u8 nh_protocol;
29 u8 nh_blackhole;
38428d68 30 u8 nh_fdb;
ab84be7e
DA
31 u32 nh_flags;
32
33 int nh_ifindex;
34 struct net_device *dev;
35
597cfe4f
DA
36 union {
37 __be32 ipv4;
53010f99 38 struct in6_addr ipv6;
597cfe4f
DA
39 } gw;
40
430a0491
DA
41 struct nlattr *nh_grp;
42 u16 nh_grp_type;
283a72a5
PM
43 u16 nh_grp_res_num_buckets;
44 unsigned long nh_grp_res_idle_timer;
45 unsigned long nh_grp_res_unbalanced_timer;
46 bool nh_grp_res_has_num_buckets;
47 bool nh_grp_res_has_idle_timer;
48 bool nh_grp_res_has_unbalanced_timer;
430a0491 49
b513bd03
DA
50 struct nlattr *nh_encap;
51 u16 nh_encap_type;
52
ab84be7e
DA
53 u32 nlflags;
54 struct nl_info nlinfo;
55};
56
57struct nh_info {
58 struct hlist_node dev_hash; /* entry on netns devhash */
59 struct nexthop *nh_parent;
60
61 u8 family;
62 bool reject_nh;
38428d68 63 bool fdb_nh;
ab84be7e
DA
64
65 union {
66 struct fib_nh_common fib_nhc;
597cfe4f 67 struct fib_nh fib_nh;
53010f99 68 struct fib6_nh fib6_nh;
ab84be7e
DA
69 };
70};
71
283a72a5
PM
72struct nh_res_bucket {
73 struct nh_grp_entry __rcu *nh_entry;
74 atomic_long_t used_time;
75 unsigned long migrated_time;
76 bool occupied;
77 u8 nh_flags;
78};
79
80struct nh_res_table {
81 struct net *net;
82 u32 nhg_id;
83 struct delayed_work upkeep_dw;
84
85 /* List of NHGEs that have too few buckets ("uw" for underweight).
86 * Reclaimed buckets will be given to entries in this list.
87 */
88 struct list_head uw_nh_entries;
89 unsigned long unbalanced_since;
90
91 u32 idle_timer;
92 u32 unbalanced_timer;
93
94 u16 num_nh_buckets;
95 struct nh_res_bucket nh_buckets[];
96};
97
430a0491
DA
98struct nh_grp_entry {
99 struct nexthop *nh;
100 u8 weight;
b9bae61b
PM
101
102 union {
103 struct {
104 atomic_t upper_bound;
de1d1ee3 105 } hthr;
283a72a5
PM
106 struct {
107 /* Member on uw_nh_entries. */
108 struct list_head uw_nh_entry;
109
110 u16 count_buckets;
111 u16 wants_buckets;
112 } res;
b9bae61b 113 };
430a0491
DA
114
115 struct list_head nh_list;
116 struct nexthop *nh_parent; /* nexthop of group with this entry */
117};
118
119struct nh_group {
90f33bff 120 struct nh_group *spare; /* spare group for removals */
430a0491 121 u16 num_nh;
90e1a9e2 122 bool is_multipath;
de1d1ee3 123 bool hash_threshold;
283a72a5 124 bool resilient;
ce9ac056 125 bool fdb_nh;
430a0491 126 bool has_v4;
283a72a5
PM
127
128 struct nh_res_table __rcu *res_table;
97a888c2 129 struct nh_grp_entry nh_entries[];
430a0491
DA
130};
131
ab84be7e
DA
132struct nexthop {
133 struct rb_node rb_node; /* entry on netns rbtree */
4c7e8084 134 struct list_head fi_list; /* v4 entries using nh */
f88d8ea6 135 struct list_head f6i_list; /* v6 entries using nh */
38428d68 136 struct list_head fdb_list; /* fdb entries using this nh */
430a0491 137 struct list_head grp_list; /* nh group entries using this nh */
ab84be7e
DA
138 struct net *net;
139
140 u32 id;
141
142 u8 protocol; /* app managing this nh */
143 u8 nh_flags;
430a0491 144 bool is_group;
ab84be7e
DA
145
146 refcount_t refcnt;
147 struct rcu_head rcu;
148
149 union {
150 struct nh_info __rcu *nh_info;
430a0491 151 struct nh_group __rcu *nh_grp;
ab84be7e
DA
152 };
153};
154
/* Nexthop event identifiers.
 * NOTE(review): presumably the event value passed to notifiers registered
 * with register_nexthop_notifier() - confirm.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
};
161
09ad6bec
IS
/* Values stored in nh_notifier_info::type. */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
};
168
1c9cac65
IS
169struct nh_notifier_single_info {
170 struct net_device *dev;
171 u8 gw_family;
172 union {
173 __be32 ipv4;
174 struct in6_addr ipv6;
175 };
176 u8 is_reject:1,
177 is_fdb:1,
178 has_encap:1;
179};
180
181struct nh_notifier_grp_entry_info {
182 u8 weight;
183 u32 id;
184 struct nh_notifier_single_info nh;
185};
186
187struct nh_notifier_grp_info {
188 u16 num_nh;
189 bool is_fdb;
190 struct nh_notifier_grp_entry_info nh_entries[];
191};
192
b8f090d0
IS
193struct nh_notifier_res_bucket_info {
194 u16 bucket_index;
195 unsigned int idle_timer_ms;
196 bool force;
197 struct nh_notifier_single_info old_nh;
198 struct nh_notifier_single_info new_nh;
199};
200
201struct nh_notifier_res_table_info {
202 u16 num_nh_buckets;
203 struct nh_notifier_single_info nhs[];
204};
205
1c9cac65
IS
206struct nh_notifier_info {
207 struct net *net;
208 struct netlink_ext_ack *extack;
209 u32 id;
09ad6bec 210 enum nh_notifier_info_type type;
1c9cac65
IS
211 union {
212 struct nh_notifier_single_info *nh;
213 struct nh_notifier_grp_info *nh_grp;
b8f090d0
IS
214 struct nh_notifier_res_table_info *nh_res_table;
215 struct nh_notifier_res_bucket_info *nh_res_bucket;
1c9cac65
IS
216 };
217};
218
ce7e9c8a
IS
219int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
220 struct netlink_ext_ack *extack);
8590ceed 221int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
e95f2592 222void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
56ad5ba3
IS
223void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
224 bool offload, bool trap);
cfc15c1d
IS
225void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
226 unsigned long *activity);
8590ceed 227
ab84be7e
DA
228/* caller is holding rcu or rtnl; no reference taken to nexthop */
229struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
230void nexthop_free_rcu(struct rcu_head *head);
231
232static inline bool nexthop_get(struct nexthop *nh)
233{
234 return refcount_inc_not_zero(&nh->refcnt);
235}
236
237static inline void nexthop_put(struct nexthop *nh)
238{
239 if (refcount_dec_and_test(&nh->refcnt))
240 call_rcu(&nh->rcu, nexthop_free_rcu);
241}
242
4c7e8084
DA
243static inline bool nexthop_cmp(const struct nexthop *nh1,
244 const struct nexthop *nh2)
245{
246 return nh1 == nh2;
ce9ac056
DA
247}
248
249static inline bool nexthop_is_fdb(const struct nexthop *nh)
250{
251 if (nh->is_group) {
252 const struct nh_group *nh_grp;
253
254 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
255 return nh_grp->fdb_nh;
256 } else {
257 const struct nh_info *nhi;
258
259 nhi = rcu_dereference_rtnl(nh->nh_info);
260 return nhi->fdb_nh;
261 }
50cb8769
DA
262}
263
264static inline bool nexthop_has_v4(const struct nexthop *nh)
265{
266 if (nh->is_group) {
267 struct nh_group *nh_grp;
268
269 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
270 return nh_grp->has_v4;
271 }
272 return false;
4c7e8084
DA
273}
274
430a0491
DA
275static inline bool nexthop_is_multipath(const struct nexthop *nh)
276{
277 if (nh->is_group) {
278 struct nh_group *nh_grp;
279
280 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
90e1a9e2 281 return nh_grp->is_multipath;
430a0491
DA
282 }
283 return false;
284}
285
/* Prototype only. The helpers in this header use the result as the
 * nexthop chosen for @hash.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
287
288static inline unsigned int nexthop_num_path(const struct nexthop *nh)
289{
290 unsigned int rc = 1;
291
0b5e2e39 292 if (nh->is_group) {
430a0491
DA
293 struct nh_group *nh_grp;
294
295 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
90e1a9e2 296 if (nh_grp->is_multipath)
0b5e2e39 297 rc = nh_grp->num_nh;
430a0491
DA
298 }
299
300 return rc;
301}
302
303static inline
0b5e2e39 304struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
430a0491 305{
430a0491
DA
306 /* for_nexthops macros in fib_semantics.c grabs a pointer to
307 * the nexthop before checking nhsel
308 */
5270041d 309 if (nhsel >= nhg->num_nh)
430a0491
DA
310 return NULL;
311
312 return nhg->nh_entries[nhsel].nh;
313}
314
315static inline
7bdf4de1
DS
316int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
317 u8 rt_family)
430a0491
DA
318{
319 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
320 int i;
321
322 for (i = 0; i < nhg->num_nh; i++) {
323 struct nexthop *nhe = nhg->nh_entries[i].nh;
324 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
325 struct fib_nh_common *nhc = &nhi->fib_nhc;
326 int weight = nhg->nh_entries[i].weight;
327
597aa16c 328 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
430a0491
DA
329 return -EMSGSIZE;
330 }
331
332 return 0;
333}
334
ab84be7e
DA
335/* called with rcu lock */
336static inline bool nexthop_is_blackhole(const struct nexthop *nh)
337{
338 const struct nh_info *nhi;
339
0b5e2e39
DA
340 if (nh->is_group) {
341 struct nh_group *nh_grp;
342
343 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
344 if (nh_grp->num_nh > 1)
430a0491 345 return false;
0b5e2e39
DA
346
347 nh = nh_grp->nh_entries[0].nh;
430a0491
DA
348 }
349
350 nhi = rcu_dereference_rtnl(nh->nh_info);
ab84be7e
DA
351 return nhi->reject_nh;
352}
5481d73f 353
4c7e8084
DA
354static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
355{
356 struct nh_info *nhi;
357 struct nexthop *nh;
358
359 nh = nexthop_select_path(res->fi->nh, hash);
360 nhi = rcu_dereference(nh->nh_info);
361 res->nhc = &nhi->fib_nhc;
362}
363
364/* called with rcu read lock or rtnl held */
365static inline
366struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
367{
368 struct nh_info *nhi;
369
370 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
371 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
372
0b5e2e39
DA
373 if (nh->is_group) {
374 struct nh_group *nh_grp;
375
376 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
90e1a9e2 377 if (nh_grp->is_multipath) {
0b5e2e39
DA
378 nh = nexthop_mpath_select(nh_grp, nhsel);
379 if (!nh)
380 return NULL;
381 }
4c7e8084
DA
382 }
383
384 nhi = rcu_dereference_rtnl(nh->nh_info);
385 return &nhi->fib_nhc;
386}
387
af7888ad
DA
388/* called from fib_table_lookup with rcu_lock */
389static inline
390struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
391 int fib_flags,
392 const struct flowi4 *flp,
393 int *nhsel)
394{
395 struct nh_info *nhi;
396
397 if (nh->is_group) {
398 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
399 int i;
400
401 for (i = 0; i < nhg->num_nh; i++) {
402 struct nexthop *nhe = nhg->nh_entries[i].nh;
403
404 nhi = rcu_dereference(nhe->nh_info);
405 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
406 *nhsel = i;
407 return &nhi->fib_nhc;
408 }
409 }
410 } else {
411 nhi = rcu_dereference(nh->nh_info);
412 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
413 *nhsel = 0;
414 return &nhi->fib_nhc;
415 }
416 }
417
418 return NULL;
419}
420
1fd1c768
DA
421static inline bool nexthop_uses_dev(const struct nexthop *nh,
422 const struct net_device *dev)
423{
424 struct nh_info *nhi;
425
426 if (nh->is_group) {
427 struct nh_group *nhg = rcu_dereference(nh->nh_grp);
428 int i;
429
430 for (i = 0; i < nhg->num_nh; i++) {
431 struct nexthop *nhe = nhg->nh_entries[i].nh;
432
433 nhi = rcu_dereference(nhe->nh_info);
434 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
435 return true;
436 }
437 } else {
438 nhi = rcu_dereference(nh->nh_info);
439 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
440 return true;
441 }
442
443 return false;
444}
445
5481d73f
DA
446static inline unsigned int fib_info_num_path(const struct fib_info *fi)
447{
4c7e8084
DA
448 if (unlikely(fi->nh))
449 return nexthop_num_path(fi->nh);
450
5481d73f
DA
451 return fi->fib_nhs;
452}
453
4c7e8084
DA
454int fib_check_nexthop(struct nexthop *nh, u8 scope,
455 struct netlink_ext_ack *extack);
456
5481d73f
DA
457static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
458{
4c7e8084
DA
459 if (unlikely(fi->nh))
460 return nexthop_fib_nhc(fi->nh, nhsel);
461
5481d73f
DA
462 return &fi->fib_nh[nhsel].nh_common;
463}
464
4c7e8084 465/* only used when fib_nh is built into fib_info */
5481d73f
DA
466static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
467{
4c7e8084
DA
468 WARN_ON(fi->nh);
469
5481d73f
DA
470 return &fi->fib_nh[nhsel];
471}
f88d8ea6
DA
472
/*
 * IPv6 variants
 */

/* Prototype only; the body is not in this header. */
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);
478
28259bac 479/* Caller should either hold rcu_read_lock(), or RTNL. */
f88d8ea6
DA
480static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
481{
482 struct nh_info *nhi;
483
0b5e2e39
DA
484 if (nh->is_group) {
485 struct nh_group *nh_grp;
486
487 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
488 nh = nexthop_mpath_select(nh_grp, 0);
f88d8ea6
DA
489 if (!nh)
490 return NULL;
491 }
492
493 nhi = rcu_dereference_rtnl(nh->nh_info);
494 if (nhi->family == AF_INET6)
495 return &nhi->fib6_nh;
496
497 return NULL;
498}
499
28259bac 500/* Variant of nexthop_fib6_nh().
09eed119 501 * Caller should either hold rcu_read_lock(), or RTNL.
28259bac
WW
502 */
503static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
504{
505 struct nh_info *nhi;
506
507 if (nh->is_group) {
508 struct nh_group *nh_grp;
509
09eed119 510 nh_grp = rcu_dereference_rtnl(nh->nh_grp);
28259bac
WW
511 nh = nexthop_mpath_select(nh_grp, 0);
512 if (!nh)
513 return NULL;
514 }
515
09eed119 516 nhi = rcu_dereference_rtnl(nh->nh_info);
28259bac
WW
517 if (nhi->family == AF_INET6)
518 return &nhi->fib6_nh;
519
520 return NULL;
521}
522
f88d8ea6
DA
523static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
524{
525 struct fib6_nh *fib6_nh;
526
527 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
528 return fib6_nh->fib_nh_dev;
529}
530
531static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
532{
533 struct nexthop *nh = res->f6i->nh;
534 struct nh_info *nhi;
535
536 nh = nexthop_select_path(nh, hash);
537
538 nhi = rcu_dereference_rtnl(nh->nh_info);
539 if (nhi->reject_nh) {
540 res->fib6_type = RTN_BLACKHOLE;
541 res->fib6_flags |= RTF_REJECT;
542 res->nh = nexthop_fib6_nh(nh);
543 } else {
544 res->nh = &nhi->fib6_nh;
545 }
546}
f88c9aa1
DA
547
/* Prototype only. @cb takes a fib6_nh plus the opaque @arg; how it is
 * invoked is defined by the implementation outside this header.
 */
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);
38428d68
RP
551
552static inline int nexthop_get_family(struct nexthop *nh)
553{
554 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
555
556 return nhi->family;
557}
558
559static inline
560struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
561{
562 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
563
564 return &nhi->fib_nhc;
565}
566
567static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
568 int hash)
569{
570 struct nh_info *nhi;
571 struct nexthop *nhp;
572
573 nhp = nexthop_select_path(nh, hash);
574 if (unlikely(!nhp))
575 return NULL;
576 nhi = rcu_dereference(nhp->nh_info);
577 return &nhi->fib_nhc;
578}
ab84be7e 579#endif