1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <net/netevent.h>
22 #include <net/neighbour.h>
24 #include <net/ip_fib.h>
25 #include <net/ip6_fib.h>
26 #include <net/nexthop.h>
27 #include <net/fib_rules.h>
28 #include <net/ip_tunnels.h>
29 #include <net/l3mdev.h>
30 #include <net/addrconf.h>
31 #include <net/ndisc.h>
33 #include <net/fib_notifier.h>
34 #include <net/switchdev.h>
39 #include "spectrum_cnt.h"
40 #include "spectrum_dpipe.h"
41 #include "spectrum_ipip.h"
42 #include "spectrum_mr.h"
43 #include "spectrum_mr_tcam.h"
44 #include "spectrum_router.h"
45 #include "spectrum_span.h"
49 struct mlxsw_sp_lpm_tree;
50 struct mlxsw_sp_rif_ops;
53 struct list_head nexthop_list;
54 struct list_head neigh_list;
55 struct net_device *dev; /* NULL for underlay RIF */
56 struct mlxsw_sp_fid *fid;
57 unsigned char addr[ETH_ALEN];
61 const struct mlxsw_sp_rif_ops *ops;
62 struct mlxsw_sp *mlxsw_sp;
64 unsigned int counter_ingress;
65 bool counter_ingress_valid;
66 unsigned int counter_egress;
67 bool counter_egress_valid;
70 struct mlxsw_sp_rif_params {
71 struct net_device *dev;
80 struct mlxsw_sp_rif_subport {
81 struct mlxsw_sp_rif common;
91 struct mlxsw_sp_rif_ipip_lb {
92 struct mlxsw_sp_rif common;
93 struct mlxsw_sp_rif_ipip_lb_config lb_config;
94 u16 ul_vr_id; /* Reserved for Spectrum-2. */
95 u16 ul_rif_id; /* Reserved for Spectrum. */
98 struct mlxsw_sp_rif_params_ipip_lb {
99 struct mlxsw_sp_rif_params common;
100 struct mlxsw_sp_rif_ipip_lb_config lb_config;
103 struct mlxsw_sp_rif_ops {
104 enum mlxsw_sp_rif_type type;
107 void (*setup)(struct mlxsw_sp_rif *rif,
108 const struct mlxsw_sp_rif_params *params);
109 int (*configure)(struct mlxsw_sp_rif *rif);
110 void (*deconfigure)(struct mlxsw_sp_rif *rif);
111 struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
112 struct netlink_ext_ack *extack);
113 void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
116 static struct mlxsw_sp_rif *
117 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
118 const struct net_device *dev);
119 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
120 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
121 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
122 struct mlxsw_sp_lpm_tree *lpm_tree);
123 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
124 const struct mlxsw_sp_fib *fib,
126 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
127 const struct mlxsw_sp_fib *fib);
/* Map a counter direction to the address of the matching counter-index
 * field inside @rif (counter_egress or counter_ingress).
 * NOTE(review): the switch header and the fall-through return are not
 * visible in this listing; callers test the result against NULL.
 */
129 static unsigned int *
130 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
131 enum mlxsw_sp_rif_counter_dir dir)
134 case MLXSW_SP_RIF_COUNTER_EGRESS:
135 return &rif->counter_egress;
136 case MLXSW_SP_RIF_COUNTER_INGRESS:
137 return &rif->counter_ingress;
/* Return the "valid" flag stored in @rif for the counter of direction @dir
 * (counter_egress_valid or counter_ingress_valid).
 */
143 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
144 enum mlxsw_sp_rif_counter_dir dir)
147 case MLXSW_SP_RIF_COUNTER_EGRESS:
148 return rif->counter_egress_valid;
149 case MLXSW_SP_RIF_COUNTER_INGRESS:
150 return rif->counter_ingress_valid;
/* Store @valid into the per-direction "valid" flag of @rif selected by
 * @dir. Only the in-memory flag is touched by the visible lines.
 */
156 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
157 enum mlxsw_sp_rif_counter_dir dir,
161 case MLXSW_SP_RIF_COUNTER_EGRESS:
162 rif->counter_egress_valid = valid;
164 case MLXSW_SP_RIF_COUNTER_INGRESS:
165 rif->counter_ingress_valid = valid;
/* Bind or unbind a counter on a RIF through the RITR register.
 *
 * The register is handled read-modify-write: the current RITR contents for
 * @rif_index are queried, the counter fields are packed into the same
 * payload, and the payload is written back. Returns the result of the
 * final register write.
 */
170 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
171 unsigned int counter_index, bool enable,
172 enum mlxsw_sp_rif_counter_dir dir)
174 char ritr_pl[MLXSW_REG_RITR_LEN];
175 bool is_egress = false;
/* NOTE(review): the statement guarded by this test is not visible here;
 * presumably it sets is_egress - confirm.
 */
178 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
/* Fetch the current register state before modifying the counter fields. */
180 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
181 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
185 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
187 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Read the current value of the RIF counter for direction @dir into @cnt.
 *
 * The counter's valid flag and index pointer are looked up first; the
 * RICNT register is then queried with the NOP opcode and the
 * good-unicast-packets field of the reply is stored in *@cnt.
 * NOTE(review): the error returns taken when the counter is not valid, the
 * index pointer is NULL, or the query fails are not visible in this
 * listing.
 */
190 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
191 struct mlxsw_sp_rif *rif,
192 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
194 char ricnt_pl[MLXSW_REG_RICNT_LEN];
195 unsigned int *p_counter_index;
199 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
203 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
204 if (!p_counter_index)
206 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
207 MLXSW_REG_RICNT_OPCODE_NOP);
208 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
211 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
/* Issue a RICNT register write with the CLEAR opcode for @counter_index.
 * Returns the result of the register write.
 */
215 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
216 unsigned int counter_index)
218 char ricnt_pl[MLXSW_REG_RICNT_LEN];
220 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
221 MLXSW_REG_RICNT_OPCODE_CLEAR);
222 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
/* Allocate a counter from the RIF sub-pool and attach it to @rif for
 * direction @dir.
 *
 * Sequence: allocate the counter index, clear the counter, enable it on
 * the RIF via mlxsw_sp_rif_counter_edit(), then mark it valid in @rif.
 * The failure labels for the clear and edit steps lead to
 * mlxsw_sp_counter_free() so the index is returned to the pool.
 * NOTE(review): the return statements and label lines are not visible in
 * this listing.
 */
225 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
226 struct mlxsw_sp_rif *rif,
227 enum mlxsw_sp_rif_counter_dir dir)
229 unsigned int *p_counter_index;
232 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
233 if (!p_counter_index)
235 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
/* Start from a cleared counter before it is enabled on the RIF. */
240 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
242 goto err_counter_clear;
244 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
245 *p_counter_index, true, dir);
247 goto err_counter_edit;
248 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
/* Error unwind: give the counter index back to the pool. */
253 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
/* Detach and release the counter of direction @dir from @rif.
 *
 * The valid flag is checked first. A NULL counter-index pointer triggers
 * WARN_ON(). Otherwise the counter is disabled on the RIF, its index is
 * freed back to the RIF sub-pool, and the valid flag is cleared.
 * The return value of mlxsw_sp_rif_counter_edit() is not checked here.
 */
258 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
259 struct mlxsw_sp_rif *rif,
260 enum mlxsw_sp_rif_counter_dir dir)
262 unsigned int *p_counter_index;
264 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
267 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
268 if (WARN_ON(!p_counter_index))
270 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
271 *p_counter_index, false, dir);
272 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
274 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
/* Allocate the egress counter for @rif, gated on whether counters are
 * enabled for the ERIF dpipe table in devlink.
 * The return value of mlxsw_sp_rif_counter_alloc() is ignored.
 * NOTE(review): the statement taken when the dpipe check fails is not
 * visible; presumably an early return - confirm.
 */
277 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
279 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
280 struct devlink *devlink;
282 devlink = priv_to_devlink(mlxsw_sp->core);
283 if (!devlink_dpipe_table_counter_enabled(devlink,
284 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
286 mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
/* Release the egress counter of @rif; counterpart of
 * mlxsw_sp_rif_counters_alloc().
 */
289 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
291 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
293 mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
/* Number of distinct prefix lengths tracked: one per bit of an IPv6
 * address, plus one for prefix length 0.
 */
296 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
/* Bitmap with one bit per prefix length; a set bit marks that length as
 * used.
 */
298 struct mlxsw_sp_prefix_usage {
299 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
/* Iterate @prefix over every prefix length whose bit is set in
 * @prefix_usage.
 */
302 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
303 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
/* Return true when both prefix-usage objects are byte-wise identical
 * (memcmp over the whole structure).
 */
306 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
307 struct mlxsw_sp_prefix_usage *prefix_usage2)
309 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
/* Copy the whole prefix-usage structure from @prefix_usage2 (source) into
 * @prefix_usage1 (destination).
 */
313 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
314 struct mlxsw_sp_prefix_usage *prefix_usage2)
316 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
/* Mark @prefix_len as used by setting its bit in @prefix_usage. */
320 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
321 unsigned char prefix_len)
323 set_bit(prefix_len, prefix_usage->b);
/* Mark @prefix_len as unused by clearing its bit in @prefix_usage. */
327 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
328 unsigned char prefix_len)
330 clear_bit(prefix_len, prefix_usage->b);
333 struct mlxsw_sp_fib_key {
334 unsigned char addr[sizeof(struct in6_addr)];
335 unsigned char prefix_len;
338 enum mlxsw_sp_fib_entry_type {
339 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
340 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
341 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
342 MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
343 MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
345 /* This is a special case of local delivery, where a packet should be
346 * decapsulated on reception. Note that there is no corresponding ENCAP,
347 * because that's a type of next hop, not of FIB entry. (There can be
348 * several next hops in a REMOTE entry, and some of them may be
349 * encapsulating entries.)
350 */
351 MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
352 MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
355 struct mlxsw_sp_nexthop_group_info;
356 struct mlxsw_sp_nexthop_group;
357 struct mlxsw_sp_fib_entry;
359 struct mlxsw_sp_fib_node {
360 struct mlxsw_sp_fib_entry *fib_entry;
361 struct list_head list;
362 struct rhash_head ht_node;
363 struct mlxsw_sp_fib *fib;
364 struct mlxsw_sp_fib_key key;
367 struct mlxsw_sp_fib_entry_decap {
368 struct mlxsw_sp_ipip_entry *ipip_entry;
/* Allocate the low-level private area attached to a FIB entry.
 *
 * The allocation is sizeof(*priv) plus ll_ops->fib_entry_priv_size bytes,
 * zero-initialised, with its reference count set to 1. Returns
 * ERR_PTR(-ENOMEM) on allocation failure.
 * NOTE(review): the value returned when fib_entry_priv_size is zero is not
 * visible in this listing; mlxsw_sp_fib_entry_priv_put() tolerates NULL.
 */
372 static struct mlxsw_sp_fib_entry_priv *
373 mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops)
375 struct mlxsw_sp_fib_entry_priv *priv;
377 if (!ll_ops->fib_entry_priv_size)
378 /* No need to have priv */
381 priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL);
383 return ERR_PTR(-ENOMEM);
384 refcount_set(&priv->refcnt, 1);
389 mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv)
/* Take an additional reference on @priv. */
394 static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv)
396 refcount_inc(&priv->refcnt);
/* Drop one reference on @priv and destroy it when the count reaches zero.
 * A NULL @priv is accepted and ignored.
 */
399 static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv)
401 if (!priv || !refcount_dec_and_test(&priv->refcnt))
403 mlxsw_sp_fib_entry_priv_destroy(priv);
/* Take a reference on @priv and link it onto the operation context's
 * fib_entry_priv_list; the reference is dropped again by
 * mlxsw_sp_fib_entry_op_ctx_priv_put_all().
 * NOTE(review): lines between the signature and the hold call are not
 * visible here; presumably a NULL check on @priv - confirm.
 */
406 static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
407 struct mlxsw_sp_fib_entry_priv *priv)
411 mlxsw_sp_fib_entry_priv_hold(priv);
412 list_add(&priv->list, &op_ctx->fib_entry_priv_list);
/* Drop the reference held on every priv linked on the operation context's
 * list, then reset the list head to empty.
 */
415 static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
417 struct mlxsw_sp_fib_entry_priv *priv, *tmp;
/* The _safe iterator is needed because a put may free the current node. */
419 list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list)
420 mlxsw_sp_fib_entry_priv_put(priv);
421 INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list);
424 struct mlxsw_sp_fib_entry {
425 struct mlxsw_sp_fib_node *fib_node;
426 enum mlxsw_sp_fib_entry_type type;
427 struct list_head nexthop_group_node;
428 struct mlxsw_sp_nexthop_group *nh_group;
429 struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
430 struct mlxsw_sp_fib_entry_priv *priv;
433 struct mlxsw_sp_fib4_entry {
434 struct mlxsw_sp_fib_entry common;
441 struct mlxsw_sp_fib6_entry {
442 struct mlxsw_sp_fib_entry common;
443 struct list_head rt6_list;
447 struct mlxsw_sp_rt6 {
448 struct list_head list;
449 struct fib6_info *rt;
452 struct mlxsw_sp_lpm_tree {
454 unsigned int ref_count;
455 enum mlxsw_sp_l3proto proto;
456 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
457 struct mlxsw_sp_prefix_usage prefix_usage;
460 struct mlxsw_sp_fib {
461 struct rhashtable ht;
462 struct list_head node_list;
463 struct mlxsw_sp_vr *vr;
464 struct mlxsw_sp_lpm_tree *lpm_tree;
465 enum mlxsw_sp_l3proto proto;
466 const struct mlxsw_sp_router_ll_ops *ll_ops;
470 u16 id; /* virtual router ID */
471 u32 tb_id; /* kernel fib table id */
472 unsigned int rif_count;
473 struct mlxsw_sp_fib *fib4;
474 struct mlxsw_sp_fib *fib6;
475 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
476 struct mlxsw_sp_rif *ul_rif;
477 refcount_t ul_rif_refcnt;
480 static int mlxsw_sp_router_ll_basic_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
481 enum mlxsw_sp_l3proto proto)
/* Write the RALTA register using the RALTA portion embedded in an XRALTA
 * payload (located at MLXSW_REG_XRALTA_RALTA_OFFSET inside @xralta_pl).
 */
486 static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
488 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta),
489 xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET);
/* Write the RALST register using the RALST portion embedded in an XRALST
 * payload (located at MLXSW_REG_XRALST_RALST_OFFSET inside @xralst_pl).
 */
492 static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
494 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst),
495 xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET);
/* Write the RALTB register using the RALTB portion embedded in an XRALTB
 * payload (located at MLXSW_REG_XRALTB_RALTB_OFFSET inside @xraltb_pl).
 */
498 static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
500 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
501 xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET);
504 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
/* Create the FIB object of protocol @proto for virtual router @vr.
 *
 * Runs the protocol's low-level init hook, allocates the FIB, initialises
 * its hash table and node list, takes a reference on the router's current
 * per-protocol LPM tree and binds the VR to that tree. Returns
 * ERR_PTR(-ENOMEM) on allocation failure.
 * NOTE(review): the success return, the assignments of fib->proto and
 * fib->vr, and most of the unwind path are not visible in this listing.
 */
506 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
507 struct mlxsw_sp_vr *vr,
508 enum mlxsw_sp_l3proto proto)
510 const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
511 struct mlxsw_sp_lpm_tree *lpm_tree;
512 struct mlxsw_sp_fib *fib;
515 err = ll_ops->init(mlxsw_sp, vr->id, proto);
519 lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
520 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
522 return ERR_PTR(-ENOMEM);
523 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
525 goto err_rhashtable_init;
526 INIT_LIST_HEAD(&fib->node_list);
529 fib->lpm_tree = lpm_tree;
530 fib->ll_ops = ll_ops;
/* Hold the tree before binding; the unwind path below drops it again. */
531 mlxsw_sp_lpm_tree_hold(lpm_tree);
532 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
534 goto err_lpm_tree_bind;
538 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
/* Tear down @fib: rebind its VR to the default tree, drop the reference on
 * its LPM tree, warn if FIB nodes are still linked, and destroy the hash
 * table.
 * NOTE(review): freeing of @fib itself is not visible in this listing.
 */
544 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
545 struct mlxsw_sp_fib *fib)
547 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
548 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
549 WARN_ON(!list_empty(&fib->node_list));
550 rhashtable_destroy(&fib->ht);
/* Scan the router's LPM tree array for a tree whose ref_count is zero.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller treats a NULL-like result as "no free tree" (-EBUSY).
 */
554 static struct mlxsw_sp_lpm_tree *
555 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
/* NOTE(review): this cursor is declared static although it is reassigned
 * on every iteration; a plain automatic variable looks sufficient and
 * would avoid shared state between calls - confirm and drop 'static'.
 */
557 static struct mlxsw_sp_lpm_tree *lpm_tree;
560 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
561 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
562 if (lpm_tree->ref_count == 0)
/* Allocate @lpm_tree in hardware: pack an XRALTA payload with the
 * allocate flag set to true and the tree's protocol, then write it through
 * the low-level ops. Returns the result of the write.
 */
568 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
569 const struct mlxsw_sp_router_ll_ops *ll_ops,
570 struct mlxsw_sp_lpm_tree *lpm_tree)
572 char xralta_pl[MLXSW_REG_XRALTA_LEN];
574 mlxsw_reg_xralta_pack(xralta_pl, true,
575 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
577 return ll_ops->ralta_write(mlxsw_sp, xralta_pl);
/* Free @lpm_tree in hardware: same XRALTA write as
 * mlxsw_sp_lpm_tree_alloc() but with the allocate flag set to false.
 * The result of the write is not checked.
 */
580 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
581 const struct mlxsw_sp_router_ll_ops *ll_ops,
582 struct mlxsw_sp_lpm_tree *lpm_tree)
584 char xralta_pl[MLXSW_REG_XRALTA_LEN];
586 mlxsw_reg_xralta_pack(xralta_pl, false,
587 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
589 ll_ops->ralta_write(mlxsw_sp, xralta_pl);
/* Program the structure of @lpm_tree from the set of used prefix lengths.
 *
 * A first pass over @prefix_usage determines the root bin. A second pass
 * packs one bin per used prefix length, passing the previously visited
 * prefix length as a child and MLXSW_REG_RALST_BIN_NO_CHILD for the other
 * child. The payload is then written through the low-level ops.
 * NOTE(review): the body of the first loop and the condition at the top of
 * the second loop are not visible in this listing.
 */
593 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
594 const struct mlxsw_sp_router_ll_ops *ll_ops,
595 struct mlxsw_sp_prefix_usage *prefix_usage,
596 struct mlxsw_sp_lpm_tree *lpm_tree)
598 char xralst_pl[MLXSW_REG_XRALST_LEN];
601 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
603 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
606 mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id);
607 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
610 mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix,
611 MLXSW_REG_RALST_BIN_NO_CHILD);
/* Remember this prefix so the next bin can reference it. */
612 last_prefix = prefix;
614 return ll_ops->ralst_write(mlxsw_sp, xralst_pl);
/* Claim an unused LPM tree slot and program it for @proto and
 * @prefix_usage.
 *
 * On success the tree records a copy of @prefix_usage, its per-prefix
 * reference counts are zeroed and its ref_count is set to 1. Returns
 * ERR_PTR(-EBUSY) when no unused tree is available. If programming the
 * tree structure fails, the hardware tree is freed again.
 * NOTE(review): the success return and the error-return statements are not
 * visible in this listing.
 */
617 static struct mlxsw_sp_lpm_tree *
618 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
619 const struct mlxsw_sp_router_ll_ops *ll_ops,
620 struct mlxsw_sp_prefix_usage *prefix_usage,
621 enum mlxsw_sp_l3proto proto)
623 struct mlxsw_sp_lpm_tree *lpm_tree;
626 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
628 return ERR_PTR(-EBUSY);
/* proto must be set before alloc: the XRALTA pack reads lpm_tree->proto. */
629 lpm_tree->proto = proto;
630 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree);
634 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree);
636 goto err_left_struct_set;
637 memcpy(&lpm_tree->prefix_usage, prefix_usage,
638 sizeof(lpm_tree->prefix_usage));
639 memset(&lpm_tree->prefix_ref_count, 0,
640 sizeof(lpm_tree->prefix_ref_count));
641 lpm_tree->ref_count = 1;
645 mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
/* Destroy @lpm_tree; in the visible code this only frees the tree in
 * hardware via mlxsw_sp_lpm_tree_free().
 */
649 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
650 const struct mlxsw_sp_router_ll_ops *ll_ops,
651 struct mlxsw_sp_lpm_tree *lpm_tree)
653 mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
/* Get an LPM tree for @proto with exactly the prefix usage @prefix_usage.
 *
 * An in-use tree (ref_count != 0) with the same protocol and an identical
 * prefix-usage bitmap is reused and an extra reference is taken on it.
 * Otherwise a new tree is created, which may return an ERR_PTR.
 */
656 static struct mlxsw_sp_lpm_tree *
657 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
658 struct mlxsw_sp_prefix_usage *prefix_usage,
659 enum mlxsw_sp_l3proto proto)
661 const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
662 struct mlxsw_sp_lpm_tree *lpm_tree;
665 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
666 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
667 if (lpm_tree->ref_count != 0 &&
668 lpm_tree->proto == proto &&
669 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
671 mlxsw_sp_lpm_tree_hold(lpm_tree);
/* No matching tree in use: allocate and program a fresh one. */
675 return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto);
/* Take a reference on @lpm_tree. ref_count is a plain unsigned int, so the
 * increment is not atomic.
 */
678 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
680 lpm_tree->ref_count++;
/* Drop a reference on @lpm_tree and destroy the tree when the count
 * reaches zero. The low-level ops are selected by the tree's protocol.
 */
683 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
684 struct mlxsw_sp_lpm_tree *lpm_tree)
686 const struct mlxsw_sp_router_ll_ops *ll_ops =
687 mlxsw_sp->router->proto_ll_ops[lpm_tree->proto];
689 if (--lpm_tree->ref_count == 0)
690 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree);
693 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
/* Initialise LPM tree bookkeeping for the router.
 *
 * Requires the MAX_LPM_TREES resource. The tree array holds
 * max_trees - MLXSW_SP_LPM_TREE_MIN entries and tree IDs start at
 * MLXSW_SP_LPM_TREE_MIN. One tree with an empty prefix usage is then
 * obtained per protocol (IPv4, IPv6) and stored in lpm.proto_trees[].
 * On IPv6 failure the IPv4 tree reference is dropped; the tree array is
 * freed on the error path.
 * NOTE(review): the return statements and the error label lines are not
 * visible in this listing.
 */
695 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
697 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
698 struct mlxsw_sp_lpm_tree *lpm_tree;
702 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
705 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
706 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
707 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
708 sizeof(struct mlxsw_sp_lpm_tree),
710 if (!mlxsw_sp->router->lpm.trees)
/* Array index i corresponds to hardware tree ID i + MLXSW_SP_LPM_TREE_MIN. */
713 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
714 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
715 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
718 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
719 MLXSW_SP_L3_PROTO_IPV4);
720 if (IS_ERR(lpm_tree)) {
721 err = PTR_ERR(lpm_tree);
722 goto err_ipv4_tree_get;
724 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
726 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
727 MLXSW_SP_L3_PROTO_IPV6);
728 if (IS_ERR(lpm_tree)) {
729 err = PTR_ERR(lpm_tree);
730 goto err_ipv6_tree_get;
732 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
/* Error unwind: release the IPv4 tree, then the tree array. */
737 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
738 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
740 kfree(mlxsw_sp->router->lpm.trees);
/* Undo mlxsw_sp_lpm_init(): drop the per-protocol tree references in
 * reverse order (IPv6, then IPv4) and free the tree array.
 */
744 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
746 struct mlxsw_sp_lpm_tree *lpm_tree;
748 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
749 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
751 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
752 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
754 kfree(mlxsw_sp->router->lpm.trees);
/* A virtual router counts as used when any of its unicast FIBs (fib4,
 * fib6) or multicast routing tables (IPv4, IPv6) is non-NULL.
 */
757 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
759 return !!vr->fib4 || !!vr->fib6 ||
760 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
761 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
/* Scan the VR array (MAX_VRS entries) for a virtual router that is not in
 * use according to mlxsw_sp_vr_is_used().
 * NOTE(review): the return statements are not visible in this listing.
 */
764 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
766 struct mlxsw_sp_vr *vr;
769 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
770 vr = &mlxsw_sp->router->vrs[i];
771 if (!mlxsw_sp_vr_is_used(vr))
/* Bind the virtual router owning @fib to LPM tree @tree_id for the FIB's
 * protocol, by packing an XRALTB payload and writing it through the FIB's
 * low-level ops. Returns the result of the write.
 */
777 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
778 const struct mlxsw_sp_fib *fib, u8 tree_id)
780 char xraltb_pl[MLXSW_REG_XRALTB_LEN];
782 mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
783 (enum mlxsw_reg_ralxx_protocol) fib->proto,
785 return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
/* Unbind the virtual router owning @fib from its LPM tree by rebinding it
 * to tree 0. Returns the result of the register write.
 */
788 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
789 const struct mlxsw_sp_fib *fib)
791 char xraltb_pl[MLXSW_REG_XRALTB_LEN];
793 /* Bind to tree 0 which is default */
794 mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
795 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
796 return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
/* Normalise a kernel FIB table ID: RT_TABLE_LOCAL and RT_TABLE_DEFAULT are
 * mapped to RT_TABLE_MAIN so that all three share one virtual router.
 * NOTE(review): the final return is not visible in this listing.
 */
799 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
801 /* For our purpose, squash main, default and local tables into one */
802 if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
803 tb_id = RT_TABLE_MAIN;
/* Find the in-use virtual router whose table ID equals the normalised
 * @tb_id (see mlxsw_sp_fix_tb_id()).
 * NOTE(review): the return statements are not visible in this listing;
 * callers check the result for a missing VR.
 */
807 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
810 struct mlxsw_sp_vr *vr;
813 tb_id = mlxsw_sp_fix_tb_id(tb_id);
815 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
816 vr = &mlxsw_sp->router->vrs[i];
817 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
/* Look up the virtual router for kernel table @tb_id while holding the
 * router lock.
 * NOTE(review): the remaining parameter, the handling of a missing VR and
 * the value returned are not visible in this listing; the name suggests
 * the VR ID is reported back to the caller - confirm.
 */
823 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
826 struct mlxsw_sp_vr *vr;
829 mutex_lock(&mlxsw_sp->router->lock);
830 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
837 mutex_unlock(&mlxsw_sp->router->lock);
/* Select the FIB of @vr for protocol @proto.
 * NOTE(review): only the case labels are visible in this listing;
 * presumably the IPv4 case yields vr->fib4 and the IPv6 case vr->fib6 -
 * confirm.
 */
841 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
842 enum mlxsw_sp_l3proto proto)
845 case MLXSW_SP_L3_PROTO_IPV4:
847 case MLXSW_SP_L3_PROTO_IPV6:
/* Claim an unused virtual router and populate it.
 *
 * Creates the IPv4 and IPv6 unicast FIBs and then the IPv4 and IPv6
 * multicast routing tables, in that order. Failures unwind in reverse
 * order. When no unused VR exists an extack message is set and
 * ERR_PTR(-EBUSY) is returned.
 * NOTE(review): the tb_id parameter line, the assignments of vr->fib4,
 * vr->fib6 and vr->tb_id, and the success return are not visible in this
 * listing.
 */
853 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
855 struct netlink_ext_ack *extack)
857 struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
858 struct mlxsw_sp_fib *fib4;
859 struct mlxsw_sp_fib *fib6;
860 struct mlxsw_sp_vr *vr;
863 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
865 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
866 return ERR_PTR(-EBUSY);
868 fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
870 return ERR_CAST(fib4);
871 fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
874 goto err_fib6_create;
876 mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
877 MLXSW_SP_L3_PROTO_IPV4);
878 if (IS_ERR(mr4_table)) {
879 err = PTR_ERR(mr4_table);
880 goto err_mr4_table_create;
882 mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
883 MLXSW_SP_L3_PROTO_IPV6);
884 if (IS_ERR(mr6_table)) {
885 err = PTR_ERR(mr6_table);
886 goto err_mr6_table_create;
/* Publish the tables in the VR only after every creation step succeeded. */
891 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
892 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
/* Error unwind, in reverse order of creation. */
896 err_mr6_table_create:
897 mlxsw_sp_mr_table_destroy(mr4_table);
898 err_mr4_table_create:
899 mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
901 mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
/* Release everything owned by @vr in reverse order of creation: the IPv6
 * and IPv4 multicast tables (pointers reset to NULL), then the IPv6 and
 * IPv4 FIBs.
 * NOTE(review): resetting vr->fib6 / vr->fib4 to NULL is not visible in
 * this listing; mlxsw_sp_vr_is_used() depends on those pointers.
 */
905 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
906 struct mlxsw_sp_vr *vr)
908 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
909 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
910 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
911 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
912 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
914 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
/* Return the virtual router for the normalised @tb_id, creating it when
 * none is found. Creation failures are reported through @extack and an
 * ERR_PTR from mlxsw_sp_vr_create().
 * NOTE(review): the test between the lookup and the create call, and the
 * return, are not visible in this listing.
 */
918 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
919 struct netlink_ext_ack *extack)
921 struct mlxsw_sp_vr *vr;
923 tb_id = mlxsw_sp_fix_tb_id(tb_id);
924 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
926 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
/* Destroy @vr once nothing uses it any more: no RIFs, no nodes in either
 * unicast FIB, and both multicast routing tables empty. Otherwise this is
 * a no-op.
 */
930 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
932 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
933 list_empty(&vr->fib6->node_list) &&
934 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
935 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
936 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
/* Decide whether the @proto FIB of @vr is a candidate for tree
 * replacement, based on whether the VR is in use and whether its FIB is
 * currently bound to @tree_id.
 * NOTE(review): the values returned by each branch are not visible in this
 * listing.
 */
940 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
941 enum mlxsw_sp_l3proto proto, u8 tree_id)
943 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
945 if (!mlxsw_sp_vr_is_used(vr))
947 if (fib->lpm_tree->id == tree_id)
/* Switch @fib from its current LPM tree to @new_tree.
 *
 * The new tree is referenced and bound first; only after a successful bind
 * is the reference on the old tree dropped. If the bind fails, the new
 * tree reference is released and fib->lpm_tree is restored to the old
 * tree.
 */
952 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
953 struct mlxsw_sp_fib *fib,
954 struct mlxsw_sp_lpm_tree *new_tree)
956 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
959 fib->lpm_tree = new_tree;
960 mlxsw_sp_lpm_tree_hold(new_tree);
961 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
964 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
/* Error unwind: undo the hold and restore the previous tree pointer. */
968 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
969 fib->lpm_tree = old_tree;
/* Replace the router-wide LPM tree of @fib's protocol with @new_tree.
 *
 * Every VR whose FIB of that protocol is selected by
 * mlxsw_sp_vr_lpm_tree_should_replace() for the old tree ID is moved to
 * @new_tree. On success the per-prefix reference counts are copied from
 * the old tree to the new one, lpm.proto_trees[proto] is updated and the
 * old tree reference is dropped. On failure the VRs already moved are
 * rolled back to the old tree.
 */
973 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
974 struct mlxsw_sp_fib *fib,
975 struct mlxsw_sp_lpm_tree *new_tree)
977 enum mlxsw_sp_l3proto proto = fib->proto;
978 struct mlxsw_sp_lpm_tree *old_tree;
979 u8 old_id, new_id = new_tree->id;
980 struct mlxsw_sp_vr *vr;
983 old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
984 old_id = old_tree->id;
986 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
987 vr = &mlxsw_sp->router->vrs[i];
988 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
990 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
991 mlxsw_sp_vr_fib(vr, proto),
994 goto err_tree_replace;
997 memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
998 sizeof(new_tree->prefix_ref_count));
999 mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1000 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
/* Rollback of the VRs that were already switched to the new tree.
 * NOTE(review): vr is not re-read from router->vrs[i] inside this loop, so
 * each iteration appears to operate on the VR at which the forward loop
 * failed rather than on VR i - confirm and fix.
 */
1005 for (i--; i >= 0; i--) {
1006 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1008 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1009 mlxsw_sp_vr_fib(vr, proto),
/* Allocate the zero-initialised virtual router array, sized by the MAX_VRS
 * resource, and walk every entry to initialise it.
 * NOTE(review): the per-entry initialisation inside the loop and the
 * return statements are not visible in this listing.
 */
1015 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1017 struct mlxsw_sp_vr *vr;
1021 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1024 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1025 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1027 if (!mlxsw_sp->router->vrs)
1030 for (i = 0; i < max_vrs; i++) {
1031 vr = &mlxsw_sp->router->vrs[i];
1038 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1040 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1042 /* At this stage we're guaranteed not to have new incoming
1043 * FIB notifications and the work queue is free from FIBs
1044 * sitting on top of mlxsw netdevs. However, we can still
1045 * have other FIBs queued. Flush the queue before flushing
1046 * the device's tables. No need for locks, as we're the only
1047 * writer.
1048 */
1049 mlxsw_core_flush_owq();
1050 mlxsw_sp_router_fib_flush(mlxsw_sp);
1051 kfree(mlxsw_sp->router->vrs);
/* Resolve the underlay device of tunnel overlay device @ol_dev: look up
 * the netdevice whose ifindex is the tunnel's parms.link, in the overlay
 * device's network namespace.
 * Uses dev_get_by_index_rcu(), so the caller is expected to be inside an
 * RCU read-side critical section; no reference is taken on the result.
 */
1054 static struct net_device *
1055 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
1057 struct ip_tunnel *tun = netdev_priv(ol_dev);
1058 struct net *net = dev_net(ol_dev);
1060 return dev_get_by_index_rcu(net, tun->parms.link);
/* Return the kernel FIB table ID used by the underlay of tunnel @ol_dev.
 *
 * When an underlay device is found, its l3mdev table is used, falling back
 * to RT_TABLE_MAIN when l3mdev_fib_table() yields 0. RT_TABLE_MAIN is also
 * used on the other branch.
 * NOTE(review): the branch condition, RCU locking and the return are not
 * visible in this listing.
 */
1063 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1065 struct net_device *d;
1069 d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1071 tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1073 tb_id = RT_TABLE_MAIN;
1079 static struct mlxsw_sp_rif *
1080 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1081 const struct mlxsw_sp_rif_params *params,
1082 struct netlink_ext_ack *extack);
/* Create the loopback RIF for the overlay device of an IPIP tunnel.
 *
 * The RIF parameters are built from @ol_dev (not a LAG) and the loopback
 * configuration supplied by the tunnel type's ol_loopback_config() hook.
 * Returns the containing mlxsw_sp_rif_ipip_lb, or the error pointer from
 * mlxsw_sp_rif_create().
 */
1084 static struct mlxsw_sp_rif_ipip_lb *
1085 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1086 enum mlxsw_sp_ipip_type ipipt,
1087 struct net_device *ol_dev,
1088 struct netlink_ext_ack *extack)
1090 struct mlxsw_sp_rif_params_ipip_lb lb_params;
1091 const struct mlxsw_sp_ipip_ops *ipip_ops;
1092 struct mlxsw_sp_rif *rif;
1094 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1095 lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1096 .common.dev = ol_dev,
1097 .common.lag = false,
1098 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1101 rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1103 return ERR_CAST(rif);
/* The generic RIF is embedded as 'common' in the loopback RIF. */
1104 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
/* Allocate an IPIP entry for overlay device @ol_dev of tunnel type @ipipt.
 *
 * The entry is zero-allocated, its overlay loopback RIF is created, and
 * the tunnel type and overlay device are recorded. For an IPv4 underlay
 * the tunnel parameters are snapshotted into parms4. Returns
 * ERR_PTR(-ENOMEM) on allocation failure, or the loopback creation error.
 * NOTE(review): the IPv6 case body, the success return and the unwind
 * statements after err_ol_ipip_lb_create are not visible in this listing.
 */
1107 static struct mlxsw_sp_ipip_entry *
1108 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1109 enum mlxsw_sp_ipip_type ipipt,
1110 struct net_device *ol_dev)
1112 const struct mlxsw_sp_ipip_ops *ipip_ops;
1113 struct mlxsw_sp_ipip_entry *ipip_entry;
1114 struct mlxsw_sp_ipip_entry *ret = NULL;
1116 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1117 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1119 return ERR_PTR(-ENOMEM);
1121 ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1123 if (IS_ERR(ipip_entry->ol_lb)) {
1124 ret = ERR_CAST(ipip_entry->ol_lb);
1125 goto err_ol_ipip_lb_create;
1128 ipip_entry->ipipt = ipipt;
1129 ipip_entry->ol_dev = ol_dev;
1131 switch (ipip_ops->ul_proto) {
1132 case MLXSW_SP_L3_PROTO_IPV4:
1133 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1135 case MLXSW_SP_L3_PROTO_IPV6:
1142 err_ol_ipip_lb_create:
/* Release an IPIP entry: destroy its overlay loopback RIF.
 * NOTE(review): freeing of @ipip_entry itself is not visible in this
 * listing.
 */
1148 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1150 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
/* Check whether @ipip_entry's tunnel has the given underlay source
 * address.
 *
 * The tunnel type's underlay protocol is compared with @ul_proto first.
 * The match then requires the tunnel's underlay table ID to equal the
 * ul_tb_id argument and the tunnel's source address to equal @saddr.
 * NOTE(review): the value returned on protocol mismatch is not visible in
 * this listing.
 */
1155 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1156 const enum mlxsw_sp_l3proto ul_proto,
1157 union mlxsw_sp_l3addr saddr,
1159 struct mlxsw_sp_ipip_entry *ipip_entry)
1161 u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1162 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1163 union mlxsw_sp_l3addr tun_saddr;
1165 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1168 tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1169 return tun_ul_tb_id == ul_tb_id &&
1170 mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
/* Set up @fib_entry as the decap entry of @ipip_entry.
 *
 * A KVDL entry of the adjacency type is allocated for the tunnel index.
 * The two objects are then cross-linked and the tunnel index is stored in
 * the FIB entry's decap state.
 * NOTE(review): the error check after the allocation and the return are
 * not visible in this listing.
 */
1174 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1175 struct mlxsw_sp_fib_entry *fib_entry,
1176 struct mlxsw_sp_ipip_entry *ipip_entry)
1181 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1186 ipip_entry->decap_fib_entry = fib_entry;
1187 fib_entry->decap.ipip_entry = ipip_entry;
1188 fib_entry->decap.tunnel_index = tunnel_index;
/* Undo mlxsw_sp_fib_entry_decap_init(): break the cross-links between the
 * FIB entry and its IPIP entry, then free the single KVDL adjacency entry
 * holding the tunnel index.
 */
1192 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1193 struct mlxsw_sp_fib_entry *fib_entry)
1195 /* Unlink this node from the IPIP entry that it's the decap entry of. */
1196 fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1197 fib_entry->decap.ipip_entry = NULL;
1198 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1199 1, fib_entry->decap.tunnel_index);
1202 static struct mlxsw_sp_fib_node *
1203 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1204 size_t addr_len, unsigned char prefix_len);
1205 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1206 struct mlxsw_sp_fib_entry *fib_entry);
/* Turn the decap FIB entry of @ipip_entry back into a plain TRAP entry:
 * release its decap state, change its type and push the update to the
 * device. The result of the update is not checked.
 */
1209 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1210 struct mlxsw_sp_ipip_entry *ipip_entry)
1212 struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1214 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1215 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1217 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* Promote @decap_fib_entry to be the IPIP decap entry of @ipip_entry.
 *
 * Decap state is initialised, the entry type is switched to IPIP_DECAP and
 * the entry is updated in the device. If the update fails, the entry is
 * demoted again.
 * NOTE(review): the statement taken when decap init fails is not visible
 * in this listing; presumably an early return - confirm.
 */
1221 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1222 struct mlxsw_sp_ipip_entry *ipip_entry,
1223 struct mlxsw_sp_fib_entry *decap_fib_entry)
1225 if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1228 decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1230 if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1231 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
/* Find the host-route FIB entry for @addr in table @tb_id, provided it has
 * entry type @type.
 *
 * The VR for @tb_id and its FIB for @proto are resolved, then a FIB node
 * is looked up by address and full-length prefix (32 for IPv4). For IPv4
 * the address is converted from big-endian to CPU order before the lookup.
 * NOTE(review): the IPv6 case, the handling of a missing VR and the
 * failure return values are not visible in this listing.
 */
1234 static struct mlxsw_sp_fib_entry *
1235 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1236 enum mlxsw_sp_l3proto proto,
1237 const union mlxsw_sp_l3addr *addr,
1238 enum mlxsw_sp_fib_entry_type type)
1240 struct mlxsw_sp_fib_node *fib_node;
1241 unsigned char addr_prefix_len;
1242 struct mlxsw_sp_fib *fib;
1243 struct mlxsw_sp_vr *vr;
1248 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1251 fib = mlxsw_sp_vr_fib(vr, proto);
1254 case MLXSW_SP_L3_PROTO_IPV4:
1255 addr4 = be32_to_cpu(addr->addr4);
1258 addr_prefix_len = 32;
1260 case MLXSW_SP_L3_PROTO_IPV6:
1266 fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1268 if (!fib_node || fib_node->fib_entry->type != type)
1271 return fib_node->fib_entry;
1274 /* Given an IPIP entry, find the corresponding decap route. */
/* The lookup uses the tunnel's underlay table and source address: the
 * underlay VR and its FIB for the tunnel type's underlay protocol are
 * resolved, then a FIB node is looked up by source address with a
 * full-length prefix (32 for IPv4). Only a node whose entry is of type
 * TRAP qualifies.
 * NOTE(review): the non-IPv4 case, the handling of a missing underlay VR
 * and the failure return values are not visible in this listing.
 */
1275 static struct mlxsw_sp_fib_entry *
1276 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1277 struct mlxsw_sp_ipip_entry *ipip_entry)
/* NOTE(review): fib_node is declared static although it is assigned before
 * every use in the visible code; a plain automatic variable looks
 * sufficient - confirm and drop 'static'.
 */
1279 static struct mlxsw_sp_fib_node *fib_node;
1280 const struct mlxsw_sp_ipip_ops *ipip_ops;
1281 unsigned char saddr_prefix_len;
1282 union mlxsw_sp_l3addr saddr;
1283 struct mlxsw_sp_fib *ul_fib;
1284 struct mlxsw_sp_vr *ul_vr;
1290 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1292 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1293 ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1297 ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1298 saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1299 ipip_entry->ol_dev);
1301 switch (ipip_ops->ul_proto) {
1302 case MLXSW_SP_L3_PROTO_IPV4:
1303 saddr4 = be32_to_cpu(saddr.addr4);
1306 saddr_prefix_len = 32;
1313 fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1316 fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1319 return fib_node->fib_entry;
/* Allocate an IPIP entry of type @ipipt for overlay device @ol_dev and
 * append it to the router's ipip_list. An allocation failure is detected
 * with IS_ERR() on the returned pointer.
 */
1322 static struct mlxsw_sp_ipip_entry *
1323 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1324 enum mlxsw_sp_ipip_type ipipt,
1325 struct net_device *ol_dev)
1327 struct mlxsw_sp_ipip_entry *ipip_entry;
1329 ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1330 if (IS_ERR(ipip_entry))
1333 list_add_tail(&ipip_entry->ipip_list_node,
1334 &mlxsw_sp->router->ipip_list);
1340 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1341 struct mlxsw_sp_ipip_entry *ipip_entry)
/* Unlink the entry from the router's IPIP list, then release it. */
1343 list_del(&ipip_entry->ipip_list_node);
1344 mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1348 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1349 const struct net_device *ul_dev,
1350 enum mlxsw_sp_l3proto ul_proto,
1351 union mlxsw_sp_l3addr ul_dip,
1352 struct mlxsw_sp_ipip_entry *ipip_entry)
/* Check whether @ipip_entry is the decap tunnel for @ul_dip. The
 * underlay protocol of the entry's tunnel type is compared with
 * @ul_proto, and the source address is matched in the l3mdev table of
 * @ul_dev, falling back to RT_TABLE_MAIN when there is none.
 */
1354 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1355 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1357 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1360 return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1361 ul_tb_id, ipip_entry);
1364 /* Given decap parameters, find the corresponding IPIP entry. */
/* @ul_dev_ifindex is resolved with dev_get_by_index_rcu() in the network
 * namespace of @mlxsw_sp; the router's ipip_list is then scanned with
 * mlxsw_sp_ipip_entry_matches_decap().
 */
1365 static struct mlxsw_sp_ipip_entry *
1366 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1367 enum mlxsw_sp_l3proto ul_proto,
1368 union mlxsw_sp_l3addr ul_dip)
1370 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1371 struct net_device *ul_dev;
1375 ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1379 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1381 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
/* Scan the router's IPIP ops array for a tunnel type whose dev_type
 * equals dev->type. Callers such as mlxsw_sp_netdev_is_ipip_ol() pass
 * NULL for @p_type when only the yes/no answer is needed.
 */
1395 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1396 const struct net_device *dev,
1397 enum mlxsw_sp_ipip_type *p_type)
1399 struct mlxsw_sp_router *router = mlxsw_sp->router;
1400 const struct mlxsw_sp_ipip_ops *ipip_ops;
1401 enum mlxsw_sp_ipip_type ipipt;
1403 for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1404 ipip_ops = router->ipip_ops_arr[ipipt];
1405 if (dev->type == ipip_ops->dev_type) {
/* Tell whether @dev matches one of the known IPIP tunnel device types;
 * the matched type itself is not requested (NULL output pointer).
 */
1414 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1415 const struct net_device *dev)
1417 return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
/* Walk the router's IPIP list looking for the entry whose overlay
 * device is @ol_dev.
 */
1420 static struct mlxsw_sp_ipip_entry *
1421 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1422 const struct net_device *ol_dev)
1424 struct mlxsw_sp_ipip_entry *ipip_entry;
1426 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1428 if (ipip_entry->ol_dev == ol_dev)
/* Continue walking the router's IPIP list after @start (from the list
 * head when @start is NULL) looking for an entry whose tunnel underlay
 * device, as reported by __mlxsw_sp_ipip_netdev_ul_dev_get(), is @ul_dev.
 */
1434 static struct mlxsw_sp_ipip_entry *
1435 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1436 const struct net_device *ul_dev,
1437 struct mlxsw_sp_ipip_entry *start)
1439 struct mlxsw_sp_ipip_entry *ipip_entry;
1441 ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1443 list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1445 struct net_device *ol_dev = ipip_entry->ol_dev;
1446 struct net_device *ipip_ul_dev;
1449 ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1452 if (ipip_ul_dev == ul_dev)
/* Under the router lock, report whether any IPIP entry uses @dev as its
 * underlay device.
 */
1459 bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1460 const struct net_device *dev)
1464 mutex_lock(&mlxsw_sp->router->lock);
1465 is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1466 mutex_unlock(&mlxsw_sp->router->lock);
/* Ask the ops registered for tunnel type @ipipt whether @ol_dev can be
 * offloaded.
 */
1471 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1472 const struct net_device *ol_dev,
1473 enum mlxsw_sp_ipip_type ipipt)
1475 const struct mlxsw_sp_ipip_ops *ops
1476 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1478 return ops->can_offload(mlxsw_sp, ol_dev);
/* Handle registration of overlay device @ol_dev. When the tunnel can be
 * offloaded, its underlay table, underlay protocol and source address
 * are resolved, the same-source-address demotion check is run, and an
 * IPIP entry is created. A creation failure is returned as PTR_ERR().
 */
1481 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1482 struct net_device *ol_dev)
1484 enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1485 struct mlxsw_sp_ipip_entry *ipip_entry;
1486 enum mlxsw_sp_l3proto ul_proto;
1487 union mlxsw_sp_l3addr saddr;
1490 mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1491 if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1492 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1493 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1494 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1495 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1498 ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1500 if (IS_ERR(ipip_entry))
1501 return PTR_ERR(ipip_entry);
/* Handle unregistration of overlay device @ol_dev: look up its IPIP
 * entry and destroy it.
 */
1508 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1509 struct net_device *ol_dev)
1511 struct mlxsw_sp_ipip_entry *ipip_entry;
1513 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1515 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1519 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1520 struct mlxsw_sp_ipip_entry *ipip_entry)
/* Overlay-up handling: if a decap route is found for this tunnel,
 * promote it.
 */
1522 struct mlxsw_sp_fib_entry *decap_fib_entry;
1524 decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1525 if (decap_fib_entry)
1526 mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1531 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1532 u16 ul_rif_id, bool enable)
/* Pack a RITR register payload describing loopback RIF @lb_rif and
 * write it to the device. Only an IPv4 underlay is packed here; an IPv6
 * underlay yields -EAFNOSUPPORT. The MTU comes from the RIF's netdevice.
 */
1534 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1535 struct mlxsw_sp_rif *rif = &lb_rif->common;
1536 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1537 char ritr_pl[MLXSW_REG_RITR_LEN];
1540 switch (lb_cf.ul_protocol) {
1541 case MLXSW_SP_L3_PROTO_IPV4:
1542 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1543 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1544 rif->rif_index, rif->vr_id, rif->dev->mtu);
1545 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1546 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1547 ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1550 case MLXSW_SP_L3_PROTO_IPV6:
1551 return -EAFNOSUPPORT;
1554 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Propagate an MTU change of @ol_dev: re-write the tunnel's loopback RIF
 * (enabled, with its existing underlay VR/RIF ids) and record the new
 * MTU in the RIF.
 */
1557 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1558 struct net_device *ol_dev)
1560 struct mlxsw_sp_ipip_entry *ipip_entry;
1561 struct mlxsw_sp_rif_ipip_lb *lb_rif;
1564 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1566 lb_rif = ipip_entry->ol_lb;
1567 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1568 lb_rif->ul_rif_id, true);
1571 lb_rif->common.mtu = ol_dev->mtu;
/* Look up the IPIP entry of @ol_dev and run the overlay-up handling on
 * it.
 */
1578 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1579 struct net_device *ol_dev)
1581 struct mlxsw_sp_ipip_entry *ipip_entry;
1583 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1585 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1589 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1590 struct mlxsw_sp_ipip_entry *ipip_entry)
/* Overlay-down handling: demote the decap route if the entry has one. */
1592 if (ipip_entry->decap_fib_entry)
1593 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
/* Look up the IPIP entry of @ol_dev and run the overlay-down handling on
 * it.
 */
1596 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1597 struct net_device *ol_dev)
1599 struct mlxsw_sp_ipip_entry *ipip_entry;
1601 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1603 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
/* Forward declaration; called by mlxsw_sp_ipip_entry_ol_lb_update() to
 * move nexthops from an old loopback RIF to its replacement.
 */
1606 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1607 struct mlxsw_sp_rif *old_rif,
1608 struct mlxsw_sp_rif *new_rif);
1610 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1611 struct mlxsw_sp_ipip_entry *ipip_entry,
1613 struct netlink_ext_ack *extack)
/* Replace the tunnel's loopback RIF: create a new one, install it in
 * ipip_entry->ol_lb, migrate nexthops from the old RIF to the new one
 * and destroy the old RIF. A creation failure is returned as PTR_ERR().
 * NOTE(review): the nexthop migration looks conditional on keep_encap -
 * confirm against the full function.
 */
1615 struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1616 struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1618 new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1622 if (IS_ERR(new_lb_rif))
1623 return PTR_ERR(new_lb_rif);
1624 ipip_entry->ol_lb = new_lb_rif;
1627 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1628 &new_lb_rif->common);
1630 mlxsw_sp_rif_destroy(&old_lb_rif->common);
/* Forward declaration; called by __mlxsw_sp_ipip_entry_update_tunnel()
 * to refresh nexthops on the current loopback RIF.
 */
1635 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1636 struct mlxsw_sp_rif *rif);
1639 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1640 * @mlxsw_sp: mlxsw_sp.
1641 * @ipip_entry: IPIP entry.
1642 * @recreate_loopback: Recreates the associated loopback RIF.
1643 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1644 * relevant when recreate_loopback is true.
1645 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1646 * is only relevant when recreate_loopback is false.
1649 * Return: Non-zero value on failure.
1651 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1652 struct mlxsw_sp_ipip_entry *ipip_entry,
1653 bool recreate_loopback,
1655 bool update_nexthops,
1656 struct netlink_ext_ack *extack)
/* Order of operations: demote any decap route, then either recreate the
 * loopback RIF or refresh nexthops on the current one, and finally
 * re-run the overlay-up handling if the overlay device is IFF_UP.
 */
1660 /* RIFs can't be edited, so to update loopback, we need to destroy and
1661 * recreate it. That creates a window of opportunity where RALUE and
1662 * RATR registers end up referencing a RIF that's already gone. RATRs
1663 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1664 * of RALUE, demote the decap route back.
1666 if (ipip_entry->decap_fib_entry)
1667 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1669 if (recreate_loopback) {
1670 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1671 keep_encap, extack);
1674 } else if (update_nexthops) {
1675 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1676 &ipip_entry->ol_lb->common);
1679 if (ipip_entry->ol_dev->flags & IFF_UP)
1680 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
/* Overlay device @ol_dev moved to another VRF: update the tunnel with
 * recreate_loopback=true, keep_encap=false and update_nexthops=false.
 */
1685 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1686 struct net_device *ol_dev,
1687 struct netlink_ext_ack *extack)
1689 struct mlxsw_sp_ipip_entry *ipip_entry =
1690 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1695 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1696 true, false, false, extack);
1700 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1701 struct mlxsw_sp_ipip_entry *ipip_entry,
1702 struct net_device *ul_dev,
1704 struct netlink_ext_ack *extack)
/* Underlay device @ul_dev moved to another VRF. The new underlay table
 * is the l3mdev table of @ul_dev, or RT_TABLE_MAIN if it has none. On a
 * source-address conflict *demote_this is set; the non-conflict path
 * updates the tunnel with the loopback recreated and encap kept.
 */
1706 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1707 enum mlxsw_sp_l3proto ul_proto;
1708 union mlxsw_sp_l3addr saddr;
1710 /* Moving underlay to a different VRF might cause local address
1711 * conflict, and the conflicting tunnels need to be demoted.
1713 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1714 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1715 if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1718 *demote_this = true;
1722 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1723 true, true, false, extack);
1727 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1728 struct mlxsw_sp_ipip_entry *ipip_entry,
1729 struct net_device *ul_dev)
/* Underlay-up handling: refresh nexthops only (update_nexthops=true),
 * keeping the current loopback RIF. @ul_dev is not used in this call.
 */
1731 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1732 false, false, true, NULL);
1736 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1737 struct mlxsw_sp_ipip_entry *ipip_entry,
1738 struct net_device *ul_dev)
/* Same update call as the underlay-up handler: update_nexthops=true and
 * no loopback recreation.
 */
1740 /* A down underlay device causes encapsulated packets to not be
1741 * forwarded, but decap still works. So refresh next hops without
1742 * touching anything else.
1744 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1745 false, false, true, NULL);
1749 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1750 struct net_device *ol_dev,
1751 struct netlink_ext_ack *extack)
/* Overlay device configuration changed: demote the tunnel if it can no
 * longer be offloaded; otherwise hand the change to the tunnel type's
 * ol_netdev_change() callback.
 */
1753 const struct mlxsw_sp_ipip_ops *ipip_ops;
1754 struct mlxsw_sp_ipip_entry *ipip_entry;
1757 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1759 /* A change might make a tunnel eligible for offloading, but
1760 * that is currently not implemented. What falls to slow path
1765 /* A change might make a tunnel not eligible for offloading. */
1766 if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1767 ipip_entry->ipipt)) {
1768 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1772 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1773 err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
/* Stop offloading a tunnel: run the overlay-down handling if the overlay
 * device is up, then destroy the IPIP entry. @ipip_entry must not be
 * used by the caller afterwards.
 */
1777 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1778 struct mlxsw_sp_ipip_entry *ipip_entry)
1780 struct net_device *ol_dev = ipip_entry->ol_dev;
1782 if (ol_dev->flags & IFF_UP)
1783 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1784 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
/* Implementation note: the list is walked with
 * list_for_each_entry_safe() because demoting a tunnel destroys its
 * list entry.
 */
1787 /* The configuration where several tunnels have the same local address in the
1788 * same underlay table needs special treatment in the HW. That is currently not
1789 * implemented in the driver. This function finds and demotes the first tunnel
1790 * with a given source address, except the one passed in in the argument
1794 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1795 enum mlxsw_sp_l3proto ul_proto,
1796 union mlxsw_sp_l3addr saddr,
1798 const struct mlxsw_sp_ipip_entry *except)
1800 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1802 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1804 if (ipip_entry != except &&
1805 mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1806 ul_tb_id, ipip_entry)) {
1807 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Demote every tunnel whose underlay device is @ul_dev. The safe list
 * iterator is used because demotion removes entries from the list.
 */
1815 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1816 struct net_device *ul_dev)
1818 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1820 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1822 struct net_device *ol_dev = ipip_entry->ol_dev;
1823 struct net_device *ipip_ul_dev;
1826 ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1828 if (ipip_ul_dev == ul_dev)
1829 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Dispatch a netdevice @event on overlay device @ol_dev to the matching
 * handler (register, unregister, up, down, changeupper to an L3 master,
 * change, MTU change) while holding the router lock.
 */
1833 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1834 struct net_device *ol_dev,
1835 unsigned long event,
1836 struct netdev_notifier_info *info)
1838 struct netdev_notifier_changeupper_info *chup;
1839 struct netlink_ext_ack *extack;
1842 mutex_lock(&mlxsw_sp->router->lock);
1844 case NETDEV_REGISTER:
1845 err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1847 case NETDEV_UNREGISTER:
1848 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1851 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1854 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1856 case NETDEV_CHANGEUPPER:
1857 chup = container_of(info, typeof(*chup), info);
1858 extack = info->extack;
1859 if (netif_is_l3_master(chup->upper_dev))
1860 err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1865 extack = info->extack;
1866 err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1869 case NETDEV_CHANGEMTU:
1870 err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1873 mutex_unlock(&mlxsw_sp->router->lock);
1878 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1879 struct mlxsw_sp_ipip_entry *ipip_entry,
1880 struct net_device *ul_dev,
1882 unsigned long event,
1883 struct netdev_notifier_info *info)
/* Dispatch one underlay-device event for a single IPIP entry. For
 * NETDEV_CHANGEUPPER the VRF handler runs only when the new upper device
 * is an L3 master.
 */
1885 struct netdev_notifier_changeupper_info *chup;
1886 struct netlink_ext_ack *extack;
1889 case NETDEV_CHANGEUPPER:
1890 chup = container_of(info, typeof(*chup), info);
1891 extack = info->extack;
1892 if (netif_is_l3_master(chup->upper_dev))
1893 return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1901 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1904 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1912 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1913 struct net_device *ul_dev,
1914 unsigned long event,
1915 struct netdev_notifier_info *info)
/* Apply an underlay-device event to every IPIP entry that uses @ul_dev,
 * under the router lock. An entry flagged through demote_this is
 * demoted (and thereby destroyed) inside the loop, so the walk appears
 * to resume from its predecessor, with special handling when the entry
 * was first in the list.
 */
1917 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1920 mutex_lock(&mlxsw_sp->router->lock);
1921 while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1924 struct mlxsw_sp_ipip_entry *prev;
1925 bool demote_this = false;
1927 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1928 ul_dev, &demote_this,
1931 mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1937 if (list_is_first(&ipip_entry->ipip_list_node,
1938 &mlxsw_sp->router->ipip_list))
1941 /* This can't be cached from previous iteration,
1942 * because that entry could be gone now.
1944 prev = list_prev_entry(ipip_entry,
1946 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1950 mutex_unlock(&mlxsw_sp->router->lock);
/* Record the NVE decap configuration (table, tunnel index, protocol,
 * source IP) in the router and look for a matching FIB entry of type
 * TRAP; if one is found it is converted to an NVE decap entry. A failed
 * update switches the entry back to TRAP. Runs under the router lock.
 * A configuration that is already valid triggers a one-time warning.
 */
1955 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1956 enum mlxsw_sp_l3proto ul_proto,
1957 const union mlxsw_sp_l3addr *ul_sip,
1960 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1961 struct mlxsw_sp_router *router = mlxsw_sp->router;
1962 struct mlxsw_sp_fib_entry *fib_entry;
1965 mutex_lock(&mlxsw_sp->router->lock);
1967 if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
1972 router->nve_decap_config.ul_tb_id = ul_tb_id;
1973 router->nve_decap_config.tunnel_index = tunnel_index;
1974 router->nve_decap_config.ul_proto = ul_proto;
1975 router->nve_decap_config.ul_sip = *ul_sip;
1976 router->nve_decap_config.valid = true;
1978 /* It is valid to create a tunnel with a local IP and only later
1979 * assign this IP address to a local interface
1981 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1987 fib_entry->decap.tunnel_index = tunnel_index;
1988 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1990 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1992 goto err_fib_entry_update;
1996 err_fib_entry_update:
1997 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1998 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2000 mutex_unlock(&mlxsw_sp->router->lock);
/* Invalidate the router's NVE decap configuration and look for the
 * matching FIB entry of type NVE_DECAP, which is turned back into a TRAP
 * entry. Runs under the router lock. A configuration that is not valid
 * triggers a one-time warning.
 */
2004 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2005 enum mlxsw_sp_l3proto ul_proto,
2006 const union mlxsw_sp_l3addr *ul_sip)
2008 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2009 struct mlxsw_sp_router *router = mlxsw_sp->router;
2010 struct mlxsw_sp_fib_entry *fib_entry;
2012 mutex_lock(&mlxsw_sp->router->lock);
2014 if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2017 router->nve_decap_config.valid = false;
2019 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2025 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2026 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2028 mutex_unlock(&mlxsw_sp->router->lock);
/* True when the router holds a valid NVE decap configuration whose
 * table id, protocol and source IP (compared with memcmp()) all equal
 * the given ones.
 */
2031 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2033 enum mlxsw_sp_l3proto ul_proto,
2034 const union mlxsw_sp_l3addr *ul_sip)
2036 struct mlxsw_sp_router *router = mlxsw_sp->router;
2038 return router->nve_decap_config.valid &&
2039 router->nve_decap_config.ul_tb_id == ul_tb_id &&
2040 router->nve_decap_config.ul_proto == ul_proto &&
2041 !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
/* Hash-table key of a neighbour entry: the kernel neighbour pointer. */
2045 struct mlxsw_sp_neigh_key {
2046 struct neighbour *n;
/* Per-neighbour driver state. An entry is linked on its RIF's neigh_list
 * through rif_list_node and hashed in the router's neigh_ht through
 * ht_node, keyed by @key. @ha holds the neighbour's hardware address and
 * @counter_index the flow counter attached to the entry, if any.
 */
2049 struct mlxsw_sp_neigh_entry {
2050 struct list_head rif_list_node;
2051 struct rhash_head ht_node;
2052 struct mlxsw_sp_neigh_key key;
2055 unsigned char ha[ETH_ALEN];
2056 struct list_head nexthop_list; /* list of nexthops using
2059 struct list_head nexthop_neighs_list_node;
2060 unsigned int counter_index;
/* rhashtable layout for the neighbour hash table: entries are keyed by
 * the whole struct mlxsw_sp_neigh_key embedded in each entry.
 */
2064 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2065 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2066 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2067 .key_len = sizeof(struct mlxsw_sp_neigh_key),
/* Step through rif->neigh_list: yields either the first entry of the
 * list or the entry following @neigh_entry.
 * NOTE(review): presumably a NULL @neigh_entry selects the first entry
 * and NULL is returned at the end of the list - confirm.
 */
2070 struct mlxsw_sp_neigh_entry *
2071 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2072 struct mlxsw_sp_neigh_entry *neigh_entry)
2075 if (list_empty(&rif->neigh_list))
2078 return list_first_entry(&rif->neigh_list,
2079 typeof(*neigh_entry),
2082 if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2084 return list_next_entry(neigh_entry, rif_list_node);
/* Address family of the neighbour table that the entry's kernel
 * neighbour belongs to.
 */
2087 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2089 return neigh_entry->key.n->tbl->family;
2093 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
/* Accessor for the hardware address (ETH_ALEN bytes) kept in the entry. */
2095 return neigh_entry->ha;
/* The neighbour's primary key read as a big-endian 32-bit value and
 * converted to host byte order. Intended for IPv4 neighbours.
 */
2098 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2100 struct neighbour *n;
2102 n = neigh_entry->key.n;
2103 return ntohl(*((__be32 *) n->primary_key));
2107 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
/* The neighbour's primary key viewed as an IPv6 address; no copy is
 * made, the returned pointer refers to the neighbour's own storage.
 */
2109 struct neighbour *n;
2111 n = neigh_entry->key.n;
2112 return (struct in6_addr *) &n->primary_key;
/* Read the flow counter bound to the neighbour entry. Entries without a
 * valid counter are rejected before the counter is queried.
 */
2115 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2116 struct mlxsw_sp_neigh_entry *neigh_entry,
2119 if (!neigh_entry->counter_valid)
2122 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
/* Allocate a zeroed neighbour entry (GFP_KERNEL) and initialise its key,
 * RIF and empty nexthop list.
 */
2126 static struct mlxsw_sp_neigh_entry *
2127 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2130 struct mlxsw_sp_neigh_entry *neigh_entry;
2132 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2136 neigh_entry->key.n = n;
2137 neigh_entry->rif = rif;
2138 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
/* Release a neighbour entry obtained from mlxsw_sp_neigh_entry_alloc(). */
2143 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2149 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2150 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Add the entry to the router's neighbour hash table; the rhashtable
 * return code is passed through.
 */
2152 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2153 &neigh_entry->ht_node,
2154 mlxsw_sp_neigh_ht_params);
2158 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2159 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Drop the entry from the router's neighbour hash table. */
2161 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2162 &neigh_entry->ht_node,
2163 mlxsw_sp_neigh_ht_params);
2167 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2168 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Decide whether the entry should get a counter: pick the dpipe host
 * table name matching the entry's address family (HOST4 or HOST6) and
 * ask devlink whether counters are enabled on that table.
 */
2170 struct devlink *devlink;
2171 const char *table_name;
2173 switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2175 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2178 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2185 devlink = priv_to_devlink(mlxsw_sp->core);
2186 return devlink_dpipe_table_counter_enabled(devlink, table_name);
2190 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2191 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Best-effort counter allocation: counter_valid is set only when a
 * counter is wanted for this entry and the flow counter allocation
 * succeeded.
 */
2193 if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2196 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2199 neigh_entry->counter_valid = true;
2203 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2204 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Release the entry's flow counter, if it has one, and clear
 * counter_valid.
 */
2206 if (!neigh_entry->counter_valid)
2208 mlxsw_sp_flow_counter_free(mlxsw_sp,
2209 neigh_entry->counter_index);
2210 neigh_entry->counter_valid = false;
/* Create a neighbour entry for @n on the RIF found for n->dev: allocate
 * it, insert it into the hash table, try to attach a counter and link it
 * on the RIF's neigh_list. Failures are reported as ERR_PTR values
 * (-EINVAL, -ENOMEM, or the hash-table insertion error).
 */
2213 static struct mlxsw_sp_neigh_entry *
2214 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2216 struct mlxsw_sp_neigh_entry *neigh_entry;
2217 struct mlxsw_sp_rif *rif;
2220 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2222 return ERR_PTR(-EINVAL);
2224 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2226 return ERR_PTR(-ENOMEM);
2228 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2230 goto err_neigh_entry_insert;
2232 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2233 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2237 err_neigh_entry_insert:
2238 mlxsw_sp_neigh_entry_free(neigh_entry);
2239 return ERR_PTR(err);
2243 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2244 struct mlxsw_sp_neigh_entry *neigh_entry)
/* Undo mlxsw_sp_neigh_entry_create(): unlink from the RIF's list, free
 * the counter, remove from the hash table and free the entry.
 */
2246 list_del(&neigh_entry->rif_list_node);
2247 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2248 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2249 mlxsw_sp_neigh_entry_free(neigh_entry);
/* Find the entry for neighbour @n in the router's neighbour hash table,
 * using a key built on the stack.
 */
2252 static struct mlxsw_sp_neigh_entry *
2253 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2255 struct mlxsw_sp_neigh_key key;
2258 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2259 &key, mlxsw_sp_neigh_ht_params);
2263 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
/* Derive the neighbour polling interval, stored in milliseconds, from
 * DELAY_PROBE_TIME of the ARP table. When IPv6 is enabled, the smaller
 * of the ARP and ND table values is used.
 */
2265 unsigned long interval;
2267 #if IS_ENABLED(CONFIG_IPV6)
2268 interval = min_t(unsigned long,
2269 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2270 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2272 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2274 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
/* Handle one IPv4 entry unpacked from a RAUHTD payload: map its RIF
 * index to a netdevice, look the address up in the ARP table and send a
 * neigh event for it. A RIF index with no RIF behind it is logged
 * (rate-limited).
 */
2277 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2281 struct net_device *dev;
2282 struct neighbour *n;
2287 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2289 if (!mlxsw_sp->router->rifs[rif]) {
2290 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2295 dev = mlxsw_sp->router->rifs[rif]->dev;
2296 n = neigh_lookup(&arp_tbl, &dipn, dev);
2300 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2301 neigh_event_send(n, NULL);
2305 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of the IPv4 entry handler: unpacks one IPv6 record,
 * maps its RIF index to a netdevice, looks the address up in the ND
 * table and sends a neigh event for it. A second definition with the
 * same name follows for the configuration without IPv6.
 */
2306 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2310 struct net_device *dev;
2311 struct neighbour *n;
2312 struct in6_addr dip;
2315 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2318 if (!mlxsw_sp->router->rifs[rif]) {
2319 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2323 dev = mlxsw_sp->router->rifs[rif]->dev;
2324 n = neigh_lookup(&nd_tbl, &dip, dev);
2328 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2329 neigh_event_send(n, NULL);
2333 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
/* Walk the entries of one IPv4 record. The entry index handed to the
 * per-entry handler is payload-wide:
 * rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i.
 */
2340 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2347 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2349 /* Hardware starts counting at 0, so add 1. */
2352 /* Each record consists of several neighbour entries. */
2353 for (i = 0; i < num_entries; i++) {
2356 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2357 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
/* Process one IPv6 record by handing it to the IPv6 entry handler. */
2363 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2367 /* One record contains one entry. */
2368 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Dispatch one dumped record to the IPv4 or IPv6 record handler
 * according to the record type stored in the payload.
 */
2372 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2373 char *rauhtd_pl, int rec_index)
2375 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2376 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2377 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2380 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2381 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Check whether a RAUHTD response is full; the result is used as the
 * dump loop condition. The checks are: the record count against
 * MLXSW_REG_RAUHTD_REC_MAX_NUM, whether the last record is IPv6, and,
 * for an IPv4 last record, whether its entry count (plus one) reaches
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC.
 * NOTE(review): the value returned by each check is not shown - confirm.
 */
2387 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2389 u8 num_rec, last_rec_index, num_entries;
2391 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2392 last_rec_index = num_rec - 1;
2394 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2396 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2397 MLXSW_REG_RAUHTD_TYPE_IPV6)
2400 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2402 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2408 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2410 enum mlxsw_reg_rauhtd_type type)
/* Query the RAUHTD register for @type and process every returned
 * record, repeating for as long as the response is full. The whole dump
 * runs under the router lock.
 */
2415 /* Ensure the RIF we read from the device does not change mid-dump. */
2416 mutex_lock(&mlxsw_sp->router->lock);
2418 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2419 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2422 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2425 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2426 for (i = 0; i < num_rec; i++)
2427 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2429 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2430 mutex_unlock(&mlxsw_sp->router->lock);
/* Dump both neighbour types (IPv4 first, then IPv6) through one
 * kmalloc()ed RAUHTD payload buffer.
 */
2435 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2437 enum mlxsw_reg_rauhtd_type type;
2441 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2445 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2446 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2450 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2451 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
/* Send a neigh event for every entry on the router's
 * nexthop_neighs_list, under the router lock.
 */
2457 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2459 struct mlxsw_sp_neigh_entry *neigh_entry;
2461 mutex_lock(&mlxsw_sp->router->lock);
2462 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2463 nexthop_neighs_list_node)
2464 /* If this neigh has nexthops, make the kernel think this neigh
2465 * is active regardless of the traffic.
2467 neigh_event_send(neigh_entry->key.n, NULL);
2468 mutex_unlock(&mlxsw_sp->router->lock);
2472 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
/* Queue the neighbour update delayed work to run after the configured
 * interval, which is kept in milliseconds and converted to jiffies here.
 */
2474 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2476 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2477 msecs_to_jiffies(interval));
/* Delayed-work handler for neighbour activity polling: dump the device's
 * neighbour activity, refresh the neighbours used by nexthops, and
 * re-arm the work. A failed dump is only logged.
 */
2480 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2482 struct mlxsw_sp_router *router;
2485 router = container_of(work, struct mlxsw_sp_router,
2486 neighs_update.dw.work);
2487 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
/* The message is newline-terminated, like the other dev_err*() calls in
 * this file, so that it forms a complete log record.
 */
2489 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2491 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2493 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
/* Delayed-work handler: under the router lock, send a neigh event for
 * every nexthop neighbour that is not marked connected, then re-arm
 * itself with MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL.
 */
2496 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2498 struct mlxsw_sp_neigh_entry *neigh_entry;
2499 struct mlxsw_sp_router *router;
2501 router = container_of(work, struct mlxsw_sp_router,
2502 nexthop_probe_dw.work);
2503 /* Iterate over nexthop neighbours, find those who are unresolved and
2504 * send arp on them. This solves the chicken-egg problem when
2505 * the nexthop wouldn't get offloaded until the neighbor is resolved
2506 * but it wouldn't get resolved ever in case traffic is flowing in HW
2507 * using different nexthop.
2509 mutex_lock(&router->lock);
2510 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2511 nexthop_neighs_list_node)
2512 if (!neigh_entry->connected)
2513 neigh_event_send(neigh_entry->key.n, NULL);
2514 mutex_unlock(&router->lock);
2516 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2517 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2521 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2522 struct mlxsw_sp_neigh_entry *neigh_entry,
2523 bool removing, bool dead);
/* The declaration above is a forward declaration only (it ends in ';'). */
/* Map the add/delete intent onto the RAUHT register write operation. */
2525 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2527 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2528 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2532 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2533 struct mlxsw_sp_neigh_entry *neigh_entry,
2534 enum mlxsw_reg_rauht_op op)
/* Apply @op to the IPv4 host entry of this neighbour through the RAUHT
 * register: the payload carries the RIF, the hardware address and the
 * host-order IPv4 address, plus the counter index when the entry has a
 * valid counter.
 */
2536 struct neighbour *n = neigh_entry->key.n;
2537 u32 dip = ntohl(*((__be32 *) n->primary_key));
2538 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2540 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2542 if (neigh_entry->counter_valid)
2543 mlxsw_reg_rauht_pack_counter(rauht_pl,
2544 neigh_entry->counter_index);
2545 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2549 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2550 struct mlxsw_sp_neigh_entry *neigh_entry,
2551 enum mlxsw_reg_rauht_op op)
/* Apply @op to the IPv6 host entry of this neighbour through the RAUHT
 * register. The address is taken directly from the neighbour's primary
 * key; the counter index is added when the entry has a valid counter.
 */
2553 struct neighbour *n = neigh_entry->key.n;
2554 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2555 const char *dip = n->primary_key;
2557 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2559 if (neigh_entry->counter_valid)
2560 mlxsw_reg_rauht_pack_counter(rauht_pl,
2561 neigh_entry->counter_index);
2562 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Tell whether an IPv6 neighbour should be skipped when programming the
 * device. The visible check tests the neighbour's primary key for the
 * link-local address type.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably true for link-local and false otherwise.
 */
2565 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2567 struct neighbour *n = neigh_entry->key.n;
2569 /* Packets with a link-local destination address are trapped
2570 * after LPM lookup and never reach the neighbour table, so
2571 * there is no need to program such neighbours to the device.
2573 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2574 IPV6_ADDR_LINKLOCAL)
/* Add (@adding true) or delete (@adding false) a neighbour entry in the
 * device and mirror the result in the kernel neighbour's NTF_OFFLOADED flag.
 *
 * Visible steps: a delete request for an entry that is not connected is
 * checked first; the entry's connected state is then set to @adding; the
 * IPv4 or IPv6 RAUHT helper is called according to the neighbour table's
 * family (IPv6 entries are first checked with mlxsw_sp_neigh_ipv6_ignore());
 * finally NTF_OFFLOADED is set or cleared.
 * NOTE(review): the statements taken after each check (presumably early
 * returns) and the third parameter line are not visible in this listing.
 */
2580 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2581 struct mlxsw_sp_neigh_entry *neigh_entry,
2584 enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2587 if (!adding && !neigh_entry->connected)
2589 neigh_entry->connected = adding;
2590 if (neigh_entry->key.n->tbl->family == AF_INET) {
2591 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2595 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2596 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2598 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2608 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2610 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
/* Allocate or free the neighbour entry's counter, then re-write the entry to
 * the device as an add so the new counter binding takes effect.
 * NOTE(review): the condition choosing between alloc and free, and the third
 * parameter line, are not visible in this listing.
 */
2614 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2615 struct mlxsw_sp_neigh_entry *neigh_entry,
2619 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2621 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2622 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
/* Context for work deferred from the netevent notifier: the work item itself,
 * the driver instance it belongs to, and the neighbour the event refers to
 * (read by the neighbour event worker).
 */
2625 struct mlxsw_sp_netevent_work {
2626 struct work_struct work;
2627 struct mlxsw_sp *mlxsw_sp;
2628 struct neighbour *n;
/* Work handler for a neighbour update event.
 *
 * The neighbour's hardware address and NUD state are snapshotted under
 * n->lock. Then, with the router lock held: SPAN is respun, the driver's
 * neighbour entry is looked up (and created via
 * mlxsw_sp_neigh_entry_create() on a path whose guarding condition is not
 * visible here), its hardware address is refreshed, the entry and the
 * nexthops using it are updated, and the entry is destroyed when it is
 * neither connected nor referenced by any nexthop.
 *
 * An entry counts as connected when the NUD state has a NUD_VALID bit set and
 * the neighbour is not dead.
 * NOTE(review): where "dead" is read, and the cleanup after the unlock, are
 * not visible in this listing.
 */
2631 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2633 struct mlxsw_sp_netevent_work *net_work =
2634 container_of(work, struct mlxsw_sp_netevent_work, work);
2635 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2636 struct mlxsw_sp_neigh_entry *neigh_entry;
2637 struct neighbour *n = net_work->n;
2638 unsigned char ha[ETH_ALEN];
2639 bool entry_connected;
2642 /* If these parameters are changed after we release the lock,
2643 * then we are guaranteed to receive another event letting us
2646 read_lock_bh(&n->lock);
2647 memcpy(ha, n->ha, ETH_ALEN);
2648 nud_state = n->nud_state;
2650 read_unlock_bh(&n->lock);
2652 mutex_lock(&mlxsw_sp->router->lock);
2653 mlxsw_sp_span_respin(mlxsw_sp);
2655 entry_connected = nud_state & NUD_VALID && !dead;
2656 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2657 if (!entry_connected && !neigh_entry)
2660 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2661 if (IS_ERR(neigh_entry))
2665 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2666 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2667 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2670 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2671 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2674 mutex_unlock(&mlxsw_sp->router->lock);
/* Forward declaration; called by the multipath hash work handler below. */
2679 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
/* Work handler scheduled for the IPv4/IPv6 multipath hash update netevents:
 * re-runs mlxsw_sp_mp_hash_init() for the driver instance carried by the work
 * item.
 * NOTE(review): locking and release of the work item are not visible in this
 * listing.
 */
2681 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2683 struct mlxsw_sp_netevent_work *net_work =
2684 container_of(work, struct mlxsw_sp_netevent_work, work);
2685 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2687 mlxsw_sp_mp_hash_init(mlxsw_sp);
/* Forward declaration; called by the update-priority work handler below. */
2691 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
/* Work handler scheduled for the IPv4 forward-update-priority netevent:
 * re-runs __mlxsw_sp_router_init() for the driver instance carried by the
 * work item.
 * NOTE(review): release of the work item is not visible in this listing.
 */
2693 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2695 struct mlxsw_sp_netevent_work *net_work =
2696 container_of(work, struct mlxsw_sp_netevent_work, work);
2697 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2699 __mlxsw_sp_router_init(mlxsw_sp);
/* Allocate a netevent work item and schedule @cb on it.
 *
 * The router is recovered from @nb, and the event's namespace @net is
 * compared with the router's own namespace before anything is allocated. The
 * work item is allocated with GFP_ATOMIC and carries only the driver
 * instance.
 * NOTE(review): the return values for the namespace-mismatch,
 * allocation-failure and success paths are not visible in this listing.
 */
2703 static int mlxsw_sp_router_schedule_work(struct net *net,
2704 struct notifier_block *nb,
2705 void (*cb)(struct work_struct *))
2707 struct mlxsw_sp_netevent_work *net_work;
2708 struct mlxsw_sp_router *router;
2710 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2711 if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2714 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2718 INIT_WORK(&net_work->work, cb);
2719 net_work->mlxsw_sp = router->mlxsw_sp;
2720 mlxsw_core_schedule_work(&net_work->work);
/* Netevent notifier callback. Visible handling per event:
 *
 * - NETEVENT_DELAY_PROBE_TIME_UPDATE: for IPv4/IPv6 neigh parms bound to a
 *   device, a port is obtained with mlxsw_sp_port_lower_dev_hold() and the
 *   router's neighs_update.interval is set to the parms' DELAY_PROBE_TIME
 *   converted to milliseconds; the port reference is then dropped.
 * - NETEVENT_NEIGH_UPDATE: for IPv4/IPv6 neighbours reachable through such a
 *   port, a work item is allocated with GFP_ATOMIC and
 *   mlxsw_sp_router_neigh_event_work() is scheduled; the port reference is
 *   dropped on both the allocation-failure and the success path.
 * - NETEVENT_IPV4/IPV6_MPATH_HASH_UPDATE and
 *   NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: delegated to
 *   mlxsw_sp_router_schedule_work() with the matching work handler.
 *
 * NOTE(review): the switch statement, the assignments of p and n from @ptr,
 * the neighbour reference acquisition and the return values are not visible
 * in this listing.
 */
2724 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2725 unsigned long event, void *ptr)
2727 struct mlxsw_sp_netevent_work *net_work;
2728 struct mlxsw_sp_port *mlxsw_sp_port;
2729 struct mlxsw_sp *mlxsw_sp;
2730 unsigned long interval;
2731 struct neigh_parms *p;
2732 struct neighbour *n;
2735 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2738 /* We don't care about changes in the default table. */
2739 if (!p->dev || (p->tbl->family != AF_INET &&
2740 p->tbl->family != AF_INET6))
2743 /* We are in atomic context and can't take RTNL mutex,
2744 * so use RCU variant to walk the device chain.
2746 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2750 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2751 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2752 mlxsw_sp->router->neighs_update.interval = interval;
2754 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2756 case NETEVENT_NEIGH_UPDATE:
2759 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2762 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2766 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2768 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2772 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2773 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2776 /* Take a reference to ensure the neighbour won't be
2777 * destructed until we drop the reference in delayed
2781 mlxsw_core_schedule_work(&net_work->work);
2782 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2784 case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2785 case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2786 return mlxsw_sp_router_schedule_work(ptr, nb,
2787 mlxsw_sp_router_mp_hash_event_work);
2789 case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2790 return mlxsw_sp_router_schedule_work(ptr, nb,
2791 mlxsw_sp_router_update_priority_work);
/* Set up neighbour tracking for the router: initialise the neighbour hash
 * table, initialise the polling interval, and create the two delayed works
 * (neighbour activity update and unresolved-nexthop probing), both scheduled
 * with zero delay.
 * NOTE(review): error handling after rhashtable_init() and the return value
 * are not visible in this listing.
 */
2797 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2801 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2802 &mlxsw_sp_neigh_ht_params);
2806 /* Initialize the polling interval according to the default
2809 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2811 /* Create the delayed works for the activity_update */
2812 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2813 mlxsw_sp_router_neighs_update_work);
2814 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2815 mlxsw_sp_router_probe_unresolved_nexthops);
2816 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2817 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
/* Undo mlxsw_sp_neigh_init(): synchronously cancel both delayed works first,
 * so neither can run again, and only then destroy the neighbour hash table.
 */
2821 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2823 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2824 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2825 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
/* For every neighbour entry on the RIF's neigh_list: request its deletion
 * from the device and destroy the entry. The _safe list iterator is used
 * because each entry is destroyed inside the loop.
 */
2828 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2829 struct mlxsw_sp_rif *rif)
2831 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2833 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2835 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2836 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Kind of nexthop. The group update code dispatches on this value: ETH
 * nexthops are written via mlxsw_sp_nexthop_update(), IPIP nexthops via
 * mlxsw_sp_nexthop_ipip_update().
 */
2840 enum mlxsw_sp_nexthop_type {
2841 MLXSW_SP_NEXTHOP_TYPE_ETH,
2842 MLXSW_SP_NEXTHOP_TYPE_IPIP,
/* Key of the router's nexthop hash table (see mlxsw_sp_nexthop_ht_params):
 * the kernel fib_nh the driver nexthop mirrors.
 */
2845 struct mlxsw_sp_nexthop_key {
2846 struct fib_nh *fib_nh;
/* Driver state for a single nexthop.
 *
 * A nexthop is linked on up to three lists (its neighbour entry's nexthop
 * list, its RIF's nexthop list and the router-wide nexthop list) and is
 * hashed by @key. The should_offload / offloaded / update / discard bits are
 * described inline below.
 * NOTE(review): some members referenced elsewhere in this file (e.g. ifindex,
 * nh_weight, norm_nh_weight, counter_valid) and the closing lines of several
 * inline comments are not visible in this listing.
 */
2849 struct mlxsw_sp_nexthop {
2850 struct list_head neigh_list_node; /* member of neigh entry list */
2851 struct list_head rif_list_node;
2852 struct list_head router_list_node;
2853 struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
2854 * this nexthop belongs to
2856 struct rhash_head ht_node;
2857 struct neigh_table *neigh_tbl;
2858 struct mlxsw_sp_nexthop_key key;
2859 unsigned char gw_addr[sizeof(struct in6_addr)];
2863 int num_adj_entries;
2864 struct mlxsw_sp_rif *rif;
2865 u8 should_offload:1, /* set indicates this neigh is connected and
2866 * should be put to KVD linear area of this group.
2868 offloaded:1, /* set in case the neigh is actually put into
2869 * KVD linear area of this group.
2871 update:1, /* set indicates that MAC of this neigh should be
2874 discard:1; /* nexthop is programmed to discard packets */
2875 enum mlxsw_sp_nexthop_type type;
2877 struct mlxsw_sp_neigh_entry *neigh_entry;
2878 struct mlxsw_sp_ipip_entry *ipip_entry;
2880 unsigned int counter_index;
/* Origin of a nexthop group. The type selects how the group is hashed and
 * compared (by fib_info pointer for IPV4, by nexthop contents for IPV6, by
 * id for OBJ) and how its offload flags are refreshed.
 */
2884 enum mlxsw_sp_nexthop_group_type {
2885 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
2886 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
2887 MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
/* Per-group nexthop information: back-pointer to the owning group, the sum of
 * the normalised weights of the nexthops that should be offloaded, the
 * adjacency-index-valid and gateway flags, and the trailing array of
 * nexthops. nh_rif is shorthand for the RIF of the first nexthop.
 * NOTE(review): members referenced elsewhere in this file (adj_index,
 * ecmp_size, count) are not visible in this listing. nexthops[0] is a
 * zero-length trailing array; a C99 flexible array member (nexthops[]) would
 * be the standard form.
 */
2890 struct mlxsw_sp_nexthop_group_info {
2891 struct mlxsw_sp_nexthop_group *nh_grp;
2895 int sum_norm_weight;
2896 u8 adj_index_valid:1,
2897 gateway:1; /* routes using the group use a gateway */
2898 struct mlxsw_sp_nexthop nexthops[0];
2899 #define nh_rif nexthops[0].rif
/* Hash key identifying one (virtual router, L3 protocol) pair in which a
 * nexthop group is used.
 * NOTE(review): the vr_id member used elsewhere in this file is not visible
 * in this listing.
 */
2902 struct mlxsw_sp_nexthop_group_vr_key {
2904 enum mlxsw_sp_l3proto proto;
/* Reference-counted record that a nexthop group is used by a given
 * (virtual router, protocol) pair. Each entry lives both on the group's
 * vr_list and in its vr_ht hash table.
 */
2907 struct mlxsw_sp_nexthop_group_vr_entry {
2908 struct list_head list; /* member in vr_list */
2909 struct rhash_head ht_node; /* member in vr_ht */
2910 refcount_t ref_count;
2911 struct mlxsw_sp_nexthop_group_vr_key key;
/* A nexthop group shared by FIB entries: membership in the router's group
 * hash table, the list of FIB entries using the group, the group's nexthop
 * info, and the list / hash table of (virtual router, protocol) pairs it is
 * used in.
 * NOTE(review): the union layout around "fi" (accessed elsewhere as ipv4.fi
 * and obj.id) and the can_destroy member are not visible in this listing.
 */
2914 struct mlxsw_sp_nexthop_group {
2915 struct rhash_head ht_node;
2916 struct list_head fib_list; /* list of fib entries that use this group */
2919 struct fib_info *fi;
2925 struct mlxsw_sp_nexthop_group_info *nhgi;
2926 struct list_head vr_list;
2927 struct rhashtable vr_ht;
2928 enum mlxsw_sp_nexthop_group_type type;
/* Try to attach a flow counter to a nexthop. The devlink dpipe adjacency
 * table is checked for counters being enabled, then a flow counter is
 * allocated into nh->counter_index; counter_valid is set once allocation
 * succeeded.
 * NOTE(review): the early-exit statements after the two checks are not
 * visible in this listing.
 */
2932 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2933 struct mlxsw_sp_nexthop *nh)
2935 struct devlink *devlink;
2937 devlink = priv_to_devlink(mlxsw_sp->core);
2938 if (!devlink_dpipe_table_counter_enabled(devlink,
2939 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2942 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2945 nh->counter_valid = true;
/* Release the nexthop's flow counter and mark it invalid. The counter_valid
 * check guards against freeing a counter that was never allocated.
 * NOTE(review): the statement following the check (presumably an early
 * return) is not visible in this listing.
 */
2948 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2949 struct mlxsw_sp_nexthop *nh)
2951 if (!nh->counter_valid)
2953 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2954 nh->counter_valid = false;
/* Read the nexthop's flow counter into @p_counter via
 * mlxsw_sp_flow_counter_get().
 * NOTE(review): the value returned when the nexthop has no valid counter and
 * the remaining arguments of the get call are not visible in this listing.
 */
2957 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2958 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2960 if (!nh->counter_valid)
2963 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
/* Iterator over the router-wide nexthop list. The visible branches return
 * either the first entry of router->nexthop_list or the entry following @nh.
 * NOTE(review): the condition selecting between "first" and "next"
 * (presumably whether @nh is NULL) and the values returned for an empty list
 * or the last entry are not visible in this listing.
 */
2967 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2968 struct mlxsw_sp_nexthop *nh)
2971 if (list_empty(&router->nexthop_list))
2974 return list_first_entry(&router->nexthop_list,
2975 typeof(*nh), router_list_node);
2977 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2979 return list_next_entry(nh, router_list_node);
/* Accessor: report the nexthop's offloaded bit. */
2982 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2984 return nh->offloaded;
/* Accessor: return the hardware address stored in the nexthop's neighbour
 * entry.
 * NOTE(review): any guard preceding the return is not visible in this
 * listing; confirm whether callers may pass a nexthop without a neighbour
 * entry.
 */
2987 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2991 return nh->neigh_entry->ha;
/* Report where a nexthop sits in the adjacency table: the group's base
 * adjacency index (@p_adj_index), the group size (@p_adj_size) and an offset
 * within the group (@p_adj_hash_index) accumulated from num_adj_entries of
 * the offloaded nexthops visited by the loop. The nexthop must be offloaded
 * and its group must have a valid adjacency index.
 * NOTE(review): the failure return value and the loop's stop condition
 * (presumably reaching @nh itself) are not visible in this listing.
 */
2994 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2995 u32 *p_adj_size, u32 *p_adj_hash_index)
2997 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
2998 u32 adj_hash_index = 0;
3001 if (!nh->offloaded || !nhgi->adj_index_valid)
3004 *p_adj_index = nhgi->adj_index;
3005 *p_adj_size = nhgi->ecmp_size;
3007 for (i = 0; i < nhgi->count; i++) {
3008 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3012 if (nh_iter->offloaded)
3013 adj_hash_index += nh_iter->num_adj_entries;
3016 *p_adj_hash_index = adj_hash_index;
/* Accessor returning a RIF for the given nexthop.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns nh->rif.
 */
3020 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
/* Scan all nexthops of the group @nh belongs to, looking for one of type
 * MLXSW_SP_NEXTHOP_TYPE_IPIP.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably true on the first IPIP nexthop found and false otherwise.
 */
3025 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3027 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3030 for (i = 0; i < nhgi->count; i++) {
3031 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3033 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
/* Predicate on a nexthop.
 * NOTE(review): the body is not visible in this listing; presumably it
 * reports the nexthop's discard bit.
 */
3039 bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
/* rhashtable layout for a group's vr_ht: entries are
 * mlxsw_sp_nexthop_group_vr_entry objects keyed by the whole embedded
 * mlxsw_sp_nexthop_group_vr_key; the table shrinks automatically.
 */
3044 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3045 .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3046 .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3047 .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3048 .automatic_shrinking = true,
/* Find the group's VR entry matching the virtual router id and protocol of
 * @fib. Returns the result of rhashtable_lookup_fast().
 */
3051 static struct mlxsw_sp_nexthop_group_vr_entry *
3052 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3053 const struct mlxsw_sp_fib *fib)
3055 struct mlxsw_sp_nexthop_group_vr_key key;
/* The table hashes sizeof(key) bytes, so the key is zeroed in full before
 * its members are filled in (presumably to keep any padding deterministic).
 */
3057 memset(&key, 0, sizeof(key));
3058 key.vr_id = fib->vr->id;
3059 key.proto = fib->proto;
3060 return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3061 mlxsw_sp_nexthop_group_vr_ht_params);
/* Create a VR entry for the (virtual router, protocol) pair of @fib with an
 * initial reference count of 1, insert it into the group's vr_ht and add it
 * to the group's vr_list. kzalloc() zero-fills the entry, key included.
 * NOTE(review): the allocation-failure check, the return values and the
 * statements under err_hashtable_insert (presumably freeing the entry) are
 * not visible in this listing.
 */
3065 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3066 const struct mlxsw_sp_fib *fib)
3068 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3071 vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3075 vr_entry->key.vr_id = fib->vr->id;
3076 vr_entry->key.proto = fib->proto;
3077 refcount_set(&vr_entry->ref_count, 1);
3079 err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3080 mlxsw_sp_nexthop_group_vr_ht_params);
3082 goto err_hashtable_insert;
3084 list_add(&vr_entry->list, &nh_grp->vr_list);
3088 err_hashtable_insert:
/* Reverse of entry creation: unlink the VR entry from the group's vr_list and
 * remove it from the group's vr_ht.
 * NOTE(review): the release of the entry's memory is not visible in this
 * listing.
 */
3094 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3095 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3097 list_del(&vr_entry->list);
3098 rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3099 mlxsw_sp_nexthop_group_vr_ht_params);
/* Record that the group is used in the (virtual router, protocol) pair of
 * @fib: take another reference on an existing VR entry, or create a new one
 * and return the result of that creation.
 * NOTE(review): the check on the lookup result and the return after the
 * refcount increment are not visible in this listing.
 */
3104 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3105 const struct mlxsw_sp_fib *fib)
3107 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3109 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3111 refcount_inc(&vr_entry->ref_count);
3115 return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
/* Drop one reference on the group's VR entry for the (virtual router,
 * protocol) pair of @fib and destroy the entry once the last reference is
 * gone. A missing entry triggers a one-time warning.
 * NOTE(review): the statements following the two checks (presumably early
 * returns) are not visible in this listing.
 */
3119 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3120 const struct mlxsw_sp_fib *fib)
3122 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3124 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3125 if (WARN_ON_ONCE(!vr_entry))
3128 if (!refcount_dec_and_test(&vr_entry->ref_count))
3131 mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
/* Lookup key for the nexthop group hash table: the group type plus the
 * type-specific identity (fib_info for IPv4, FIB6 entry for IPv6).
 * NOTE(review): the enclosing union and the id member used for nexthop
 * objects are not visible in this listing.
 */
3134 struct mlxsw_sp_nexthop_group_cmp_arg {
3135 enum mlxsw_sp_nexthop_group_type type;
3137 struct fib_info *fi;
3138 struct mlxsw_sp_fib6_entry *fib6_entry;
/* Search the group for a nexthop whose ifindex, weight and IPv6 gateway
 * address all match the given values.
 * NOTE(review): the weight parameter line and the return statements are not
 * visible in this listing; presumably true on a match and false otherwise.
 */
3144 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3145 const struct in6_addr *gw, int ifindex,
3150 for (i = 0; i < nh_grp->nhgi->count; i++) {
3151 const struct mlxsw_sp_nexthop *nh;
3153 nh = &nh_grp->nhgi->nexthops[i];
3154 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3155 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
/* Compare an existing IPv6 nexthop group against a FIB6 entry: the nexthop
 * counts are compared first, then every route of the entry is checked for a
 * matching nexthop (same device ifindex, weight and gateway) in the group.
 * NOTE(review): the return statements are not visible in this listing;
 * callers negate the result, so it presumably is true when the group matches.
 */
3163 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3164 const struct mlxsw_sp_fib6_entry *fib6_entry)
3166 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3168 if (nh_grp->nhgi->count != fib6_entry->nrt6)
3171 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3172 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3173 struct in6_addr *gw;
3174 int ifindex, weight;
3176 ifindex = fib6_nh->fib_nh_dev->ifindex;
3177 weight = fib6_nh->fib_nh_weight;
3178 gw = &fib6_nh->fib_nh_gw6;
3179 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
/* rhashtable obj_cmpfn for the nexthop group table. Every visible return is
 * written so that zero means "equal": IPv4 groups match on the fib_info
 * pointer, IPv6 groups on their nexthop contents, nexthop-object groups on
 * the object id. The group type is compared before anything else.
 * NOTE(review): the values returned on a type mismatch and in the default
 * case are not visible in this listing.
 */
3188 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3190 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3191 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3193 if (nh_grp->type != cmp_arg->type)
3196 switch (cmp_arg->type) {
3197 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3198 return cmp_arg->fi != nh_grp->ipv4.fi;
3199 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3200 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3201 cmp_arg->fib6_entry);
3202 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3203 return cmp_arg->id != nh_grp->obj.id;
/* rhashtable obj_hashfn: hash a stored nexthop group.
 *
 * IPv4 groups hash the fib_info pointer value, nexthop-object groups hash the
 * object id. IPv6 groups start from the nexthop count and XOR in the hashes
 * of each nexthop's ifindex and gateway address, so the result does not
 * depend on nexthop order; the accumulated value is hashed once more.
 * The results must agree with mlxsw_sp_nexthop_group_hash(), which hashes the
 * lookup key.
 */
3210 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3212 const struct mlxsw_sp_nexthop_group *nh_grp = data;
3213 const struct mlxsw_sp_nexthop *nh;
3214 struct fib_info *fi;
3218 switch (nh_grp->type) {
3219 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3220 fi = nh_grp->ipv4.fi;
3221 return jhash(&fi, sizeof(fi), seed);
3222 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3223 val = nh_grp->nhgi->count;
3224 for (i = 0; i < nh_grp->nhgi->count; i++) {
3225 nh = &nh_grp->nhgi->nexthops[i];
3226 val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3227 val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3229 return jhash(&val, sizeof(val), seed);
3230 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3231 return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
/* Hash a FIB6 entry the same way an IPv6 nexthop group object is hashed:
 * start from the number of routes, XOR in the hashes of each route's nexthop
 * device ifindex and gateway address, then hash the accumulated value.
 */
3239 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3241 unsigned int val = fib6_entry->nrt6;
3242 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3244 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3245 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3246 struct net_device *dev = fib6_nh->fib_nh_dev;
3247 struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3249 val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3250 val ^= jhash(gw, sizeof(*gw), seed);
3253 return jhash(&val, sizeof(val), seed);
/* rhashtable hashfn: hash a lookup key (mlxsw_sp_nexthop_group_cmp_arg)
 * according to its type: fib_info pointer for IPv4, FIB6 entry contents for
 * IPv6, object id for nexthop objects.
 * NOTE(review): the default case is not visible in this listing.
 */
3257 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3259 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3261 switch (cmp_arg->type) {
3262 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3263 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3264 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3265 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3266 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3267 return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
/* rhashtable parameters for the router's nexthop group table. No fixed key
 * offset or length is given; hashing and comparison go through the custom
 * callbacks, which dispatch on the group type.
 */
3274 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3275 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3276 .hashfn = mlxsw_sp_nexthop_group_hash,
3277 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
3278 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
/* Insert a nexthop group into the router's group hash table. IPv6 groups
 * without a gateway are special-cased by the first check.
 * NOTE(review): the statement following that check (presumably returning 0
 * without inserting) and the ht_node argument line are not visible in this
 * listing.
 */
3281 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3282 struct mlxsw_sp_nexthop_group *nh_grp)
3284 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3285 !nh_grp->nhgi->gateway)
3288 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3290 mlxsw_sp_nexthop_group_ht_params);
/* Remove a nexthop group from the router's group hash table, applying the
 * same special case as insertion for IPv6 groups without a gateway.
 * NOTE(review): the statement following the check (presumably an early
 * return) and the ht_node argument line are not visible in this listing.
 */
3293 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3294 struct mlxsw_sp_nexthop_group *nh_grp)
3296 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3297 !nh_grp->nhgi->gateway)
3300 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3302 mlxsw_sp_nexthop_group_ht_params);
/* Look up the IPv4 nexthop group associated with a fib_info in the router's
 * group hash table.
 * NOTE(review): the assignment of @fi into the lookup key and the key
 * argument line are not visible in this listing.
 */
3305 static struct mlxsw_sp_nexthop_group *
3306 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3307 struct fib_info *fi)
3309 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3311 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3313 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3315 mlxsw_sp_nexthop_group_ht_params);
/* Look up the IPv6 nexthop group whose nexthops match those of the given
 * FIB6 entry in the router's group hash table.
 * NOTE(review): the key argument line of the lookup call is not visible in
 * this listing.
 */
3318 static struct mlxsw_sp_nexthop_group *
3319 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3320 struct mlxsw_sp_fib6_entry *fib6_entry)
3322 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3324 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3325 cmp_arg.fib6_entry = fib6_entry;
3326 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3328 mlxsw_sp_nexthop_group_ht_params);
/* rhashtable layout for the router's nexthop table: entries are
 * mlxsw_sp_nexthop objects keyed by their embedded mlxsw_sp_nexthop_key.
 */
3331 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3332 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3333 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3334 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
/* Insert a nexthop into the router's nexthop hash table; returns the result
 * of rhashtable_insert_fast().
 */
3337 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3338 struct mlxsw_sp_nexthop *nh)
3340 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3341 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
/* Remove a nexthop from the router's nexthop hash table. */
3344 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3345 struct mlxsw_sp_nexthop *nh)
3347 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3348 mlxsw_sp_nexthop_ht_params);
/* Look up a nexthop by key in the router's nexthop hash table. The key is
 * passed by value and its address is handed to rhashtable_lookup_fast().
 */
3351 static struct mlxsw_sp_nexthop *
3352 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3353 struct mlxsw_sp_nexthop_key key)
3355 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3356 mlxsw_sp_nexthop_ht_params);
/* Write one RALEU register for a single (protocol, virtual router) pair,
 * replacing references to the adjacency block (@adj_index, @ecmp_size) with
 * a new adjacency index. Returns the result of mlxsw_reg_write().
 * NOTE(review): the vr_id and new-index/new-size parameter lines and the
 * final pack argument are not visible in this listing.
 */
3359 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3360 enum mlxsw_sp_l3proto proto,
3362 u32 adj_index, u16 ecmp_size,
3366 char raleu_pl[MLXSW_REG_RALEU_LEN];
3368 mlxsw_reg_raleu_pack(raleu_pl,
3369 (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3370 adj_index, ecmp_size, new_adj_index,
3372 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
/* Apply the adjacency index mass-update to every (virtual router, protocol)
 * pair the group is linked to, moving them off the old adjacency block
 * (@old_adj_index, @old_ecmp_size).
 * NOTE(review): several argument lines, the error check and the return
 * statements are not visible in this listing.
 */
3375 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3376 struct mlxsw_sp_nexthop_group *nh_grp,
3377 u32 old_adj_index, u16 old_ecmp_size)
3379 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3380 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3383 list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3384 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3385 vr_entry->key.proto,
3386 vr_entry->key.vr_id,
3392 goto err_mass_update_vr;
/* Roll back: walk backwards over the VR entries handled before the failing
 * one and point them at the old adjacency index and size again.
 */
3397 list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3398 mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3399 vr_entry->key.vr_id,
3402 old_adj_index, old_ecmp_size);
/* Write a single Ethernet adjacency entry (RATR register) at @adj_index for
 * the nexthop: the entry uses the nexthop's RIF index and the hardware
 * address of its neighbour entry, and binds the nexthop's counter when one is
 * valid (the counter is packed as disabled otherwise). Returns the result of
 * mlxsw_reg_write().
 * NOTE(review): the condition guarding the DISCARD_ERRORS trap action is not
 * visible in this listing; presumably it depends on the discard bit.
 */
3406 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3407 struct mlxsw_sp_nexthop *nh)
3409 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3410 char ratr_pl[MLXSW_REG_RATR_LEN];
3412 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3413 true, MLXSW_REG_RATR_TYPE_ETHERNET,
3414 adj_index, nh->rif->rif_index);
3416 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3417 MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3419 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3420 if (nh->counter_valid)
3421 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3423 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3425 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
/* Write the nexthop into each of its num_adj_entries consecutive adjacency
 * entries, starting at @adj_index.
 * NOTE(review): error handling inside the loop and the return value are not
 * visible in this listing.
 */
3428 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3429 struct mlxsw_sp_nexthop *nh)
3433 for (i = 0; i < nh->num_adj_entries; i++) {
3436 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
/* Write a single adjacency entry for an IP-in-IP nexthop by delegating to the
 * nexthop_update callback of the ops registered for the tunnel type of the
 * nexthop's IPIP entry.
 * NOTE(review): the adj_index parameter line is not visible in this listing.
 */
3444 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3446 struct mlxsw_sp_nexthop *nh)
3448 const struct mlxsw_sp_ipip_ops *ipip_ops;
3450 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3451 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
/* IP-in-IP counterpart of mlxsw_sp_nexthop_update(): write the nexthop into
 * each of its num_adj_entries consecutive adjacency entries, starting at the
 * given adjacency index.
 * NOTE(review): the adj_index parameter line, error handling inside the loop
 * and the return value are not visible in this listing.
 */
3454 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3456 struct mlxsw_sp_nexthop *nh)
3460 for (i = 0; i < nh->num_adj_entries; i++) {
3463 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
/* Write the group's nexthops into the adjacency table, starting at the
 * group's base adjacency index. Nexthops that should not be offloaded are
 * handled separately by the first check; a nexthop is (re)written when its
 * update bit is set or when the caller requests a full reallocation, using
 * the Ethernet or IP-in-IP helper according to its type. The running index
 * advances by each nexthop's num_adj_entries.
 * NOTE(review): the reallocate parameter line, the bookkeeping of the
 * offloaded/update bits, error handling and the return value are not visible
 * in this listing.
 */
3473 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3474 struct mlxsw_sp_nexthop_group_info *nhgi,
3477 u32 adj_index = nhgi->adj_index; /* base */
3478 struct mlxsw_sp_nexthop *nh;
3481 for (i = 0; i < nhgi->count; i++) {
3482 nh = &nhgi->nexthops[i];
3484 if (!nh->should_offload) {
3489 if (nh->update || reallocate) {
3493 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3494 err = mlxsw_sp_nexthop_update
3495 (mlxsw_sp, adj_index, nh);
3497 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3498 err = mlxsw_sp_nexthop_ipip_update
3499 (mlxsw_sp, adj_index, nh);
3507 adj_index += nh->num_adj_entries;
/* Re-program every FIB entry that uses the nexthop group by calling
 * mlxsw_sp_fib_entry_update() on each member of the group's fib_list.
 * NOTE(review): error handling inside the loop and the return value are not
 * visible in this listing.
 */
3513 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3514 struct mlxsw_sp_nexthop_group *nh_grp)
3516 struct mlxsw_sp_fib_entry *fib_entry;
3519 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3520 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* Round the requested adjacency group size, in place, up to the next size in
 * the ladder 512 / 1024 / 2048 / 4096. Sizes up to 64 take the first branch.
 * NOTE(review): the statement of the "<= 64" branch (presumably leaving the
 * size unchanged) and the final else are not visible in this listing.
 */
3527 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3529 /* Valid sizes for an adjacency group are:
3530 * 1-64, 512, 1024, 2048 and 4096.
3532 if (*p_adj_grp_size <= 64)
3534 else if (*p_adj_grp_size <= 512)
3535 *p_adj_grp_size = 512;
3536 else if (*p_adj_grp_size <= 1024)
3537 *p_adj_grp_size = 1024;
3538 else if (*p_adj_grp_size <= 2048)
3539 *p_adj_grp_size = 2048;
3541 *p_adj_grp_size = 4096;
/* Given the number of entries actually allocated, set the group size to the
 * largest of 4096 / 2048 / 1024 / 512 that fits in @alloc_size. When
 * @alloc_size is below 512 the size is left untouched.
 */
3544 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3545 unsigned int alloc_size)
3547 if (alloc_size >= 4096)
3548 *p_adj_grp_size = 4096;
3549 else if (alloc_size >= 2048)
3550 *p_adj_grp_size = 2048;
3551 else if (alloc_size >= 1024)
3552 *p_adj_grp_size = 1024;
3553 else if (alloc_size >= 512)
3554 *p_adj_grp_size = 512;
/* Turn a requested adjacency group size into one the device supports: round
 * it up, query how many KVD linear adjacency entries an allocation of that
 * size would really yield, then round down against that number.
 * @p_adj_grp_size is updated in place.
 * NOTE(review): the error check after the query and the return value are not
 * visible in this listing.
 */
3557 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3558 u16 *p_adj_grp_size)
3560 unsigned int alloc_size;
3563 /* Round up the requested group size to the next size supported
3564 * by the device and make sure the request can be satisfied.
3566 mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3567 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3568 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3569 *p_adj_grp_size, &alloc_size);
3572 /* It is possible the allocation results in more allocated
3573 * entries than requested. Try to use as much of them as
3576 mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
/* Normalise the weights of the nexthops that should be offloaded: the first
 * pass computes the GCD of their weights, the second divides each weight by
 * it and stores the result in norm_nh_weight. The sum of the normalised
 * weights is stored in nhgi->sum_norm_weight.
 * NOTE(review): the statements taken for nexthops that should not be
 * offloaded (presumably "continue") are not visible in this listing.
 */
3582 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3584 int i, g = 0, sum_norm_weight = 0;
3585 struct mlxsw_sp_nexthop *nh;
3587 for (i = 0; i < nhgi->count; i++) {
3588 nh = &nhgi->nexthops[i];
3590 if (!nh->should_offload)
3593 g = gcd(nh->nh_weight, g);
3598 for (i = 0; i < nhgi->count; i++) {
3599 nh = &nhgi->nexthops[i];
3601 if (!nh->should_offload)
3603 nh->norm_nh_weight = nh->nh_weight / g;
3604 sum_norm_weight += nh->norm_nh_weight;
3607 nhgi->sum_norm_weight = sum_norm_weight;
/* Distribute the group's ecmp_size adjacency entries among the nexthops that
 * should be offloaded, proportionally to their normalised weights.
 * NOTE(review): the declaration of upper_bound and the statement taken for
 * nexthops that should not be offloaded are not visible in this listing.
 */
3611 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3613 int i, weight = 0, lower_bound = 0;
3614 int total = nhgi->sum_norm_weight;
3615 u16 ecmp_size = nhgi->ecmp_size;
3617 for (i = 0; i < nhgi->count; i++) {
3618 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3621 if (!nh->should_offload)
3623 weight += nh->norm_nh_weight;
/* Cumulative rounding: each nexthop gets the difference between consecutive
 * rounded cumulative shares, so the per-nexthop counts add up to the rounded
 * share of the total weight accumulated so far.
 */
3624 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3625 nh->num_adj_entries = upper_bound - lower_bound;
3626 lower_bound = upper_bound;
/* Forward declaration; used by the IPv6 offload-flag refresh helper below. */
3630 static struct mlxsw_sp_nexthop *
3631 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3632 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
/* For every nexthop of an IPv4 group, set or clear RTNH_F_OFFLOAD on the
 * kernel fib_nh the nexthop is keyed by.
 * NOTE(review): the condition choosing between set and clear is not visible
 * in this listing; presumably it is the nexthop's offloaded bit.
 */
3635 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3636 struct mlxsw_sp_nexthop_group *nh_grp)
3640 for (i = 0; i < nh_grp->nhgi->count; i++) {
3641 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3644 nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3646 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
/* For every route of one FIB6 entry, find the matching nexthop in the group
 * and set RTNH_F_OFFLOAD on the route's fib6_nh when that nexthop exists and
 * is offloaded; the flag is cleared otherwise.
 */
3651 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3652 struct mlxsw_sp_fib6_entry *fib6_entry)
3654 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3656 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3657 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3658 struct mlxsw_sp_nexthop *nh;
3660 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3661 if (nh && nh->offloaded)
3662 fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3664 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
/* Refresh the offload flags of every FIB6 entry on the group's fib_list by
 * running the per-entry helper on each of them.
 */
3669 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3670 struct mlxsw_sp_nexthop_group *nh_grp)
3672 struct mlxsw_sp_fib6_entry *fib6_entry;
3674 /* Unfortunately, in IPv6 the route and the nexthop are described by
3675 * the same struct, so we need to iterate over all the routes using the
3676 * nexthop group and set / clear the offload indication for them.
3678 list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3679 common.nexthop_group_node)
3680 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
/* Report the hardware state of a nexthop-object group back to the nexthop
 * core via nexthop_set_hw_flags(), passing the group's object id, its
 * adj_index_valid bit and false as the remaining flag. Groups marked
 * can_destroy are checked first.
 * NOTE(review): the statement following the can_destroy check (presumably an
 * early return) is not visible in this listing.
 */
3684 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3685 struct mlxsw_sp_nexthop_group *nh_grp)
3687 /* Do not update the flags if the nexthop group is being destroyed
3689 * 1. The nexthop objects is being deleted, in which case the flags are
3691 * 2. The nexthop group was replaced by a newer group, in which case
3692 * the flags of the nexthop object were already updated based on the
3695 if (nh_grp->can_destroy)
3698 nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3699 nh_grp->nhgi->adj_index_valid, false);
/* Dispatch the offload-flag refresh to the helper matching the group type
 * (IPv4, IPv6 or nexthop object).
 */
3703 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3704 struct mlxsw_sp_nexthop_group *nh_grp)
3706 switch (nh_grp->type) {
3707 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3708 mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3710 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3711 mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3713 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3714 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
/* Bring the hardware state of a nexthop group in line with the should_offload
 * state of its nexthops. Visible flow:
 *
 * 1. An early path only re-programs the FIB entries using the group (its
 *    guarding condition is not visible here).
 * 2. The nexthops are scanned for should_offload != offloaded. If nothing
 *    changed, only the existing adjacency entries are rewritten
 *    (MAC update).
 * 3. Otherwise the weights are normalised, a supported group size is derived
 *    from their sum, a new KVD linear adjacency block is allocated, the
 *    entries are rebalanced among the nexthops and all of them are written.
 * 4. The offload flags are refreshed. If the group had no valid adjacency
 *    index before, the FIB entries are updated to start using it; otherwise
 *    the routes are mass-updated from the old block to the new one and the
 *    old block is freed (the free is issued before the mass-update result is
 *    reported).
 * 5. The trailing section invalidates the adjacency index, re-programs the
 *    FIB entries (the warning text says this sets traps), refreshes the
 *    offload flags and frees the block if one was valid.
 *
 * NOTE(review): the error checks, goto labels and return statements that tie
 * these steps together are not visible in this listing.
 */
3720 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3721 struct mlxsw_sp_nexthop_group *nh_grp)
3723 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3724 u16 ecmp_size, old_ecmp_size;
3725 struct mlxsw_sp_nexthop *nh;
3726 bool offload_change = false;
3728 bool old_adj_index_valid;
3729 int i, err2, err = 0;
3733 return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3735 for (i = 0; i < nhgi->count; i++) {
3736 nh = &nhgi->nexthops[i];
3738 if (nh->should_offload != nh->offloaded) {
3739 offload_change = true;
3740 if (nh->should_offload)
3744 if (!offload_change) {
3745 /* Nothing was added or removed, so no need to reallocate. Just
3746 * update MAC on existing adjacency indexes.
3748 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
3750 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3755 mlxsw_sp_nexthop_group_normalize(nhgi);
3756 if (!nhgi->sum_norm_weight)
3757 /* No neigh of this group is connected so we just set
3758 * the trap and let everything flow through kernel.
3762 ecmp_size = nhgi->sum_norm_weight;
3763 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3765 /* No valid allocation size available. */
3768 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3769 ecmp_size, &adj_index);
3771 /* We ran out of KVD linear space, just set the
3772 * trap and let everything flow through kernel.
3774 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3777 old_adj_index_valid = nhgi->adj_index_valid;
3778 old_adj_index = nhgi->adj_index;
3779 old_ecmp_size = nhgi->ecmp_size;
3780 nhgi->adj_index_valid = 1;
3781 nhgi->adj_index = adj_index;
3782 nhgi->ecmp_size = ecmp_size;
3783 mlxsw_sp_nexthop_group_rebalance(nhgi);
3784 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
3786 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3790 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3792 if (!old_adj_index_valid) {
3793 /* The trap was set for fib entries, so we have to call
3794 * fib entry update to unset it and use adjacency index.
3796 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3798 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3804 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3805 old_adj_index, old_ecmp_size);
3806 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3807 old_ecmp_size, old_adj_index);
3809 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3816 old_adj_index_valid = nhgi->adj_index_valid;
3817 nhgi->adj_index_valid = 0;
3818 for (i = 0; i < nhgi->count; i++) {
3819 nh = &nhgi->nexthops[i];
3822 err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3824 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3825 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3826 if (old_adj_index_valid)
3827 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3828 nhgi->ecmp_size, nhgi->adj_index);
/* Set or clear the nexthop's should_offload bit.
 * NOTE(review): the second parameter and the condition selecting between the
 * two assignments are not visible in this listing; callers pass a "removing"
 * flag, so presumably the bit is cleared when removing and set otherwise.
 */
3832 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3836 nh->should_offload = 1;
3838 nh->should_offload = 0;
/* Replace the kernel neighbour behind a neighbour entry with a fresh one.
 *
 * Visible steps: using the first nexthop on the entry's list, a neighbour for
 * the nexthop's gateway address on the RIF's device is looked up (and
 * created, then probed with neigh_event_send(), on paths whose conditions are
 * not visible here). The entry is removed from the neighbour hash table,
 * re-keyed to the new neighbour and inserted again. The new neighbour's NUD
 * state is read under its lock, and every nexthop using the entry drops a
 * reference on the old neighbour, has its should_offload state updated and
 * its group refreshed.
 *
 * On insertion failure the entry is re-keyed back to the old neighbour and
 * re-inserted.
 * NOTE(review): reference acquisition on the new neighbour, where "dead" is
 * read, and the return values are not visible in this listing.
 */
3843 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3844 struct mlxsw_sp_neigh_entry *neigh_entry)
3846 struct neighbour *n, *old_n = neigh_entry->key.n;
3847 struct mlxsw_sp_nexthop *nh;
3848 bool entry_connected;
3852 nh = list_first_entry(&neigh_entry->nexthop_list,
3853 struct mlxsw_sp_nexthop, neigh_list_node);
3855 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3857 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3860 neigh_event_send(n, NULL);
3863 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3864 neigh_entry->key.n = n;
3865 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3867 goto err_neigh_entry_insert;
3869 read_lock_bh(&n->lock);
3870 nud_state = n->nud_state;
3872 read_unlock_bh(&n->lock);
3873 entry_connected = nud_state & NUD_VALID && !dead;
3875 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3877 neigh_release(old_n);
3879 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3880 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
3887 err_neigh_entry_insert:
3888 neigh_entry->key.n = old_n;
3889 mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
/* Propagate a neighbour state change to every nexthop that uses
 * neigh_entry.
 *
 * Checks whether the entry's nexthop list is empty before doing any work.
 * mlxsw_sp_nexthop_dead_neigh_replace() is invoked on one path and a
 * failure is logged with dev_err(); presumably that path is taken when
 * 'dead' is set - the condition is not visible here, TODO confirm.
 * Otherwise each nexthop on the list gets its offload intent updated from
 * 'removing' and its nexthop group refreshed.
 */
3895 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3896 struct mlxsw_sp_neigh_entry *neigh_entry,
3897 bool removing, bool dead)
3899 struct mlxsw_sp_nexthop *nh;
3901 if (list_empty(&neigh_entry->nexthop_list))
3907 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3910 dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3914 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3916 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3917 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Bind a nexthop to a router interface: the nexthop is linked onto
 * rif->nexthop_list through nh->rif_list_node. Other statements of the body
 * are not visible here; callers read nh->rif afterwards, so presumably it
 * is assigned here as well - TODO confirm.
 */
3921 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3922 struct mlxsw_sp_rif *rif)
3928 list_add(&nh->rif_list_node, &rif->nexthop_list);
/* Counterpart of mlxsw_sp_nexthop_rif_init(): unlinks the nexthop from the
 * RIF nexthop list it was added to. Any guard or further cleanup is not
 * visible here - TODO confirm.
 */
3931 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3936 list_del(&nh->rif_list_node);
/* Associate an Ethernet nexthop with a neighbour entry for its gateway.
 *
 * Tested up front: the nexthop's group must be a gateway group and the
 * nexthop must not already have a neigh_entry. A kernel neighbour for
 * (neigh_tbl, gw_addr, RIF device) is looked up and/or created and
 * resolution is triggered with neigh_event_send(). The matching driver
 * neigh entry is looked up and/or created; an IS_ERR() result leads to the
 * err_neigh_entry_create path. The nexthop is then linked onto the entry's
 * nexthop_list, with the entry itself added to the router's
 * nexthop_neighs_list when that list was empty. Finally the neighbour state
 * is sampled under n->lock and the offload intent is set from
 * (NUD_VALID && !dead).
 */
3940 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3941 struct mlxsw_sp_nexthop *nh)
3943 struct mlxsw_sp_neigh_entry *neigh_entry;
3944 struct neighbour *n;
3948 if (!nh->nhgi->gateway || nh->neigh_entry)
3951 /* Take a reference of neigh here ensuring that neigh would
3952 * not be destructed before the nexthop entry is finished.
3953 * The reference is taken either in neigh_lookup() or
3954 * in neigh_create() in case n is not found.
3956 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3958 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3961 neigh_event_send(n, NULL);
3963 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3965 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3966 if (IS_ERR(neigh_entry)) {
3968 goto err_neigh_entry_create;
3972 /* If that is the first nexthop connected to that neigh, add to
3973 * nexthop_neighs_list
3975 if (list_empty(&neigh_entry->nexthop_list))
3976 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3977 &mlxsw_sp->router->nexthop_neighs_list);
3979 nh->neigh_entry = neigh_entry;
3980 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3981 read_lock_bh(&n->lock);
3982 nud_state = n->nud_state;
3984 read_unlock_bh(&n->lock);
3985 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3989 err_neigh_entry_create:
/* Detach a nexthop from its neighbour entry.
 *
 * The nexthop is marked as removing, unlinked from the entry's nexthop_list
 * and its neigh_entry pointer is cleared. When the list becomes empty the
 * entry is dropped from the router's nexthop_neighs_list, and if the entry
 * is additionally not connected it is destroyed. The kernel neighbour is
 * fetched from the entry key into 'n'; what is done with it is not visible
 * here - presumably its reference is released, TODO confirm.
 */
3994 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3995 struct mlxsw_sp_nexthop *nh)
3997 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3998 struct neighbour *n;
4002 n = neigh_entry->key.n;
4004 __mlxsw_sp_nexthop_neigh_update(nh, true);
4005 list_del(&nh->neigh_list_node);
4006 nh->neigh_entry = NULL;
4008 /* If that is the last nexthop connected to that neigh, remove from
4009 * nexthop_neighs_list
4011 if (list_empty(&neigh_entry->nexthop_list))
4012 list_del(&neigh_entry->nexthop_neighs_list_node);
4014 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4015 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Compute whether the underlay device of tunnel (overlay) device ol_dev is
 * up: the underlay device is resolved from ol_dev and its IFF_UP flag is
 * tested. When no underlay device is found the result is 'true'.
 */
4020 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4022 struct net_device *ul_dev;
4026 ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4027 is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
/* Attach an IP-in-IP tunnel entry to a nexthop.
 *
 * Tested up front: the nexthop's group must be a gateway group and the
 * nexthop must not already have an ipip_entry. The entry is recorded in
 * nh->ipip_entry, the offload intent is derived from whether the tunnel's
 * underlay is up, and the nexthop is bound to the tunnel's loopback RIF
 * (ipip_entry->ol_lb->common).
 */
4033 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4034 struct mlxsw_sp_nexthop *nh,
4035 struct mlxsw_sp_ipip_entry *ipip_entry)
4039 if (!nh->nhgi->gateway || nh->ipip_entry)
4042 nh->ipip_entry = ipip_entry;
4043 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4044 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4045 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
/* Detach the IP-in-IP tunnel entry from a nexthop: the nexthop is marked as
 * removing and nh->ipip_entry is cleared. Any guard on a missing entry is
 * not visible here - TODO confirm.
 */
4048 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4049 struct mlxsw_sp_nexthop *nh)
4051 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4056 __mlxsw_sp_nexthop_neigh_update(nh, true);
4057 nh->ipip_entry = NULL;
/* Tell whether an IPv4 FIB nexthop egresses through an IP-in-IP device.
 *
 * Visible conditions: the parent route type is RTN_UNICAST and
 * mlxsw_sp_netdev_ipip_type() accepts the nexthop's device; p_ipipt is
 * handed to that helper. The leading operand of the condition is not
 * visible here - TODO confirm.
 */
4060 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4061 const struct fib_nh *fib_nh,
4062 enum mlxsw_sp_ipip_type *p_ipipt)
4064 struct net_device *dev = fib_nh->fib_nh_dev;
4067 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4068 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
/* Classify a nexthop by its egress device and run the type-specific init.
 *
 * An IPIP entry is searched for by overlay device; when the tunnel type's
 * can_offload() callback accepts the device the nexthop becomes
 * MLXSW_SP_NEXTHOP_TYPE_IPIP and is initialised through
 * mlxsw_sp_nexthop_ipip_init(). Otherwise the nexthop is typed
 * MLXSW_SP_NEXTHOP_TYPE_ETH, bound to the RIF found for the device and
 * given a neighbour via mlxsw_sp_nexthop_neigh_init(). A failure of the
 * latter unwinds the RIF binding.
 */
4071 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4072 struct mlxsw_sp_nexthop *nh,
4073 const struct net_device *dev)
4075 const struct mlxsw_sp_ipip_ops *ipip_ops;
4076 struct mlxsw_sp_ipip_entry *ipip_entry;
4077 struct mlxsw_sp_rif *rif;
4080 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4082 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4083 if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4084 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4085 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4090 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4091 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4095 mlxsw_sp_nexthop_rif_init(nh, rif);
4096 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4098 goto err_neigh_init;
4103 mlxsw_sp_nexthop_rif_fini(nh);
/* Type-specific teardown of a nexthop. Ethernet nexthops drop their
 * neighbour first and then their RIF binding; IPIP nexthops drop the RIF
 * binding first and then their tunnel entry.
 */
4107 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4108 struct mlxsw_sp_nexthop *nh)
4111 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4112 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4113 mlxsw_sp_nexthop_rif_fini(nh);
4115 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4116 mlxsw_sp_nexthop_rif_fini(nh);
4117 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
/* Initialise a driver nexthop from an IPv4 FIB nexthop.
 *
 * Fills in the owning group info, the lookup key (the fib_nh pointer), the
 * weight (taken from fib_nh when CONFIG_IP_ROUTE_MULTIPATH is set), the
 * IPv4 gateway address and the ARP table. The nexthop is inserted into the
 * nexthop lookup table, given a counter and appended to the router's
 * nexthop list. The device ifindex is recorded, and the
 * ignore-routes-with-linkdown setting of the device is tested together with
 * RTNH_F_LINKDOWN before the type-specific init is run; the action taken
 * when that test fires is not visible here - TODO confirm. A failing type
 * init removes the nexthop from the lookup table again.
 */
4122 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4123 struct mlxsw_sp_nexthop_group *nh_grp,
4124 struct mlxsw_sp_nexthop *nh,
4125 struct fib_nh *fib_nh)
4127 struct net_device *dev = fib_nh->fib_nh_dev;
4128 struct in_device *in_dev;
4131 nh->nhgi = nh_grp->nhgi;
4132 nh->key.fib_nh = fib_nh;
4133 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4134 nh->nh_weight = fib_nh->fib_nh_weight;
4138 memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4139 nh->neigh_tbl = &arp_tbl;
4140 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4144 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4145 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4149 nh->ifindex = dev->ifindex;
4152 in_dev = __in_dev_get_rcu(dev);
4153 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4154 fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4160 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4162 goto err_nexthop_neigh_init;
4166 err_nexthop_neigh_init:
4167 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Tear down an IPv4 nexthop: type-specific teardown, removal from the
 * router's nexthop list, counter release and removal from the nexthop
 * lookup table, in that order.
 */
4171 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4172 struct mlxsw_sp_nexthop *nh)
4174 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4175 list_del(&nh->router_list_node);
4176 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4177 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Handle a FIB notification about a single IPv4 nexthop.
 *
 * The router's 'aborted' flag is tested first. The driver nexthop is looked
 * up by the fib_nh pointer; FIB_EVENT_NH_ADD re-runs the type-specific init
 * (its return value is not consumed on the visible line) and
 * FIB_EVENT_NH_DEL runs the type-specific teardown. The owning nexthop
 * group is refreshed afterwards.
 */
4180 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4181 unsigned long event, struct fib_nh *fib_nh)
4183 struct mlxsw_sp_nexthop_key key;
4184 struct mlxsw_sp_nexthop *nh;
4186 if (mlxsw_sp->router->aborted)
4189 key.fib_nh = fib_nh;
4190 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4195 case FIB_EVENT_NH_ADD:
4196 mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4198 case FIB_EVENT_NH_DEL:
4199 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4203 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Re-evaluate every nexthop bound to a RIF.
 *
 * For each nexthop on rif->nexthop_list a 'removing' value is chosen by
 * nexthop type - for IPIP nexthops it is whether the tunnel underlay of
 * rif->dev is down; the value used for Ethernet nexthops is not visible
 * here, TODO confirm. The offload intent is then updated and the owning
 * nexthop group refreshed.
 */
4206 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4207 struct mlxsw_sp_rif *rif)
4209 struct mlxsw_sp_nexthop *nh;
4212 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4214 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4217 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4218 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
4225 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4226 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Move all nexthops from old_rif to new_rif: the old list is spliced onto
 * the new RIF's list (leaving the old list empty), the nexthops on the new
 * list are walked - the per-nexthop statement is not visible here,
 * presumably it repoints nh->rif, TODO confirm - and the new RIF's nexthops
 * are then re-evaluated.
 */
4230 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
4231 struct mlxsw_sp_rif *old_rif,
4232 struct mlxsw_sp_rif *new_rif)
4234 struct mlxsw_sp_nexthop *nh;
4236 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
4237 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
4239 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
/* Called when a RIF goes away: every nexthop still bound to it gets its
 * type-specific teardown and its nexthop group is refreshed. The _safe list
 * iterator is used because the teardown unlinks the nexthop from the list
 * being walked.
 */
4242 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4243 struct mlxsw_sp_rif *rif)
4245 struct mlxsw_sp_nexthop *nh, *tmp;
4247 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4248 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4249 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Validate a single nexthop object before it is offloaded. Two rejections
 * are visible, each reported through extack: FDB nexthops and nexthops with
 * encapsulation (nh->has_encap). The FDB test itself and the return values
 * are not visible here - TODO confirm.
 */
4254 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4255 const struct nh_notifier_single_info *nh,
4256 struct netlink_ext_ack *extack)
4261 NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4262 else if (nh->has_encap)
4263 NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
/* Validate a nexthop group object before it is offloaded.
 *
 * FDB groups are rejected via extack. Each member is then run through
 * mlxsw_sp_nexthop_obj_single_validate(), and a member that has no gateway
 * family, is not a reject nexthop and does not egress through an IPIP
 * device is rejected as lacking a gateway.
 */
4271 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4272 const struct nh_notifier_grp_info *nh_grp,
4273 struct netlink_ext_ack *extack)
4277 if (nh_grp->is_fdb) {
4278 NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4282 for (i = 0; i < nh_grp->num_nh; i++) {
4283 const struct nh_notifier_single_info *nh;
4286 nh = &nh_grp->nh_entries[i].nh;
4287 err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh,
4292 /* Device only nexthops with an IPIP device are programmed as
4293 * encapsulating adjacency entries.
4295 if (!nh->gw_family && !nh->is_reject &&
4296 !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4297 NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
/* Entry point for nexthop object validation. The event is compared against
 * NEXTHOP_EVENT_REPLACE first; presumably other events are accepted without
 * further checks - the statement taken is not visible here, TODO confirm.
 * Replace events are dispatched by info->type to the single or group
 * validator, and any other type is reported via extack as unsupported.
 */
4305 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4306 unsigned long event,
4307 struct nh_notifier_info *info)
4309 if (event != NEXTHOP_EVENT_REPLACE)
4312 switch (info->type) {
4313 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4314 return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4316 case NH_NOTIFIER_INFO_TYPE_GRP:
4317 return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4321 NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
/* Decide whether a nexthop object should be treated as a gateway. A single
 * nexthop qualifies when it has a gateway family, is a reject nexthop, or
 * its device is an IPIP device. For groups the answer relies on the earlier
 * validation; the value returned for that case is not visible here - TODO
 * confirm.
 */
4326 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4327 const struct nh_notifier_info *info)
4329 const struct net_device *dev;
4331 switch (info->type) {
4332 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4333 dev = info->nh->dev;
4334 return info->nh->gw_family || info->nh->is_reject ||
4335 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4336 case NH_NOTIFIER_INFO_TYPE_GRP:
4337 /* Already validated earlier. */
/* Set up a discarding (blackhole) nexthop: it is marked for offload and
 * pointed at the router's loopback RIF, looked up by
 * router->lb_rif_index. Further statements of the body are not visible
 * here.
 */
4344 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4345 struct mlxsw_sp_nexthop *nh)
4347 u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4350 nh->should_offload = 1;
4351 /* While nexthops that discard packets do not forward packets
4352 * via an egress RIF, they still need to be programmed using a
4353 * valid RIF, so use the loopback RIF created during init.
4355 nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
/* Counterpart of mlxsw_sp_nexthop_obj_blackhole_init(): clears the
 * nexthop's offload intent. Further statements of the body are not visible
 * here - TODO confirm.
 */
4358 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4359 struct mlxsw_sp_nexthop *nh)
4362 nh->should_offload = 0;
/* Initialise a driver nexthop from a nexthop object.
 *
 * Records the owning group info and the weight, then copies the gateway
 * address by gw_family: the IPv4 address with the ARP table, or the IPv6
 * address with the ND table (the latter only when CONFIG_IPV6 is enabled).
 * The nexthop gets a counter, joins the router's nexthop list, records the
 * device ifindex and runs the type-specific init. Reject nexthops are
 * additionally set up as blackholes. The visible unwind path removes the
 * nexthop from the router list and frees its counter.
 */
4366 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4367 struct mlxsw_sp_nexthop_group *nh_grp,
4368 struct mlxsw_sp_nexthop *nh,
4369 struct nh_notifier_single_info *nh_obj, int weight)
4371 struct net_device *dev = nh_obj->dev;
4374 nh->nhgi = nh_grp->nhgi;
4375 nh->nh_weight = weight;
4377 switch (nh_obj->gw_family) {
4379 memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4380 nh->neigh_tbl = &arp_tbl;
4383 memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4384 #if IS_ENABLED(CONFIG_IPV6)
4385 nh->neigh_tbl = &nd_tbl;
4390 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4391 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4392 nh->ifindex = dev->ifindex;
4394 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4398 if (nh_obj->is_reject)
4399 mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4404 list_del(&nh->router_list_node);
4405 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
/* Tear down a nexthop created from a nexthop object: blackhole teardown
 * (whether it is conditional is not visible here - TODO confirm),
 * type-specific teardown, removal from the router's nexthop list and
 * counter release.
 */
4409 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4410 struct mlxsw_sp_nexthop *nh)
4413 mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4414 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4415 list_del(&nh->router_list_node);
4416 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
/* Build the nexthop group info for a nexthop object (single or group).
 *
 * The number of nexthops is chosen by info->type (num_nh for groups). One
 * allocation sized with struct_size() holds the group info plus its
 * trailing nexthops[] array. The info and the group are cross-linked, the
 * gateway flag is computed, and each nexthop is initialised with its
 * per-entry object and weight. The group is then refreshed so the adjacency
 * entries reach the device; a failure is reported through extack.
 *
 * On a per-nexthop init failure the nexthops initialised so far are torn
 * down in reverse order.
 */
4420 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4421 struct mlxsw_sp_nexthop_group *nh_grp,
4422 struct nh_notifier_info *info)
4424 struct mlxsw_sp_nexthop_group_info *nhgi;
4425 struct mlxsw_sp_nexthop *nh;
4429 switch (info->type) {
4430 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4433 case NH_NOTIFIER_INFO_TYPE_GRP:
4434 nhs = info->nh_grp->num_nh;
4440 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4443 nh_grp->nhgi = nhgi;
4444 nhgi->nh_grp = nh_grp;
4445 nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4447 for (i = 0; i < nhgi->count; i++) {
4448 struct nh_notifier_single_info *nh_obj;
4451 nh = &nhgi->nexthops[i];
4452 switch (info->type) {
4453 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4457 case NH_NOTIFIER_INFO_TYPE_GRP:
4458 nh_obj = &info->nh_grp->nh_entries[i].nh;
4459 weight = info->nh_grp->nh_entries[i].weight;
4463 goto err_nexthop_obj_init;
4465 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
4468 goto err_nexthop_obj_init;
4470 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4472 NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
4473 goto err_group_refresh;
4480 err_nexthop_obj_init:
4481 for (i--; i >= 0; i--) {
4482 nh = &nhgi->nexthops[i];
4483 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
/* Tear down the group info of a nexthop object group: nexthops are
 * finalised in reverse index order, the group is refreshed, and a warning
 * fires if the adjacency index is still marked valid afterwards. Freeing of
 * the info itself is not visible here - TODO confirm.
 */
4490 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4491 struct mlxsw_sp_nexthop_group *nh_grp)
4493 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4496 for (i = nhgi->count - 1; i >= 0; i--) {
4497 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4499 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4501 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4502 WARN_ON_ONCE(nhgi->adj_index_valid);
/* Allocate and initialise a nexthop group backed by a nexthop object.
 *
 * Sets up the VR list, the per-group VR hash table and the FIB list, tags
 * the group as MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ with the object's ID and
 * builds its group info. The group starts with can_destroy = false.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(err) on the visible error paths; the
 * hash table is destroyed when building the group info fails.
 */
4506 static struct mlxsw_sp_nexthop_group *
4507 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
4508 struct nh_notifier_info *info)
4510 struct mlxsw_sp_nexthop_group *nh_grp;
4513 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4515 return ERR_PTR(-ENOMEM);
4516 INIT_LIST_HEAD(&nh_grp->vr_list);
4517 err = rhashtable_init(&nh_grp->vr_ht,
4518 &mlxsw_sp_nexthop_group_vr_ht_params);
4520 goto err_nexthop_group_vr_ht_init;
4521 INIT_LIST_HEAD(&nh_grp->fib_list);
4522 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4523 nh_grp->obj.id = info->id;
4525 err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
4527 goto err_nexthop_group_info_init;
4529 nh_grp->can_destroy = false;
4533 err_nexthop_group_info_init:
4534 rhashtable_destroy(&nh_grp->vr_ht);
4535 err_nexthop_group_vr_ht_init:
4537 return ERR_PTR(err);
/* Destroy a nexthop object group. The can_destroy flag is tested first.
 * The group info is torn down, warnings fire if routes or virtual routers
 * still reference the group, and the VR hash table is destroyed. Freeing of
 * the group itself is not visible here - TODO confirm.
 */
4541 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
4542 struct mlxsw_sp_nexthop_group *nh_grp)
4544 if (!nh_grp->can_destroy)
4546 mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
4547 WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
4548 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
4549 rhashtable_destroy(&nh_grp->vr_ht);
/* Look up a nexthop object group in the router's nexthop group hash table.
 * The compare argument is typed MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; presumably
 * 'id' is stored in it as well - that assignment is not visible here, TODO
 * confirm. Returns the result of rhashtable_lookup_fast().
 */
4553 static struct mlxsw_sp_nexthop_group *
4554 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
4556 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
4558 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4560 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
4562 mlxsw_sp_nexthop_group_ht_params);
/* Register a newly created nexthop object group; a thin wrapper that
 * returns the result of mlxsw_sp_nexthop_group_insert().
 */
4565 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
4566 struct mlxsw_sp_nexthop_group *nh_grp)
4568 return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
/* Replace the contents of an existing nexthop object group in place.
 *
 * The group infos of 'nh_grp' (new) and 'old_nh_grp' (existing, referenced
 * by routes) are swapped, so the existing group now carries the new info.
 * The device is then brought in line according to which adjacency indexes
 * are valid:
 *   - both valid: mass-update from the old adjacency index to the new one;
 *   - exactly one valid (old only, or new only): update the FIB entries
 *     using the group.
 * Failures are reported via extack. Afterwards the offload flags are
 * refreshed and 'nh_grp', now holding the old info, is marked destroyable
 * and destroyed.
 *
 * The trailing statements undo the swap, restoring each group's original
 * info; presumably this is the error path - its label is not visible here,
 * TODO confirm.
 */
4572 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
4573 struct mlxsw_sp_nexthop_group *nh_grp,
4574 struct mlxsw_sp_nexthop_group *old_nh_grp,
4575 struct netlink_ext_ack *extack)
4577 struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
4578 struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
4581 old_nh_grp->nhgi = new_nhgi;
4582 new_nhgi->nh_grp = old_nh_grp;
4583 nh_grp->nhgi = old_nhgi;
4584 old_nhgi->nh_grp = nh_grp;
4586 if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4587 /* Both the old adjacency index and the new one are valid.
4588 * Routes are currently using the old one. Tell the device to
4589 * replace the old adjacency index with the new one.
4591 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
4592 old_nhgi->adj_index,
4593 old_nhgi->ecmp_size);
4595 NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
4598 } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
4599 /* The old adjacency index is valid, while the new one is not.
4600 * Iterate over all the routes using the group and change them
4601 * to trap packets to the CPU.
4603 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4605 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
4608 } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4609 /* The old adjacency index is invalid, while the new one is.
4610 * Iterate over all the routes using the group and change them
4611 * to forward packets using the new valid index.
4613 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4615 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
4620 /* Make sure the flags are set / cleared based on the new nexthop group
4623 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
4625 /* At this point 'nh_grp' is just a shell that is not used by anyone
4626 * and its nexthop group info is the old info that was just replaced
4627 * with the new one. Remove it.
4629 nh_grp->can_destroy = true;
4630 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
4635 old_nhgi->nh_grp = old_nh_grp;
4636 nh_grp->nhgi = new_nhgi;
4637 new_nhgi->nh_grp = nh_grp;
4638 old_nh_grp->nhgi = old_nhgi;
/* Handle creation or replacement of a nexthop object.
 *
 * A new group is always created from 'info'. An existing group with the
 * same ID is then looked up: the new group is either added to the table or
 * used to replace the existing one - presumably depending on whether the
 * lookup found a group, the branch condition is not visible here, TODO
 * confirm. On the visible cleanup path the new group is marked destroyable
 * and destroyed.
 */
4642 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
4643 struct nh_notifier_info *info)
4645 struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
4646 struct netlink_ext_ack *extack = info->extack;
4649 nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
4651 return PTR_ERR(nh_grp);
4653 old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
4655 err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
4657 err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
4658 old_nh_grp, extack);
4661 nh_grp->can_destroy = true;
4662 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
/* Handle deletion of a nexthop object. The group is looked up by ID, marked
 * destroyable and removed from the group table. Destruction is attempted
 * immediately unless routes still use the group, in which case it is
 * deferred until the last route is gone.
 */
4668 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
4669 struct nh_notifier_info *info)
4671 struct mlxsw_sp_nexthop_group *nh_grp;
4673 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
4677 nh_grp->can_destroy = true;
4678 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4680 /* If the group still has routes using it, then defer the delete
4681 * operation until the last route using it is deleted.
4683 if (!list_empty(&nh_grp->fib_list))
4685 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
/* Notifier callback for nexthop object events.
 *
 * The router is recovered from the embedded notifier block (nexthop_nb).
 * The event is validated before router->lock is taken. Under the lock,
 * NEXTHOP_EVENT_REPLACE creates/replaces the object and NEXTHOP_EVENT_DEL
 * deletes it. The errno is converted with notifier_from_errno() for the
 * notifier chain.
 */
4688 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
4689 unsigned long event, void *ptr)
4691 struct nh_notifier_info *info = ptr;
4692 struct mlxsw_sp_router *router;
4695 router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
4696 err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
4700 mutex_lock(&router->lock);
4705 case NEXTHOP_EVENT_REPLACE:
4706 err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
4708 case NEXTHOP_EVENT_DEL:
4709 mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
4715 mutex_unlock(&router->lock);
4718 return notifier_from_errno(err);
/* Decide whether an IPv4 fib_info describes gateway routes. Only the first
 * nexthop (index 0) is inspected: it qualifies when its scope is
 * RT_SCOPE_LINK or when it is an IPIP nexthop.
 */
4721 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4722 struct fib_info *fi)
4724 const struct fib_nh *nh = fib_info_nh(fi, 0);
4726 return nh->fib_nh_scope == RT_SCOPE_LINK ||
4727 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
/* Build the nexthop group info for an IPv4 fib_info.
 *
 * One allocation sized with struct_size() holds the info plus one nexthop
 * per path of the fib_info. The info and the group are cross-linked, the
 * gateway flag is computed from the fib_info, and each nexthop is
 * initialised from the matching fib_nh. The group is then refreshed. On a
 * per-nexthop failure the nexthops initialised so far are torn down in
 * reverse order.
 */
4731 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
4732 struct mlxsw_sp_nexthop_group *nh_grp)
4734 unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
4735 struct mlxsw_sp_nexthop_group_info *nhgi;
4736 struct mlxsw_sp_nexthop *nh;
4739 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4742 nh_grp->nhgi = nhgi;
4743 nhgi->nh_grp = nh_grp;
4744 nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
4746 for (i = 0; i < nhgi->count; i++) {
4747 struct fib_nh *fib_nh;
4749 nh = &nhgi->nexthops[i];
4750 fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
4751 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
4753 goto err_nexthop4_init;
4755 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4757 goto err_group_refresh;
4764 for (i--; i >= 0; i--) {
4765 nh = &nhgi->nexthops[i];
4766 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
/* Tear down the group info of an IPv4 nexthop group: nexthops are finalised
 * in reverse index order, the group is refreshed, and a warning fires if
 * the adjacency index is still marked valid afterwards. Freeing of the info
 * itself is not visible here - TODO confirm.
 */
4773 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4774 struct mlxsw_sp_nexthop_group *nh_grp)
4776 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4779 for (i = nhgi->count - 1; i >= 0; i--) {
4780 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4782 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4784 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4785 WARN_ON_ONCE(nhgi->adj_index_valid);
/* Allocate and register a nexthop group for an IPv4 fib_info.
 *
 * Sets up the VR list, the per-group VR hash table and the FIB list, tags
 * the group as MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4 and stores 'fi'. The group
 * info is built and the group is inserted into the group table; on success
 * can_destroy is set to true.
 *
 * The error paths unwind in reverse order (group info, hash table) and
 * return ERR_PTR(err). mlxsw_sp_nexthop4_group_destroy() drops a fib_info
 * reference, so presumably one is taken here on a line that is not visible
 * - TODO confirm.
 */
4789 static struct mlxsw_sp_nexthop_group *
4790 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
4792 struct mlxsw_sp_nexthop_group *nh_grp;
4795 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4797 return ERR_PTR(-ENOMEM);
4798 INIT_LIST_HEAD(&nh_grp->vr_list);
4799 err = rhashtable_init(&nh_grp->vr_ht,
4800 &mlxsw_sp_nexthop_group_vr_ht_params);
4802 goto err_nexthop_group_vr_ht_init;
4803 INIT_LIST_HEAD(&nh_grp->fib_list);
4804 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
4805 nh_grp->ipv4.fi = fi;
4808 err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
4810 goto err_nexthop_group_info_init;
4812 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4814 goto err_nexthop_group_insert;
4816 nh_grp->can_destroy = true;
4820 err_nexthop_group_insert:
4821 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
4822 err_nexthop_group_info_init:
4824 rhashtable_destroy(&nh_grp->vr_ht);
4825 err_nexthop_group_vr_ht_init:
4827 return ERR_PTR(err);
/* Destroy an IPv4 nexthop group. The can_destroy flag is tested first. The
 * group is removed from the group table, its info is torn down, the
 * fib_info reference is dropped, a warning fires if virtual routers still
 * reference the group, and the VR hash table is destroyed. Freeing of the
 * group itself is not visible here - TODO confirm.
 */
4831 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
4832 struct mlxsw_sp_nexthop_group *nh_grp)
4834 if (!nh_grp->can_destroy)
4836 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4837 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
4838 fib_info_put(nh_grp->ipv4.fi);
4839 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
4840 rhashtable_destroy(&nh_grp->vr_ht);
/* Attach an IPv4 FIB entry to its nexthop group.
 *
 * Two sources are visible: a nexthop object group, looked up by ID and
 * expected to exist (WARN_ON_ONCE otherwise), and a fib_info-keyed IPv4
 * group that is looked up and, failing that, created. Presumably the first
 * path is taken when the fib_info uses a nexthop object - the selecting
 * condition is not visible here, TODO confirm. The entry is then appended
 * to the group's fib_list and its nh_group pointer is set.
 */
4844 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
4845 struct mlxsw_sp_fib_entry *fib_entry,
4846 struct fib_info *fi)
4848 struct mlxsw_sp_nexthop_group *nh_grp;
4851 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
4853 if (WARN_ON_ONCE(!nh_grp))
4858 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
4860 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
4862 return PTR_ERR(nh_grp);
4865 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
4866 fib_entry->nh_group = nh_grp;
/* Detach an IPv4 FIB entry from its nexthop group. The entry is unlinked
 * from the group's fib_list; the list is then tested for remaining users.
 * When the group is no longer used it is destroyed with the destructor
 * matching its type (nexthop object group or IPv4 group).
 */
4870 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
4871 struct mlxsw_sp_fib_entry *fib_entry)
4873 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4875 list_del(&fib_entry->nexthop_group_node);
4876 if (!list_empty(&nh_grp->fib_list))
4879 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
4880 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
4884 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
/* IPv4-specific offload precondition: returns true only when the entry's
 * TOS is zero. The IPv4 entry is recovered from the generic one with
 * container_of().
 */
4888 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4890 struct mlxsw_sp_fib4_entry *fib4_entry;
4892 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4894 return !fib4_entry->tos;
/* Decide whether a FIB entry is to be reported as offloaded.
 *
 * IPv4 entries must first pass the IPv4-specific check. The answer then
 * depends on the entry type: remote entries require a valid adjacency index
 * in the group info, local entries require the group info to have a RIF
 * (nh_rif). The result for blackhole and decap entries, and the default,
 * are on lines not visible here - TODO confirm.
 */
4898 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4900 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4902 switch (fib_entry->fib_node->fib->proto) {
4903 case MLXSW_SP_L3_PROTO_IPV4:
4904 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
4907 case MLXSW_SP_L3_PROTO_IPV6:
4911 switch (fib_entry->type) {
4912 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4913 return !!nh_group->nhgi->adj_index_valid;
4914 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4915 return !!nh_group->nhgi->nh_rif;
4916 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4917 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4918 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
/* Search a nexthop group for the nexthop that corresponds to an IPv6 route:
 * a match requires the nexthop to have a RIF whose device equals the
 * route's nexthop device and a gateway address equal to the route's IPv6
 * gateway. The return statements are not visible here; presumably the
 * matching nexthop, or NULL when none matches - TODO confirm.
 */
4925 static struct mlxsw_sp_nexthop *
4926 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4927 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4931 for (i = 0; i < nh_grp->nhgi->count; i++) {
4932 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
4933 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4935 if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
4936 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4937 &rt->fib6_nh->fib_nh_gw6))
/* Publish the hardware state of an IPv4 FIB entry to the kernel FIB.
 *
 * A fib_rt_info is filled from the entry (fib_info, table ID, destination
 * converted with cpu_to_be32(), prefix length, TOS, type). 'offload' is set
 * to the should-offload verdict and 'trap' to its negation, then the flags
 * are applied with fib_alias_hw_flags_set().
 */
4946 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4947 struct mlxsw_sp_fib_entry *fib_entry)
4949 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4950 int dst_len = fib_entry->fib_node->key.prefix_len;
4951 struct mlxsw_sp_fib4_entry *fib4_entry;
4952 struct fib_rt_info fri;
4953 bool should_offload;
4955 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4956 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4958 fri.fi = fib4_entry->fi;
4959 fri.tb_id = fib4_entry->tb_id;
4960 fri.dst = cpu_to_be32(*p_dst);
4961 fri.dst_len = dst_len;
4962 fri.tos = fib4_entry->tos;
4963 fri.type = fib4_entry->type;
4964 fri.offload = should_offload;
4965 fri.trap = !should_offload;
4966 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
/* Clear the hardware flags of an IPv4 FIB entry in the kernel FIB. The same
 * fib_rt_info as in the _set variant is built, with 'offload' forced to
 * false; the 'trap' assignment is not visible here - presumably also false,
 * TODO confirm.
 */
4970 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4971 struct mlxsw_sp_fib_entry *fib_entry)
4973 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4974 int dst_len = fib_entry->fib_node->key.prefix_len;
4975 struct mlxsw_sp_fib4_entry *fib4_entry;
4976 struct fib_rt_info fri;
4978 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4980 fri.fi = fib4_entry->fi;
4981 fri.tb_id = fib4_entry->tb_id;
4982 fri.dst = cpu_to_be32(*p_dst);
4983 fri.dst_len = dst_len;
4984 fri.tos = fib4_entry->tos;
4985 fri.type = fib4_entry->type;
4986 fri.offload = false;
4988 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
/* Publish the hardware state of an IPv6 FIB entry. The should-offload
 * verdict is computed once and applied with fib6_info_hw_flags_set() to
 * every route on the entry's rt6_list. The final argument of that call is
 * not visible here - presumably the negated verdict (trap), TODO confirm.
 */
4992 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4993 struct mlxsw_sp_fib_entry *fib_entry)
4995 struct mlxsw_sp_fib6_entry *fib6_entry;
4996 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4997 bool should_offload;
4999 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5001 /* In IPv6 a multipath route is represented using multiple routes, so
5002 * we need to set the flags on all of them.
5004 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5006 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5007 fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
/* Clear the hardware flags of an IPv6 FIB entry: both flags are set to
 * false on every route on the entry's rt6_list.
 */
5012 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5013 struct mlxsw_sp_fib_entry *fib_entry)
5015 struct mlxsw_sp_fib6_entry *fib6_entry;
5016 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5018 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5020 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5021 fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
/* Protocol dispatcher: set the hardware flags of a FIB entry using the IPv4
 * or IPv6 helper, chosen by the protocol of the entry's FIB.
 */
5025 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5026 struct mlxsw_sp_fib_entry *fib_entry)
5028 switch (fib_entry->fib_node->fib->proto) {
5029 case MLXSW_SP_L3_PROTO_IPV4:
5030 mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5032 case MLXSW_SP_L3_PROTO_IPV6:
5033 mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
/* Protocol dispatcher: clear the hardware flags of a FIB entry using the
 * IPv4 or IPv6 helper, chosen by the protocol of the entry's FIB.
 */
5039 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5040 struct mlxsw_sp_fib_entry *fib_entry)
5042 switch (fib_entry->fib_node->fib->proto) {
5043 case MLXSW_SP_L3_PROTO_IPV4:
5044 mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5046 case MLXSW_SP_L3_PROTO_IPV6:
5047 mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
/* Bring the kernel-visible hardware flags of a FIB entry in line with the
 * operation just performed: WRITE and UPDATE set the flags, DELETE clears
 * them.
 */
5053 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5054 struct mlxsw_sp_fib_entry *fib_entry,
5055 enum mlxsw_sp_fib_entry_op op)
5058 case MLXSW_SP_FIB_ENTRY_OP_WRITE:
5059 case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
5060 mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5062 case MLXSW_SP_FIB_ENTRY_OP_DELETE:
5063 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
/* Private operation context of the "basic" low-level router ops. It holds
 * the RALUE register payload that the *_pack() helpers fill in and that the
 * commit helper writes to the device. The helpers reach it through
 * op_ctx->ll_priv.
 */
5070 struct mlxsw_sp_fib_entry_op_ctx_basic {
5071 char ralue_pl[MLXSW_REG_RALUE_LEN];
/* Start a RALUE payload for a FIB entry in the basic op context.
 *
 * The L3 protocol is cast directly to the register's protocol enum. WRITE
 * and UPDATE both map to MLXSW_REG_RALUE_OP_WRITE_WRITE, DELETE maps to
 * MLXSW_REG_RALUE_OP_WRITE_DELETE. The payload is then packed with the IPv4
 * or IPv6 packer using the virtual router, prefix length and address; for
 * IPv4 the address bytes are passed as a u32 pointer. 'priv' is not used on
 * the visible lines.
 */
5075 mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5076 enum mlxsw_sp_l3proto proto,
5077 enum mlxsw_sp_fib_entry_op op,
5078 u16 virtual_router, u8 prefix_len,
5079 unsigned char *addr,
5080 struct mlxsw_sp_fib_entry_priv *priv)
5082 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5083 enum mlxsw_reg_ralxx_protocol ralxx_proto;
5084 char *ralue_pl = op_ctx_basic->ralue_pl;
5085 enum mlxsw_reg_ralue_op ralue_op;
5087 ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto;
5090 case MLXSW_SP_FIB_ENTRY_OP_WRITE:
5091 case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
5092 ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
5094 case MLXSW_SP_FIB_ENTRY_OP_DELETE:
5095 ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
5103 case MLXSW_SP_L3_PROTO_IPV4:
5104 mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op,
5105 virtual_router, prefix_len, (u32 *) addr);
5107 case MLXSW_SP_L3_PROTO_IPV6:
5108 mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op,
5109 virtual_router, prefix_len, addr);
/* Add a "remote" action to the RALUE payload held in the basic op context,
 * forwarding the trap action, trap ID, adjacency index and ECMP size to the
 * register packer unchanged.
 */
5115 mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5116 enum mlxsw_reg_ralue_trap_action trap_action,
5117 u16 trap_id, u32 adjacency_index, u16 ecmp_size)
5119 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5121 mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action,
5122 trap_id, adjacency_index, ecmp_size);
/* Add a "local" action to the RALUE payload held in the basic op context,
 * forwarding the trap action, trap ID and local egress RIF to the register
 * packer unchanged.
 */
5126 mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5127 enum mlxsw_reg_ralue_trap_action trap_action,
5128 u16 trap_id, u16 local_erif)
5130 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5132 mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action,
5133 trap_id, local_erif);
/* Add an "ip2me" action to the RALUE payload held in the basic op context.
 */
5137 mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
5139 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5141 mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl);
/* Add an "ip2me tunnel" action to the RALUE payload held in the basic op
 * context, passing tunnel_ptr to the register packer. The declaration of
 * the tunnel_ptr parameter is not visible here.
 */
5145 mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5148 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5150 mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr);
/* Commit the packed FIB entry: writes the RALUE payload from the basic op
 * context to the device and returns the result of mlxsw_reg_write().
 * 'postponed_for_bulk' is not touched on the visible lines.
 */
5154 mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
5155 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5156 bool *postponed_for_bulk)
5158 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5160 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5161 op_ctx_basic->ralue_pl);
/* Judging by its name, the "is committed" query of the basic low-level ops
 * for an entry's private data. Neither the return type nor the body is
 * visible here - TODO confirm what it reports.
 */
5165 mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
5170 static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5171 struct mlxsw_sp_fib_entry *fib_entry,
5172 enum mlxsw_sp_fib_entry_op op)
5174 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5176 mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv);
5177 fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id,
5178 fib_entry->fib_node->key.prefix_len,
5179 fib_entry->fib_node->key.addr,
5183 static int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
5184 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5185 const struct mlxsw_sp_router_ll_ops *ll_ops)
5187 bool postponed_for_bulk = false;
5190 err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk);
5191 if (!postponed_for_bulk)
5192 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
5196 static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
5198 enum mlxsw_reg_ratr_trap_action trap_action;
5199 char ratr_pl[MLXSW_REG_RATR_LEN];
5202 if (mlxsw_sp->router->adj_discard_index_valid)
5205 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5206 &mlxsw_sp->router->adj_discard_index);
5210 trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
5211 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
5212 MLXSW_REG_RATR_TYPE_ETHERNET,
5213 mlxsw_sp->router->adj_discard_index,
5214 mlxsw_sp->router->lb_rif_index);
5215 mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
5216 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
5217 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5219 goto err_ratr_write;
5221 mlxsw_sp->router->adj_discard_index_valid = true;
5226 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5227 mlxsw_sp->router->adj_discard_index);
5231 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5232 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5233 struct mlxsw_sp_fib_entry *fib_entry,
5234 enum mlxsw_sp_fib_entry_op op)
5236 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5237 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5238 struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5239 enum mlxsw_reg_ralue_trap_action trap_action;
5241 u32 adjacency_index = 0;
5245 /* In case the nexthop group adjacency index is valid, use it
5246 * with provided ECMP size. Otherwise, setup trap and pass
5247 * traffic to kernel.
5249 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5250 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5251 adjacency_index = nhgi->adj_index;
5252 ecmp_size = nhgi->ecmp_size;
5253 } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
5254 err = mlxsw_sp_adj_discard_write(mlxsw_sp);
5257 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5258 adjacency_index = mlxsw_sp->router->adj_discard_index;
5261 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5262 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5265 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5266 ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id,
5267 adjacency_index, ecmp_size);
5268 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5271 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5272 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5273 struct mlxsw_sp_fib_entry *fib_entry,
5274 enum mlxsw_sp_fib_entry_op op)
5276 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5277 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
5278 enum mlxsw_reg_ralue_trap_action trap_action;
5282 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5283 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5284 rif_index = rif->rif_index;
5286 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5287 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5290 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5291 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index);
5292 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5295 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
5296 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5297 struct mlxsw_sp_fib_entry *fib_entry,
5298 enum mlxsw_sp_fib_entry_op op)
5300 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5302 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5303 ll_ops->fib_entry_act_ip2me_pack(op_ctx);
5304 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5307 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
5308 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5309 struct mlxsw_sp_fib_entry *fib_entry,
5310 enum mlxsw_sp_fib_entry_op op)
5312 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5313 enum mlxsw_reg_ralue_trap_action trap_action;
5315 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
5316 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5317 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0);
5318 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5322 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
5323 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5324 struct mlxsw_sp_fib_entry *fib_entry,
5325 enum mlxsw_sp_fib_entry_op op)
5327 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5328 enum mlxsw_reg_ralue_trap_action trap_action;
5331 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5332 trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
5334 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5335 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0);
5336 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5340 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
5341 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5342 struct mlxsw_sp_fib_entry *fib_entry,
5343 enum mlxsw_sp_fib_entry_op op)
5345 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5346 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
5347 const struct mlxsw_sp_ipip_ops *ipip_ops;
5350 if (WARN_ON(!ipip_entry))
5353 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5354 err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
5355 fib_entry->decap.tunnel_index);
5359 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5360 ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
5361 fib_entry->decap.tunnel_index);
5362 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5365 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
5366 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5367 struct mlxsw_sp_fib_entry *fib_entry,
5368 enum mlxsw_sp_fib_entry_op op)
5370 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5372 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5373 ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
5374 fib_entry->decap.tunnel_index);
5375 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
5378 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5379 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5380 struct mlxsw_sp_fib_entry *fib_entry,
5381 enum mlxsw_sp_fib_entry_op op)
5383 switch (fib_entry->type) {
5384 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5385 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op);
5386 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5387 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op);
5388 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
5389 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op);
5390 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5391 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op);
5392 case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
5393 return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op);
5394 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5395 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op);
5396 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5397 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op);
5402 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5403 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5404 struct mlxsw_sp_fib_entry *fib_entry,
5405 enum mlxsw_sp_fib_entry_op op)
5407 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op);
5412 mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
5417 static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5418 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5419 struct mlxsw_sp_fib_entry *fib_entry,
5422 return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
5423 is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE :
5424 MLXSW_SP_FIB_ENTRY_OP_UPDATE);
5427 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5428 struct mlxsw_sp_fib_entry *fib_entry)
5430 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
5432 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
5433 return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false);
5436 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
5437 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5438 struct mlxsw_sp_fib_entry *fib_entry)
5440 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5442 if (!ll_ops->fib_entry_is_committed(fib_entry->priv))
5444 return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
5445 MLXSW_SP_FIB_ENTRY_OP_DELETE);
5449 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5450 const struct fib_entry_notifier_info *fen_info,
5451 struct mlxsw_sp_fib_entry *fib_entry)
5453 struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
5454 union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
5455 struct mlxsw_sp_router *router = mlxsw_sp->router;
5456 u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
5457 int ifindex = nhgi->nexthops[0].ifindex;
5458 struct mlxsw_sp_ipip_entry *ipip_entry;
5460 switch (fen_info->type) {
5462 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
5463 MLXSW_SP_L3_PROTO_IPV4, dip);
5464 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
5465 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
5466 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
5470 if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
5471 MLXSW_SP_L3_PROTO_IPV4,
5475 tunnel_index = router->nve_decap_config.tunnel_index;
5476 fib_entry->decap.tunnel_index = tunnel_index;
5477 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
5482 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5485 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5487 case RTN_UNREACHABLE:
5489 /* Packets hitting these routes need to be trapped, but
5490 * can do so with a lower priority than packets directed
5491 * at the host, so use action type local instead of trap.
5493 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
5497 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5499 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5507 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
5508 struct mlxsw_sp_fib_entry *fib_entry)
5510 switch (fib_entry->type) {
5511 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5512 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
5519 static struct mlxsw_sp_fib4_entry *
5520 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
5521 struct mlxsw_sp_fib_node *fib_node,
5522 const struct fib_entry_notifier_info *fen_info)
5524 struct mlxsw_sp_fib4_entry *fib4_entry;
5525 struct mlxsw_sp_fib_entry *fib_entry;
5528 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
5530 return ERR_PTR(-ENOMEM);
5531 fib_entry = &fib4_entry->common;
5533 fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
5534 if (IS_ERR(fib_entry->priv)) {
5535 err = PTR_ERR(fib_entry->priv);
5536 goto err_fib_entry_priv_create;
5539 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
5541 goto err_nexthop4_group_get;
5543 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
5546 goto err_nexthop_group_vr_link;
5548 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
5550 goto err_fib4_entry_type_set;
5552 fib4_entry->fi = fen_info->fi;
5553 fib_info_hold(fib4_entry->fi);
5554 fib4_entry->tb_id = fen_info->tb_id;
5555 fib4_entry->type = fen_info->type;
5556 fib4_entry->tos = fen_info->tos;
5558 fib_entry->fib_node = fib_node;
5562 err_fib4_entry_type_set:
5563 mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
5564 err_nexthop_group_vr_link:
5565 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
5566 err_nexthop4_group_get:
5567 mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
5568 err_fib_entry_priv_create:
5570 return ERR_PTR(err);
5573 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5574 struct mlxsw_sp_fib4_entry *fib4_entry)
5576 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
5578 fib_info_put(fib4_entry->fi);
5579 mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
5580 mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
5582 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
5583 mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv);
5587 static struct mlxsw_sp_fib4_entry *
5588 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5589 const struct fib_entry_notifier_info *fen_info)
5591 struct mlxsw_sp_fib4_entry *fib4_entry;
5592 struct mlxsw_sp_fib_node *fib_node;
5593 struct mlxsw_sp_fib *fib;
5594 struct mlxsw_sp_vr *vr;
5596 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
5599 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
5601 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
5602 sizeof(fen_info->dst),
5607 fib4_entry = container_of(fib_node->fib_entry,
5608 struct mlxsw_sp_fib4_entry, common);
5609 if (fib4_entry->tb_id == fen_info->tb_id &&
5610 fib4_entry->tos == fen_info->tos &&
5611 fib4_entry->type == fen_info->type &&
5612 fib4_entry->fi == fen_info->fi)
5618 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
5619 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
5620 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
5621 .key_len = sizeof(struct mlxsw_sp_fib_key),
5622 .automatic_shrinking = true,
5625 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
5626 struct mlxsw_sp_fib_node *fib_node)
5628 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
5629 mlxsw_sp_fib_ht_params);
5632 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
5633 struct mlxsw_sp_fib_node *fib_node)
5635 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
5636 mlxsw_sp_fib_ht_params);
5639 static struct mlxsw_sp_fib_node *
5640 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
5641 size_t addr_len, unsigned char prefix_len)
5643 struct mlxsw_sp_fib_key key;
5645 memset(&key, 0, sizeof(key));
5646 memcpy(key.addr, addr, addr_len);
5647 key.prefix_len = prefix_len;
5648 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
5651 static struct mlxsw_sp_fib_node *
5652 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
5653 size_t addr_len, unsigned char prefix_len)
5655 struct mlxsw_sp_fib_node *fib_node;
5657 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
5661 list_add(&fib_node->list, &fib->node_list);
5662 memcpy(fib_node->key.addr, addr, addr_len);
5663 fib_node->key.prefix_len = prefix_len;
5668 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
5670 list_del(&fib_node->list);
5674 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
5675 struct mlxsw_sp_fib_node *fib_node)
5677 struct mlxsw_sp_prefix_usage req_prefix_usage;
5678 struct mlxsw_sp_fib *fib = fib_node->fib;
5679 struct mlxsw_sp_lpm_tree *lpm_tree;
5682 lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
5683 if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
5686 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
5687 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
5688 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
5690 if (IS_ERR(lpm_tree))
5691 return PTR_ERR(lpm_tree);
5693 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
5695 goto err_lpm_tree_replace;
5698 lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
5701 err_lpm_tree_replace:
5702 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
5706 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
5707 struct mlxsw_sp_fib_node *fib_node)
5709 struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
5710 struct mlxsw_sp_prefix_usage req_prefix_usage;
5711 struct mlxsw_sp_fib *fib = fib_node->fib;
5714 if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
5716 /* Try to construct a new LPM tree from the current prefix usage
5717 * minus the unused one. If we fail, continue using the old one.
5719 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
5720 mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
5721 fib_node->key.prefix_len);
5722 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
5724 if (IS_ERR(lpm_tree))
5727 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
5729 goto err_lpm_tree_replace;
5733 err_lpm_tree_replace:
5734 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
5737 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
5738 struct mlxsw_sp_fib_node *fib_node,
5739 struct mlxsw_sp_fib *fib)
5743 err = mlxsw_sp_fib_node_insert(fib, fib_node);
5746 fib_node->fib = fib;
5748 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
5750 goto err_fib_lpm_tree_link;
5754 err_fib_lpm_tree_link:
5755 fib_node->fib = NULL;
5756 mlxsw_sp_fib_node_remove(fib, fib_node);
5760 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
5761 struct mlxsw_sp_fib_node *fib_node)
5763 struct mlxsw_sp_fib *fib = fib_node->fib;
5765 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
5766 fib_node->fib = NULL;
5767 mlxsw_sp_fib_node_remove(fib, fib_node);
5770 static struct mlxsw_sp_fib_node *
5771 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
5772 size_t addr_len, unsigned char prefix_len,
5773 enum mlxsw_sp_l3proto proto)
5775 struct mlxsw_sp_fib_node *fib_node;
5776 struct mlxsw_sp_fib *fib;
5777 struct mlxsw_sp_vr *vr;
5780 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
5782 return ERR_CAST(vr);
5783 fib = mlxsw_sp_vr_fib(vr, proto);
5785 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
5789 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
5792 goto err_fib_node_create;
5795 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
5797 goto err_fib_node_init;
5802 mlxsw_sp_fib_node_destroy(fib_node);
5803 err_fib_node_create:
5804 mlxsw_sp_vr_put(mlxsw_sp, vr);
5805 return ERR_PTR(err);
5808 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
5809 struct mlxsw_sp_fib_node *fib_node)
5811 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
5813 if (fib_node->fib_entry)
5815 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
5816 mlxsw_sp_fib_node_destroy(fib_node);
5817 mlxsw_sp_vr_put(mlxsw_sp, vr);
5820 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5821 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5822 struct mlxsw_sp_fib_entry *fib_entry)
5824 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
5825 bool is_new = !fib_node->fib_entry;
5828 fib_node->fib_entry = fib_entry;
5830 err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new);
5832 goto err_fib_entry_update;
5836 err_fib_entry_update:
5837 fib_node->fib_entry = NULL;
5841 static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5842 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5843 struct mlxsw_sp_fib_entry *fib_entry)
5845 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
5848 err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry);
5849 fib_node->fib_entry = NULL;
5853 static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5854 struct mlxsw_sp_fib_entry *fib_entry)
5856 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
5858 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
5859 __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry);
5862 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
5864 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
5865 struct mlxsw_sp_fib4_entry *fib4_replaced;
5867 if (!fib_node->fib_entry)
5870 fib4_replaced = container_of(fib_node->fib_entry,
5871 struct mlxsw_sp_fib4_entry, common);
5872 if (fib4_entry->tb_id == RT_TABLE_MAIN &&
5873 fib4_replaced->tb_id == RT_TABLE_LOCAL)
5880 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
5881 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5882 const struct fib_entry_notifier_info *fen_info)
5884 struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
5885 struct mlxsw_sp_fib_entry *replaced;
5886 struct mlxsw_sp_fib_node *fib_node;
5889 if (mlxsw_sp->router->aborted)
5892 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
5893 &fen_info->dst, sizeof(fen_info->dst),
5895 MLXSW_SP_L3_PROTO_IPV4);
5896 if (IS_ERR(fib_node)) {
5897 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
5898 return PTR_ERR(fib_node);
5901 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
5902 if (IS_ERR(fib4_entry)) {
5903 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
5904 err = PTR_ERR(fib4_entry);
5905 goto err_fib4_entry_create;
5908 if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
5909 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5910 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5914 replaced = fib_node->fib_entry;
5915 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common);
5917 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
5918 goto err_fib_node_entry_link;
5921 /* Nothing to replace */
5925 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
5926 fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
5928 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
5932 err_fib_node_entry_link:
5933 fib_node->fib_entry = replaced;
5934 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5935 err_fib4_entry_create:
5936 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5940 static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
5941 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5942 struct fib_entry_notifier_info *fen_info)
5944 struct mlxsw_sp_fib4_entry *fib4_entry;
5945 struct mlxsw_sp_fib_node *fib_node;
5948 if (mlxsw_sp->router->aborted)
5951 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
5954 fib_node = fib4_entry->common.fib_node;
5956 err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common);
5957 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5958 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5962 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
5964 /* Multicast routes aren't supported, so ignore them. Neighbour
5965 * Discovery packets are specifically trapped.
5967 if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
5970 /* Cloned routes are irrelevant in the forwarding path. */
5971 if (rt->fib6_flags & RTF_CACHE)
5977 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
5979 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5981 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
5983 return ERR_PTR(-ENOMEM);
5985 /* In case of route replace, replaced route is deleted with
5986 * no notification. Take reference to prevent accessing freed
5989 mlxsw_sp_rt6->rt = rt;
5992 return mlxsw_sp_rt6;
5995 #if IS_ENABLED(CONFIG_IPV6)
5996 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
5998 fib6_info_release(rt);
6001 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6006 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6008 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6010 if (!mlxsw_sp_rt6->rt->nh)
6011 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6012 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6013 kfree(mlxsw_sp_rt6);
6016 static struct fib6_info *
6017 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6019 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6023 static struct mlxsw_sp_rt6 *
6024 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6025 const struct fib6_info *rt)
6027 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6029 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6030 if (mlxsw_sp_rt6->rt == rt)
6031 return mlxsw_sp_rt6;
6037 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6038 const struct fib6_info *rt,
6039 enum mlxsw_sp_ipip_type *ret)
6041 return rt->fib6_nh->fib_nh_dev &&
6042 mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6045 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6046 struct mlxsw_sp_nexthop_group *nh_grp,
6047 struct mlxsw_sp_nexthop *nh,
6048 const struct fib6_info *rt)
6050 struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6052 nh->nhgi = nh_grp->nhgi;
6053 nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6054 memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6055 #if IS_ENABLED(CONFIG_IPV6)
6056 nh->neigh_tbl = &nd_tbl;
6058 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6060 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6064 nh->ifindex = dev->ifindex;
6066 return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6069 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6070 struct mlxsw_sp_nexthop *nh)
6072 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6073 list_del(&nh->router_list_node);
6074 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6077 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6078 const struct fib6_info *rt)
6080 return rt->fib6_nh->fib_nh_gw_family ||
6081 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6085 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6086 struct mlxsw_sp_nexthop_group *nh_grp,
6087 struct mlxsw_sp_fib6_entry *fib6_entry)
6089 struct mlxsw_sp_nexthop_group_info *nhgi;
6090 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6091 struct mlxsw_sp_nexthop *nh;
6094 nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6098 nh_grp->nhgi = nhgi;
6099 nhgi->nh_grp = nh_grp;
6100 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6101 struct mlxsw_sp_rt6, list);
6102 nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6103 nhgi->count = fib6_entry->nrt6;
6104 for (i = 0; i < nhgi->count; i++) {
6105 struct fib6_info *rt = mlxsw_sp_rt6->rt;
6107 nh = &nhgi->nexthops[i];
6108 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6110 goto err_nexthop6_init;
6111 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6113 nh_grp->nhgi = nhgi;
6114 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6116 goto err_group_refresh;
6123 for (i--; i >= 0; i--) {
6124 nh = &nhgi->nexthops[i];
6125 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6132 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6133 struct mlxsw_sp_nexthop_group *nh_grp)
6135 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6138 for (i = nhgi->count - 1; i >= 0; i--) {
6139 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6141 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6143 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6144 WARN_ON_ONCE(nhgi->adj_index_valid);
6148 static struct mlxsw_sp_nexthop_group *
6149 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6150 struct mlxsw_sp_fib6_entry *fib6_entry)
6152 struct mlxsw_sp_nexthop_group *nh_grp;
6155 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6157 return ERR_PTR(-ENOMEM);
6158 INIT_LIST_HEAD(&nh_grp->vr_list);
6159 err = rhashtable_init(&nh_grp->vr_ht,
6160 &mlxsw_sp_nexthop_group_vr_ht_params);
6162 goto err_nexthop_group_vr_ht_init;
6163 INIT_LIST_HEAD(&nh_grp->fib_list);
6164 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6166 err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6168 goto err_nexthop_group_info_init;
6170 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6172 goto err_nexthop_group_insert;
6174 nh_grp->can_destroy = true;
6178 err_nexthop_group_insert:
6179 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6180 err_nexthop_group_info_init:
6181 rhashtable_destroy(&nh_grp->vr_ht);
6182 err_nexthop_group_vr_ht_init:
6184 return ERR_PTR(err);
6188 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6189 struct mlxsw_sp_nexthop_group *nh_grp)
6191 if (!nh_grp->can_destroy)
6193 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6194 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6195 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6196 rhashtable_destroy(&nh_grp->vr_ht);
6200 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6201 struct mlxsw_sp_fib6_entry *fib6_entry)
6203 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6204 struct mlxsw_sp_nexthop_group *nh_grp;
6207 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6209 if (WARN_ON_ONCE(!nh_grp))
6214 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6216 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6218 return PTR_ERR(nh_grp);
6221 /* The route and the nexthop are described by the same struct, so we
6222 * need to the update the nexthop offload indication for the new route.
6224 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6227 list_add_tail(&fib6_entry->common.nexthop_group_node,
6229 fib6_entry->common.nh_group = nh_grp;
6234 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6235 struct mlxsw_sp_fib_entry *fib_entry)
6237 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6239 list_del(&fib_entry->nexthop_group_node);
6240 if (!list_empty(&nh_grp->fib_list))
6243 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6244 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6248 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6251 static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6252 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6253 struct mlxsw_sp_fib6_entry *fib6_entry)
6255 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6256 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6259 mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6260 fib6_entry->common.nh_group = NULL;
6261 list_del(&fib6_entry->common.nexthop_group_node);
6263 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6265 goto err_nexthop6_group_get;
6267 err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6270 goto err_nexthop_group_vr_link;
6272 /* In case this entry is offloaded, then the adjacency index
6273 * currently associated with it in the device's table is that
6274 * of the old group. Start using the new one instead.
6276 err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx,
6277 &fib6_entry->common, false);
6279 goto err_fib_entry_update;
6281 if (list_empty(&old_nh_grp->fib_list))
6282 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6286 err_fib_entry_update:
6287 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6289 err_nexthop_group_vr_link:
6290 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6291 err_nexthop6_group_get:
6292 list_add_tail(&fib6_entry->common.nexthop_group_node,
6293 &old_nh_grp->fib_list);
6294 fib6_entry->common.nh_group = old_nh_grp;
6295 mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
/* Add the nrt6 routes in rt_arr to an existing IPv6 FIB entry.
 *
 * A mlxsw_sp_rt6 is created for every route and appended to the entry's
 * rt6_list, after which the nexthop group is updated. The unwind loop
 * removes routes from the tail of rt6_list, one per already-processed
 * index.
 *
 * NOTE(review): lines are missing from this listing (return type, local
 * declarations, the check before the second goto, the err_rt6_create
 * label and any nrt6 bookkeeping), so they are not described here.
 */
6300 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6301 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6302 struct mlxsw_sp_fib6_entry *fib6_entry,
6303 struct fib6_info **rt_arr, unsigned int nrt6)
6305 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6308 for (i = 0; i < nrt6; i++) {
6309 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6310 if (IS_ERR(mlxsw_sp_rt6)) {
6311 err = PTR_ERR(mlxsw_sp_rt6);
6312 goto err_rt6_create;
6315 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6319 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
6321 goto err_nexthop6_group_update;
6325 err_nexthop6_group_update:
6328 for (i--; i >= 0; i--) {
6330 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6331 struct mlxsw_sp_rt6, list);
6332 list_del(&mlxsw_sp_rt6->list);
6333 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
/* Remove the routes in rt_arr from an IPv6 FIB entry.
 *
 * Each route is looked up in the entry with mlxsw_sp_fib6_entry_rt_find();
 * a missing route triggers WARN_ON_ONCE(). Found routes are unlinked from
 * the list and destroyed. Afterwards the nexthop group is updated; the
 * return value of that update is not used on the visible line.
 *
 * NOTE(review): what happens after the WARN (presumably skipping the
 * route) is on a line missing from this listing - confirm.
 */
6339 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
6340 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6341 struct mlxsw_sp_fib6_entry *fib6_entry,
6342 struct fib6_info **rt_arr, unsigned int nrt6)
6344 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6347 for (i = 0; i < nrt6; i++) {
6348 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
6350 if (WARN_ON_ONCE(!mlxsw_sp_rt6))
6354 list_del(&mlxsw_sp_rt6->list);
6355 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6358 mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
/* Set fib_entry->type from the properties of IPv6 route rt.
 *
 * Checked in this order:
 *   RTF_LOCAL or RTF_ANYCAST flag     -> MLXSW_SP_FIB_ENTRY_TYPE_TRAP
 *   fib6_type == RTN_BLACKHOLE        -> MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE
 *   RTF_REJECT flag                   -> MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE
 *   nexthop group info has a gateway  -> MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
 * The last assignment is MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; presumably it is
 * the final else branch, whose line is missing from this listing.
 */
6361 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6362 struct mlxsw_sp_fib_entry *fib_entry,
6363 const struct fib6_info *rt)
6365 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
6366 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6367 else if (rt->fib6_type == RTN_BLACKHOLE)
6368 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6369 else if (rt->fib6_flags & RTF_REJECT)
6370 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6371 else if (fib_entry->nh_group->nhgi->gateway)
6372 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6374 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
/* Unlink and destroy every mlxsw_sp_rt6 on the entry's rt6_list. The
 * _safe list iterator is used because entries are removed while walking.
 */
6378 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
6380 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
6382 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
6385 list_del(&mlxsw_sp_rt6->list);
6386 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
/* Allocate and set up an IPv6 FIB entry for fib_node from nrt6 routes.
 *
 * Visible steps: zeroed allocation (GFP_KERNEL), creation of the
 * low-level private data from the FIB's ll_ops, one mlxsw_sp_rt6 per
 * route appended to rt6_list, nexthop group get + vr_link, entry type
 * derived from rt_arr[0], and fib_node assignment.
 *
 * Returns ERR_PTR() on failure (-ENOMEM is visible for the allocation
 * path); the error labels unwind in reverse order.
 *
 * NOTE(review): the success return, the checks before the gotos and the
 * release of the allocation on error are on lines missing from this
 * listing.
 */
6390 static struct mlxsw_sp_fib6_entry *
6391 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
6392 struct mlxsw_sp_fib_node *fib_node,
6393 struct fib6_info **rt_arr, unsigned int nrt6)
6395 struct mlxsw_sp_fib6_entry *fib6_entry;
6396 struct mlxsw_sp_fib_entry *fib_entry;
6397 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6400 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
6402 return ERR_PTR(-ENOMEM);
6403 fib_entry = &fib6_entry->common;
6405 fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
6406 if (IS_ERR(fib_entry->priv)) {
6407 err = PTR_ERR(fib_entry->priv);
6408 goto err_fib_entry_priv_create;
6411 INIT_LIST_HEAD(&fib6_entry->rt6_list);
6413 for (i = 0; i < nrt6; i++) {
6414 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6415 if (IS_ERR(mlxsw_sp_rt6)) {
6416 err = PTR_ERR(mlxsw_sp_rt6);
6417 goto err_rt6_create;
6419 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6423 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6425 goto err_nexthop6_group_get;
6427 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6430 goto err_nexthop_group_vr_link;
6432 mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
6434 fib_entry->fib_node = fib_node;
6438 err_nexthop_group_vr_link:
6439 mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
6440 err_nexthop6_group_get:
6443 for (i--; i >= 0; i--) {
6445 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6446 struct mlxsw_sp_rt6, list);
6447 list_del(&mlxsw_sp_rt6->list);
6448 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6450 mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
6451 err_fib_entry_priv_create:
6453 return ERR_PTR(err);
/* Tear down an IPv6 FIB entry: unlink its nexthop group via
 * mlxsw_sp_nexthop_group_vr_unlink(), put the group, destroy all routes
 * on the entry and put the low-level private data. Warns if nrt6 is still
 * non-zero after the routes were destroyed.
 *
 * NOTE(review): freeing of the entry itself is not visible in this
 * listing (a line after 6466 is missing).
 */
6456 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6457 struct mlxsw_sp_fib6_entry *fib6_entry)
6459 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6461 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6463 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6464 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
6465 WARN_ON(fib6_entry->nrt6);
6466 mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv);
/* Find the IPv6 FIB entry that contains route rt.
 *
 * The virtual router is found from the route's table ID, its IPv6 FIB is
 * taken and the FIB node is looked up by the route's destination address.
 * The node's entry matches when the table ID and metric equal those of the
 * entry's own route and rt is found among the entry's routes.
 *
 * NOTE(review): the NULL checks and return statements are on lines
 * missing from this listing.
 */
6470 static struct mlxsw_sp_fib6_entry *
6471 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6472 const struct fib6_info *rt)
6474 struct mlxsw_sp_fib6_entry *fib6_entry;
6475 struct mlxsw_sp_fib_node *fib_node;
6476 struct mlxsw_sp_fib *fib;
6477 struct fib6_info *cmp_rt;
6478 struct mlxsw_sp_vr *vr;
6480 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
6483 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
6485 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
6486 sizeof(rt->fib6_dst.addr),
6491 fib6_entry = container_of(fib_node->fib_entry,
6492 struct mlxsw_sp_fib6_entry, common);
6493 cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6494 if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
6495 rt->fib6_metric == cmp_rt->fib6_metric &&
6496 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
/* Decide whether fib6_entry may take the place of the entry currently
 * linked to its FIB node.
 *
 * Two conditions are tested: the node has no entry yet, and the new route
 * lives in RT_TABLE_MAIN while the existing one lives in RT_TABLE_LOCAL.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing, so which value each condition yields cannot be confirmed here.
 */
6502 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
6504 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6505 struct mlxsw_sp_fib6_entry *fib6_replaced;
6506 struct fib6_info *rt, *rt_replaced;
6508 if (!fib_node->fib_entry)
6511 fib6_replaced = container_of(fib_node->fib_entry,
6512 struct mlxsw_sp_fib6_entry,
6514 rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6515 rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
6516 if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
6517 rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
/* Handle an IPv6 route replace: create a FIB entry for the routes in
 * rt_arr and link it to its FIB node in place of any previous entry.
 *
 * Before doing any work the function tests router->aborted, a non-zero
 * source prefix length (fib6_src.plen) and
 * mlxsw_sp_fib6_rt_should_ignore(). It then gets the FIB node, creates the
 * entry, and drops both again when mlxsw_sp_fib6_allow_replace() refuses.
 * Otherwise the entry is linked; the previously linked entry has its
 * hardware flags cleared and is destroyed. On link failure the node's
 * fib_entry pointer is restored to the previous entry.
 *
 * NOTE(review): return values for the early checks and the remaining
 * guards/returns are on lines missing from this listing.
 */
6523 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
6524 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6525 struct fib6_info **rt_arr, unsigned int nrt6)
6527 struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
6528 struct mlxsw_sp_fib_entry *replaced;
6529 struct mlxsw_sp_fib_node *fib_node;
6530 struct fib6_info *rt = rt_arr[0];
6533 if (mlxsw_sp->router->aborted)
6536 if (rt->fib6_src.plen)
6539 if (mlxsw_sp_fib6_rt_should_ignore(rt))
6542 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
6544 sizeof(rt->fib6_dst.addr),
6546 MLXSW_SP_L3_PROTO_IPV6);
6547 if (IS_ERR(fib_node))
6548 return PTR_ERR(fib_node);
6550 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
6552 if (IS_ERR(fib6_entry)) {
6553 err = PTR_ERR(fib6_entry);
6554 goto err_fib6_entry_create;
6557 if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
6558 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
6559 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6563 replaced = fib_node->fib_entry;
6564 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common);
6566 goto err_fib_node_entry_link;
6568 /* Nothing to replace */
6572 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6573 fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
6575 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
6579 err_fib_node_entry_link:
6580 fib_node->fib_entry = replaced;
6581 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
6582 err_fib6_entry_create:
6583 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Handle an IPv6 route append: add the routes in rt_arr as nexthops of the
 * entry already linked to the matching FIB node.
 *
 * The same three early tests as in the replace path are made
 * (router->aborted, fib6_src.plen, mlxsw_sp_fib6_rt_should_ignore()). The
 * FIB node is then obtained; a node without an entry triggers
 * WARN_ON_ONCE() and the node reference is dropped. Otherwise the routes
 * are added with mlxsw_sp_fib6_entry_nexthop_add(), and the node reference
 * is dropped on the error label.
 *
 * NOTE(review): return values and the check before the goto are on lines
 * missing from this listing.
 */
6587 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
6588 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6589 struct fib6_info **rt_arr, unsigned int nrt6)
6591 struct mlxsw_sp_fib6_entry *fib6_entry;
6592 struct mlxsw_sp_fib_node *fib_node;
6593 struct fib6_info *rt = rt_arr[0];
6596 if (mlxsw_sp->router->aborted)
6599 if (rt->fib6_src.plen)
6602 if (mlxsw_sp_fib6_rt_should_ignore(rt))
6605 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
6607 sizeof(rt->fib6_dst.addr),
6609 MLXSW_SP_L3_PROTO_IPV6);
6610 if (IS_ERR(fib_node))
6611 return PTR_ERR(fib_node);
6613 if (WARN_ON_ONCE(!fib_node->fib_entry)) {
6614 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6618 fib6_entry = container_of(fib_node->fib_entry,
6619 struct mlxsw_sp_fib6_entry, common);
6620 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
6622 goto err_fib6_entry_nexthop_add;
6626 err_fib6_entry_nexthop_add:
6627 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Handle an IPv6 route delete for the routes in rt_arr.
 *
 * After testing router->aborted and mlxsw_sp_fib6_rt_should_ignore(), the
 * entry holding rt_arr[0] is looked up. When nrt6 differs from the entry's
 * own nrt6 only the given nexthops are removed from the entry. Otherwise
 * the entry is unlinked from its FIB node, destroyed, and the node
 * reference is dropped.
 *
 * NOTE(review): the handling of a failed lookup and all return statements
 * are on lines missing from this listing.
 */
6631 static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
6632 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6633 struct fib6_info **rt_arr, unsigned int nrt6)
6635 struct mlxsw_sp_fib6_entry *fib6_entry;
6636 struct mlxsw_sp_fib_node *fib_node;
6637 struct fib6_info *rt = rt_arr[0];
6640 if (mlxsw_sp->router->aborted)
6643 if (mlxsw_sp_fib6_rt_should_ignore(rt))
6646 /* Multipath routes are first added to the FIB trie and only then
6647 * notified. If we vetoed the addition, we will get a delete
6648 * notification for a route we do not have. Therefore, do not warn if
6649 * route was not found.
6651 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
6655 /* If not all the nexthops are deleted, then only reduce the nexthop
6658 if (nrt6 != fib6_entry->nrt6) {
6659 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
6663 fib_node = fib6_entry->common.fib_node;
6665 err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common);
6666 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
6667 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Program the abort trap for one L3 protocol using LPM tree tree_id.
 *
 * Through the protocol's low-level ops this packs and writes XRALTA and
 * XRALST for the tree, then for every VR index below the MAX_VRS resource
 * packs and writes XRALTB and commits a FIB entry write carrying the
 * ip2me action. The entry is packed with arguments 0 and NULL after the
 * VR id - presumably a zero-length (catch-all) prefix; confirm against
 * fib_entry_pack(). A private data object is created for each entry and
 * put again right after the commit.
 *
 * NOTE(review): the error checks after each write are on lines missing
 * from this listing; only the PTR_ERR(priv) return is visible.
 */
6671 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
6672 enum mlxsw_sp_l3proto proto,
6675 const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
6676 enum mlxsw_reg_ralxx_protocol ralxx_proto =
6677 (enum mlxsw_reg_ralxx_protocol) proto;
6678 struct mlxsw_sp_fib_entry_priv *priv;
6679 char xralta_pl[MLXSW_REG_XRALTA_LEN];
6680 char xralst_pl[MLXSW_REG_XRALST_LEN];
6683 mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id);
6684 err = ll_ops->ralta_write(mlxsw_sp, xralta_pl);
6688 mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id);
6689 err = ll_ops->ralst_write(mlxsw_sp, xralst_pl);
6693 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
6694 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
6695 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
6696 char xraltb_pl[MLXSW_REG_XRALTB_LEN];
6698 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
6699 mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id);
6700 err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
6704 priv = mlxsw_sp_fib_entry_priv_create(ll_ops);
6706 return PTR_ERR(priv);
6708 ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE,
6709 vr->id, 0, NULL, priv);
6710 ll_ops->fib_entry_act_ip2me_pack(op_ctx);
6711 err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL);
6712 mlxsw_sp_fib_entry_priv_put(priv);
/* Map a multicast notifier family to the VR's multicast routing table:
 * RTNL_FAMILY_IPMR selects the IPv4 table; the other visible return
 * selects the IPv6 table.
 */
6720 static struct mlxsw_sp_mr_table *
6721 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
6723 if (family == RTNL_FAMILY_IPMR)
6724 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
6726 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
/* Add (or replace) a multicast route described by men_info.
 *
 * Tests router->aborted, gets the virtual router for the notifier's table
 * ID with mlxsw_sp_vr_get(), selects the multicast table for the
 * notifier's family and returns the result of mlxsw_sp_mr_route_add().
 *
 * NOTE(review): the third parameter line and the checks on aborted / vr
 * are missing from this listing.
 */
6729 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
6730 struct mfc_entry_notifier_info *men_info,
6733 struct mlxsw_sp_mr_table *mrt;
6734 struct mlxsw_sp_vr *vr;
6736 if (mlxsw_sp->router->aborted)
6739 vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
6743 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
6744 return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
/* Delete the multicast route described by men_info.
 *
 * Tests router->aborted, finds the virtual router by table ID, deletes the
 * route from the family's multicast table and then calls
 * mlxsw_sp_vr_put() on the virtual router.
 *
 * NOTE(review): the handling of a failed VR lookup is on a line missing
 * from this listing.
 */
6747 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
6748 struct mfc_entry_notifier_info *men_info)
6750 struct mlxsw_sp_mr_table *mrt;
6751 struct mlxsw_sp_vr *vr;
6753 if (mlxsw_sp->router->aborted)
6756 vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
6760 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
6761 mlxsw_sp_mr_route_del(mrt, men_info->mfc);
6762 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Register a multicast virtual interface (VIF) described by ven_info.
 *
 * Tests router->aborted, gets the virtual router for the table ID, selects
 * the family's multicast table, looks up the RIF of the VIF's netdevice
 * and returns the result of mlxsw_sp_mr_vif_add(). The RIF lookup result
 * is passed on as-is, with no check on the visible lines.
 */
6766 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
6767 struct vif_entry_notifier_info *ven_info)
6769 struct mlxsw_sp_mr_table *mrt;
6770 struct mlxsw_sp_rif *rif;
6771 struct mlxsw_sp_vr *vr;
6773 if (mlxsw_sp->router->aborted)
6776 vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
6780 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
6781 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
6782 return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
6783 ven_info->vif_index,
6784 ven_info->vif_flags, rif);
/* Unregister the multicast VIF described by ven_info.
 *
 * Tests router->aborted, finds the virtual router by table ID, deletes the
 * VIF index from the family's multicast table and then calls
 * mlxsw_sp_vr_put() on the virtual router.
 */
6788 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
6789 struct vif_entry_notifier_info *ven_info)
6791 struct mlxsw_sp_mr_table *mrt;
6792 struct mlxsw_sp_vr *vr;
6794 if (mlxsw_sp->router->aborted)
6797 vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
6801 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
6802 mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
6803 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Install the abort trap for both unicast protocols: IPv4 using LPM tree
 * MLXSW_SP_LPM_TREE_MIN and IPv6 using MLXSW_SP_LPM_TREE_MIN + 1. The
 * result of the IPv6 call is returned directly.
 *
 * NOTE(review): the check on the IPv4 result is on a line missing from
 * this listing.
 */
6806 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
6808 enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4;
6811 err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
6812 MLXSW_SP_LPM_TREE_MIN);
6816 /* The multicast router code does not need an abort trap as by default,
6817 * packets that don't match any routes are trapped to the CPU.
6820 proto = MLXSW_SP_L3_PROTO_IPV6;
6821 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
6822 MLXSW_SP_LPM_TREE_MIN + 1);
/* Flush the IPv4 entry of a FIB node: unlink the entry from the node,
 * destroy it and drop the node reference.
 */
6825 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
6826 struct mlxsw_sp_fib_node *fib_node)
6828 struct mlxsw_sp_fib4_entry *fib4_entry;
6830 fib4_entry = container_of(fib_node->fib_entry,
6831 struct mlxsw_sp_fib4_entry, common);
6832 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
6833 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6834 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Flush the IPv6 entry of a FIB node: unlink the entry from the node,
 * destroy it and drop the node reference.
 */
6837 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
6838 struct mlxsw_sp_fib_node *fib_node)
6840 struct mlxsw_sp_fib6_entry *fib6_entry;
6842 fib6_entry = container_of(fib_node->fib_entry,
6843 struct mlxsw_sp_fib6_entry, common);
6844 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
6845 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
6846 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Dispatch a FIB node flush to the IPv4 or IPv6 variant according to the
 * protocol of the FIB the node belongs to.
 */
6849 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
6850 struct mlxsw_sp_fib_node *fib_node)
6852 switch (fib_node->fib->proto) {
6853 case MLXSW_SP_L3_PROTO_IPV4:
6854 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
6856 case MLXSW_SP_L3_PROTO_IPV6:
6857 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
/* Flush every node on the node_list of the VR's FIB for the given
 * protocol.
 *
 * do_break is computed before the flush and records whether the saved
 * next element is the list head, i.e. whether this node is the last one.
 * NOTE(review): the statement that consumes do_break is on a line missing
 * from this listing; presumably the loop stops there because flushing the
 * last node may release the FIB itself - confirm.
 */
6862 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
6863 struct mlxsw_sp_vr *vr,
6864 enum mlxsw_sp_l3proto proto)
6866 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
6867 struct mlxsw_sp_fib_node *fib_node, *tmp;
6869 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
6870 bool do_break = &tmp->list == &fib->node_list;
6872 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
/* Flush all offloaded routes from every virtual router.
 *
 * For each VR index below the MAX_VRS resource, mlxsw_sp_vr_is_used() is
 * tested, the multicast tables of all protocols are flushed, then the
 * IPv4 FIB. The VR is tested with mlxsw_sp_vr_is_used() again before the
 * IPv6 FIB is flushed. Finally, if the discard adjacency index is marked
 * valid, it is freed from KVDL and marked invalid.
 *
 * NOTE(review): the statements taken when a VR is unused, or when the
 * adjacency index is not valid, are on lines missing from this listing.
 */
6878 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
6882 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
6883 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
6885 if (!mlxsw_sp_vr_is_used(vr))
6888 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
6889 mlxsw_sp_mr_table_flush(vr->mr_table[j]);
6890 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
6892 /* If virtual router was only used for IPv4, then it's no
6895 if (!mlxsw_sp_vr_is_used(vr))
6897 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
6900 /* After flushing all the routes, it is not possible anyone is still
6901 * using the adjacency index that is discarding packets, so free it in
6902 * case it was allocated.
6904 if (!mlxsw_sp->router->adj_discard_index_valid)
6906 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
6907 mlxsw_sp->router->adj_discard_index);
6908 mlxsw_sp->router->adj_discard_index_valid = false;
/* Abort FIB offload: log a warning, flush all offloaded routes, set
 * router->aborted and install the abort trap. A second warning is logged
 * for a failure to set the trap. router->aborted is tested first.
 *
 * NOTE(review): the early return for an already-aborted router and the
 * check on err are on lines missing from this listing.
 */
6911 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
6915 if (mlxsw_sp->router->aborted)
6917 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
6918 mlxsw_sp_router_fib_flush(mlxsw_sp);
6919 mlxsw_sp->router->aborted = true;
6920 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
6922 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
/* Payload of a queued IPv6 FIB event: rt_arr is the array of routes the
 * event applies to. Code in this file also uses an nrt6 member as the
 * array length; its declaration is on a line missing from this listing.
 */
6925 struct mlxsw_sp_fib6_event {
6926 struct fib6_info **rt_arr;
/* A FIB notification queued for deferred processing by the FIB event work
 * item. Holds a copy of the notifier info of the matching kind together
 * with the owning mlxsw_sp instance and the event code. Code in this file
 * also uses a family member; its declaration is on a line missing from
 * this listing, as is the line that presumably opens a union around the
 * info members - confirm.
 */
6930 struct mlxsw_sp_fib_event {
6931 struct list_head list; /* node in fib queue */
6933 struct mlxsw_sp_fib6_event fib6_event;
6934 struct fib_entry_notifier_info fen_info;
6935 struct fib_rule_notifier_info fr_info;
6936 struct fib_nh_notifier_info fnh_info;
6937 struct mfc_entry_notifier_info men_info;
6938 struct vif_entry_notifier_info ven_info;
6940 struct mlxsw_sp *mlxsw_sp;
6941 unsigned long event;
/* Capture the routes of an IPv6 notification into fib6_event.
 *
 * An array of nsiblings + 1 route pointers is allocated with GFP_ATOMIC
 * and stored, with its length, in fib6_event. Each sibling of the
 * notified route is stored from index 1 onwards and a reference is taken
 * on it with fib6_info_hold(). A warning is raised when the number of
 * siblings walked differs from nsiblings.
 *
 * NOTE(review): the allocation failure check, the handling of the first
 * route (index 0) and the return statements are on lines missing from
 * this listing.
 */
6946 mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
6947 struct fib6_entry_notifier_info *fen6_info)
6949 struct fib6_info *rt = fen6_info->rt;
6950 struct fib6_info **rt_arr;
6951 struct fib6_info *iter;
6955 nrt6 = fen6_info->nsiblings + 1;
6957 rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
6961 fib6_event->rt_arr = rt_arr;
6962 fib6_event->nrt6 = nrt6;
6967 if (!fen6_info->nsiblings)
6970 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
6971 if (i == fen6_info->nsiblings)
6974 rt_arr[i + 1] = iter;
6975 fib6_info_hold(iter);
6978 WARN_ON_ONCE(i != fen6_info->nsiblings);
/* Release what mlxsw_sp_router_fib6_event_init() captured: call
 * mlxsw_sp_rt6_release() on every route in rt_arr and free the array.
 */
6984 mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event)
6988 for (i = 0; i < fib6_event->nrt6; i++)
6989 mlxsw_sp_rt6_release(fib6_event->rt_arr[i]);
6990 kfree(fib6_event->rt_arr);
/* Process one queued IPv4 FIB event.
 *
 * SPAN is respun first. REPLACE and DEL events call the corresponding
 * fib4 handler and then drop the fib_info reference held by the event;
 * the visible failure handling puts all op_ctx private data and, for
 * REPLACE, aborts FIB offload. NH_ADD/NH_DEL events are forwarded to
 * mlxsw_sp_nexthop4_event() and the reference on the nexthop's parent
 * fib_info is dropped.
 *
 * NOTE(review): the checks on err and the break statements are on lines
 * missing from this listing.
 */
6993 static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
6994 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6995 struct mlxsw_sp_fib_event *fib_event)
6999 mlxsw_sp_span_respin(mlxsw_sp);
7001 switch (fib_event->event) {
7002 case FIB_EVENT_ENTRY_REPLACE:
7003 err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info);
7005 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7006 mlxsw_sp_router_fib_abort(mlxsw_sp);
7008 fib_info_put(fib_event->fen_info.fi);
7010 case FIB_EVENT_ENTRY_DEL:
7011 err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info);
7013 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7014 fib_info_put(fib_event->fen_info.fi);
7016 case FIB_EVENT_NH_ADD:
7017 case FIB_EVENT_NH_DEL:
7018 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh);
7019 fib_info_put(fib_event->fnh_info.fib_nh->nh_parent);
/* Process one queued IPv6 FIB event.
 *
 * SPAN is respun first. REPLACE, APPEND and DEL events call the
 * corresponding fib6 handler with the captured route array and then
 * release that array with mlxsw_sp_router_fib6_event_fini(). The visible
 * failure handling puts all op_ctx private data and, for REPLACE and
 * APPEND, aborts FIB offload.
 *
 * NOTE(review): the checks on err and the break statements are on lines
 * missing from this listing.
 */
7024 static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
7025 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7026 struct mlxsw_sp_fib_event *fib_event)
7030 mlxsw_sp_span_respin(mlxsw_sp);
7032 switch (fib_event->event) {
7033 case FIB_EVENT_ENTRY_REPLACE:
7034 err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7035 fib_event->fib6_event.nrt6);
7037 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7038 mlxsw_sp_router_fib_abort(mlxsw_sp);
7040 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7042 case FIB_EVENT_ENTRY_APPEND:
7043 err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7044 fib_event->fib6_event.nrt6);
7046 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7047 mlxsw_sp_router_fib_abort(mlxsw_sp);
7049 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7051 case FIB_EVENT_ENTRY_DEL:
7052 err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7053 fib_event->fib6_event.nrt6);
7055 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7056 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
/* Process one queued multicast FIB event under the router lock.
 *
 * ENTRY_REPLACE/ENTRY_ADD add the multicast route (replace is true only
 * for REPLACE), ENTRY_DEL removes it; both drop the mr_cache reference
 * held by the event. VIF_ADD/VIF_DEL add or remove the VIF and drop the
 * netdevice reference held by the event. The visible failure handling
 * for the add paths aborts FIB offload.
 *
 * NOTE(review): lines between the signature and the mutex_lock(), the
 * checks on err and the break statements are missing from this listing.
 */
7061 static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
7062 struct mlxsw_sp_fib_event *fib_event)
7068 mutex_lock(&mlxsw_sp->router->lock);
7069 switch (fib_event->event) {
7070 case FIB_EVENT_ENTRY_REPLACE:
7071 case FIB_EVENT_ENTRY_ADD:
7072 replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE;
7074 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace);
7076 mlxsw_sp_router_fib_abort(mlxsw_sp);
7077 mr_cache_put(fib_event->men_info.mfc);
7079 case FIB_EVENT_ENTRY_DEL:
7080 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info);
7081 mr_cache_put(fib_event->men_info.mfc);
7083 case FIB_EVENT_VIF_ADD:
7084 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7085 &fib_event->ven_info);
7087 mlxsw_sp_router_fib_abort(mlxsw_sp);
7088 dev_put(fib_event->ven_info.dev);
7090 case FIB_EVENT_VIF_DEL:
7091 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info);
7092 dev_put(fib_event->ven_info.dev);
7095 mutex_unlock(&mlxsw_sp->router->lock);
/* Work item that drains the router's FIB event queue.
 *
 * The pending events are spliced onto a local list under
 * fib_event_queue_lock, so the notifier can keep queueing while this
 * batch is processed. The batch is then handled under router->lock using
 * the router's shared operation context: bulk_ok is set when the next
 * queued event has the same family and event code, and initialized is
 * cleared whenever the family changes. Multicast families are processed
 * with router->lock temporarily released. At the end a warning is raised
 * if the operation context still has entries on fib_entry_priv_list.
 *
 * NOTE(review): the case labels for the unicast families, the break
 * statements and anything done with each event after processing are on
 * lines missing from this listing.
 */
7099 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
7101 struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work);
7102 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx;
7103 struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp;
7104 struct mlxsw_sp_fib_event *next_fib_event;
7105 struct mlxsw_sp_fib_event *fib_event;
7106 int last_family = AF_UNSPEC;
7107 LIST_HEAD(fib_event_queue);
7109 spin_lock_bh(&router->fib_event_queue_lock);
7110 list_splice_init(&router->fib_event_queue, &fib_event_queue);
7111 spin_unlock_bh(&router->fib_event_queue_lock);
7113 /* Router lock is held here to make sure per-instance
7114 * operation context is not used in between FIB4/6 events
7117 mutex_lock(&router->lock);
7118 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
7119 list_for_each_entry_safe(fib_event, next_fib_event,
7120 &fib_event_queue, list) {
7121 /* Check if the next entry in the queue exists and it is
7122 * of the same type (family and event) as the current one.
7123 * In that case it is permitted to do the bulking
7124 * of multiple FIB entries to a single register write.
7126 op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) &&
7127 fib_event->family == next_fib_event->family &&
7128 fib_event->event == next_fib_event->event;
7129 op_ctx->event = fib_event->event;
7131 /* In case family of this and the previous entry are different, context
7132 * reinitialization is going to be needed now, indicate that.
7133 * Note that since last_family is initialized to AF_UNSPEC, this is always
7134 * going to happen for the first entry processed in the work.
7136 if (fib_event->family != last_family)
7137 op_ctx->initialized = false;
7139 switch (fib_event->family) {
7141 mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx,
7145 mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx,
7148 case RTNL_FAMILY_IP6MR:
7149 case RTNL_FAMILY_IPMR:
7150 /* Unlock here as inside FIBMR the lock is taken again
7151 * under RTNL. The per-instance operation context
7152 * is not used by FIBMR.
7154 mutex_unlock(&router->lock);
7155 mlxsw_sp_router_fibmr_event_process(mlxsw_sp,
7157 mutex_lock(&router->lock);
7162 last_family = fib_event->family;
7166 WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
7167 mutex_unlock(&router->lock);
/* Copy the IPv4 notifier info into a queued event and pin what it refers
 * to. For ENTRY_REPLACE/ENTRY_DEL the fib_entry_notifier_info is copied
 * and a reference is taken on its fib_info. For NH_ADD/NH_DEL the
 * fib_nh_notifier_info is copied and a reference is taken on the
 * nexthop's parent fib_info.
 */
7170 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event,
7171 struct fib_notifier_info *info)
7173 struct fib_entry_notifier_info *fen_info;
7174 struct fib_nh_notifier_info *fnh_info;
7176 switch (fib_event->event) {
7177 case FIB_EVENT_ENTRY_REPLACE:
7178 case FIB_EVENT_ENTRY_DEL:
7179 fen_info = container_of(info, struct fib_entry_notifier_info,
7181 fib_event->fen_info = *fen_info;
7182 /* Take reference on fib_info to prevent it from being
7183 * freed while event is queued. Release it afterwards.
7185 fib_info_hold(fib_event->fen_info.fi);
7187 case FIB_EVENT_NH_ADD:
7188 case FIB_EVENT_NH_DEL:
7189 fnh_info = container_of(info, struct fib_nh_notifier_info,
7191 fib_event->fnh_info = *fnh_info;
7192 fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent);
/* Prepare a queued event from an IPv6 notification. For ENTRY_REPLACE,
 * ENTRY_APPEND and ENTRY_DEL the notified routes are captured with
 * mlxsw_sp_router_fib6_event_init().
 *
 * NOTE(review): the handling of err and the return statements are on
 * lines missing from this listing.
 */
7197 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event,
7198 struct fib_notifier_info *info)
7200 struct fib6_entry_notifier_info *fen6_info;
7203 switch (fib_event->event) {
7204 case FIB_EVENT_ENTRY_REPLACE:
7205 case FIB_EVENT_ENTRY_APPEND:
7206 case FIB_EVENT_ENTRY_DEL:
7207 fen6_info = container_of(info, struct fib6_entry_notifier_info,
7209 err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event,
/* Copy a multicast notifier info into a queued event and pin what it
 * refers to. Route events (REPLACE/ADD/DEL) copy into men_info and take
 * an mr_cache reference. VIF events (VIF_ADD/VIF_DEL) copy into ven_info
 * and take a netdevice reference.
 */
7220 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event,
7221 struct fib_notifier_info *info)
7223 switch (fib_event->event) {
7224 case FIB_EVENT_ENTRY_REPLACE:
7225 case FIB_EVENT_ENTRY_ADD:
7226 case FIB_EVENT_ENTRY_DEL:
7227 memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info));
7228 mr_cache_hold(fib_event->men_info.mfc);
7230 case FIB_EVENT_VIF_ADD:
7231 case FIB_EVENT_VIF_DEL:
7232 memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info));
7233 dev_hold(fib_event->ven_info.dev);
/* Validate a FIB rule notification.
 *
 * FIB_EVENT_RULE_DEL and an aborted router are tested first, then rules
 * whose input interface is the namespace's loopback device. Per family,
 * the rule is tested for being neither the family's default rule nor an
 * l3mdev rule. The extack message "FIB rules not supported" is set on the
 * rejection path.
 *
 * NOTE(review): the unicast case labels, the value assigned when a rule
 * is rejected and all return statements are on lines missing from this
 * listing.
 */
7238 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7239 struct fib_notifier_info *info,
7240 struct mlxsw_sp *mlxsw_sp)
7242 struct netlink_ext_ack *extack = info->extack;
7243 struct fib_rule_notifier_info *fr_info;
7244 struct fib_rule *rule;
7247 /* nothing to do at the moment */
7248 if (event == FIB_EVENT_RULE_DEL)
7251 if (mlxsw_sp->router->aborted)
7254 fr_info = container_of(info, struct fib_rule_notifier_info, info);
7255 rule = fr_info->rule;
7257 /* Rule only affects locally generated traffic */
7258 if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7261 switch (info->family) {
7263 if (!fib4_rule_default(rule) && !rule->l3mdev)
7267 if (!fib6_rule_default(rule) && !rule->l3mdev)
7270 case RTNL_FAMILY_IPMR:
7271 if (!ipmr_rule_default(rule) && !rule->l3mdev)
7274 case RTNL_FAMILY_IP6MR:
7275 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7281 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
/* FIB notifier callback.
 *
 * The family is tested against AF_INET, AF_INET6, RTNL_FAMILY_IPMR and
 * RTNL_FAMILY_IP6MR. Rule events are validated synchronously and their
 * result is returned through notifier_from_errno(). Entry
 * add/replace/append events are rejected with -EINVAL and an extack
 * message when offload was aborted, or when an IPv4 route uses an IPv6
 * gateway. Everything else is copied into a freshly allocated
 * (GFP_ATOMIC) event, appended to the router's queue under
 * fib_event_queue_lock and the FIB event work is scheduled.
 *
 * NOTE(review): the switch on the event, the allocation failure check,
 * the unicast case labels, the error path of the IPv6 copy and the final
 * return are on lines missing from this listing.
 */
7286 /* Called with rcu_read_lock() */
7287 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7288 unsigned long event, void *ptr)
7290 struct mlxsw_sp_fib_event *fib_event;
7291 struct fib_notifier_info *info = ptr;
7292 struct mlxsw_sp_router *router;
7295 if ((info->family != AF_INET && info->family != AF_INET6 &&
7296 info->family != RTNL_FAMILY_IPMR &&
7297 info->family != RTNL_FAMILY_IP6MR))
7300 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7303 case FIB_EVENT_RULE_ADD:
7304 case FIB_EVENT_RULE_DEL:
7305 err = mlxsw_sp_router_fib_rule_event(event, info,
7307 return notifier_from_errno(err);
7308 case FIB_EVENT_ENTRY_ADD:
7309 case FIB_EVENT_ENTRY_REPLACE:
7310 case FIB_EVENT_ENTRY_APPEND:
7311 if (router->aborted) {
7312 NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
7313 return notifier_from_errno(-EINVAL);
7315 if (info->family == AF_INET) {
7316 struct fib_entry_notifier_info *fen_info = ptr;
7318 if (fen_info->fi->fib_nh_is_v6) {
7319 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7320 return notifier_from_errno(-EINVAL);
7326 fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
7330 fib_event->mlxsw_sp = router->mlxsw_sp;
7331 fib_event->event = event;
7332 fib_event->family = info->family;
7334 switch (info->family) {
7336 mlxsw_sp_router_fib4_event(fib_event, info);
7339 err = mlxsw_sp_router_fib6_event(fib_event, info);
7343 case RTNL_FAMILY_IP6MR:
7344 case RTNL_FAMILY_IPMR:
7345 mlxsw_sp_router_fibmr_event(fib_event, info);
7349 /* Enqueue the event and trigger the work */
7350 spin_lock_bh(&router->fib_event_queue_lock);
7351 list_add_tail(&fib_event->list, &router->fib_event_queue);
7352 spin_unlock_bh(&router->fib_event_queue_lock);
7353 mlxsw_core_schedule_work(&router->fib_event_work);
/* Linear search of the router's RIF table (MAX_RIFS slots) for the RIF
 * whose dev equals the given netdevice; empty slots are skipped.
 *
 * NOTE(review): the return for the not-found case is on a line missing
 * from this listing.
 */
7362 static struct mlxsw_sp_rif *
7363 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7364 const struct net_device *dev)
7368 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7369 if (mlxsw_sp->router->rifs[i] &&
7370 mlxsw_sp->router->rifs[i]->dev == dev)
7371 return mlxsw_sp->router->rifs[i];
/* Look up the RIF of a netdevice while holding the router lock.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably it reports whether a RIF was found.
 */
7376 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7377 const struct net_device *dev)
7379 struct mlxsw_sp_rif *rif;
7381 mutex_lock(&mlxsw_sp->router->lock);
7382 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7383 mutex_unlock(&mlxsw_sp->router->lock);
/* Look up the VLAN ID of the RIF of a netdevice, under the router lock.
 *
 * The VID is read from the RIF's FID with mlxsw_sp_fid_8021q_vid(). The
 * RIF type is compared against MLXSW_SP_RIF_TYPE_VLAN beforehand; per the
 * inline comment, other RIF types yield the invalid value 0.
 *
 * NOTE(review): the handling of a missing RIF and the return statement
 * are on lines missing from this listing.
 */
7388 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7390 struct mlxsw_sp_rif *rif;
7393 mutex_lock(&mlxsw_sp->router->lock);
7394 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7398 /* We only return the VID for VLAN RIFs. Otherwise we return an
7399 * invalid value (0).
7401 if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7404 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7407 mutex_unlock(&mlxsw_sp->router->lock);
/* Disable a router interface in the device: query the RITR register for
 * the given RIF index, clear its enable field and write the register
 * back. Returns the result of the write.
 *
 * NOTE(review): the check on the query result is on a line missing from
 * this listing.
 */
7411 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7413 char ritr_pl[MLXSW_REG_RITR_LEN];
7416 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7417 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7421 mlxsw_reg_ritr_enable_set(ritr_pl, false);
7422 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Synchronize router state with a RIF that is going away: disable the RIF
 * in the device (the result is not checked), then run the nexthop and
 * neighbour "rif gone" handlers for it.
 */
7425 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7426 struct mlxsw_sp_rif *rif)
7428 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7429 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7430 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
/* Decide whether an address event on dev requires RIF configuration.
 *
 * The visible logic computes addr_list_empty: it becomes false when the
 * device has an IPv4 address (idev->ifa_list) or, failing that, a
 * non-empty IPv6 address list. It is then combined with two tests: the
 * device being a macvlan, and a RIF existing whose device is not an L3
 * slave.
 *
 * NOTE(review): the switch on the event and every return statement are on
 * lines missing from this listing, so the mapping from these tests to the
 * result cannot be confirmed here.
 */
7434 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7435 unsigned long event)
7437 struct inet6_dev *inet6_dev;
7438 bool addr_list_empty = true;
7439 struct in_device *idev;
7446 idev = __in_dev_get_rcu(dev);
7447 if (idev && idev->ifa_list)
7448 addr_list_empty = false;
7450 inet6_dev = __in6_dev_get(dev);
7451 if (addr_list_empty && inet6_dev &&
7452 !list_empty(&inet6_dev->addr_list))
7453 addr_list_empty = false;
7456 /* macvlans do not have a RIF, but rather piggy back on the
7457 * RIF of their lower device.
7459 if (netif_is_macvlan(dev) && addr_list_empty)
7462 if (rif && addr_list_empty &&
7463 !netif_is_l3_slave(rif->dev))
7465 /* It is possible we already removed the RIF ourselves
7466 * if it was assigned to a netdev that is now a bridge
/* Determine the RIF type to use for a netdevice.
 *
 * IP-in-IP tunnel devices get MLXSW_SP_RIF_TYPE_IPIP_LB. Otherwise a FID
 * type is chosen and translated with mlxsw_sp_fid_type_rif_type():
 *   VLAN device on top of a bridge master -> 8021Q
 *   VLAN-enabled bridge master            -> 8021Q
 *   other bridge master                   -> 8021D
 * The last assignment is MLXSW_SP_FID_TYPE_RFID; presumably it is the
 * final else branch, whose line is missing from this listing.
 */
7475 static enum mlxsw_sp_rif_type
7476 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7477 const struct net_device *dev)
7479 enum mlxsw_sp_fid_type type;
7481 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7482 return MLXSW_SP_RIF_TYPE_IPIP_LB;
7484 /* Otherwise RIF type is derived from the type of the underlying FID. */
7485 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7486 type = MLXSW_SP_FID_TYPE_8021Q;
7487 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7488 type = MLXSW_SP_FID_TYPE_8021Q;
7489 else if (netif_is_bridge_master(dev))
7490 type = MLXSW_SP_FID_TYPE_8021D;
7492 type = MLXSW_SP_FID_TYPE_RFID;
7494 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
/* Scan the router's RIF table (MAX_RIFS slots) for an empty slot.
 * NOTE(review): the lines that store the index in *p_rif_index and the
 * return statements are missing from this listing.
 */
7497 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
7501 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
7502 if (!mlxsw_sp->router->rifs[i]) {
/* Allocate a zeroed RIF object of rif_size bytes (GFP_KERNEL) and fill in
 * its basic fields: empty nexthop and neighbour lists, the MAC address
 * and MTU copied from l3_dev, and the RIF index.
 *
 * NOTE(review): one parameter line, the allocation failure check, further
 * field assignments and the return are on lines missing from this
 * listing.
 */
7511 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7513 struct net_device *l3_dev)
7515 struct mlxsw_sp_rif *rif;
7517 rif = kzalloc(rif_size, GFP_KERNEL);
7521 INIT_LIST_HEAD(&rif->nexthop_list);
7522 INIT_LIST_HEAD(&rif->neigh_list);
7524 ether_addr_copy(rif->addr, l3_dev->dev_addr);
7525 rif->mtu = l3_dev->mtu;
7529 rif->rif_index = rif_index;
/* Return the RIF stored at rif_index in the router's RIF table. The index
 * is used as-is, without a bounds check on the visible lines.
 */
7534 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
7537 return mlxsw_sp->router->rifs[rif_index];
/* Accessor: return the index of a RIF. */
7540 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
7542 return rif->rif_index;
/* Accessor: return the RIF index of an IP-in-IP loopback RIF, taken from
 * its embedded common RIF.
 */
7545 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7547 return lb_rif->common.rif_index;
/* Resolve the underlay virtual router of an IP-in-IP loopback RIF: the
 * underlay table ID is derived from the RIF's netdevice and passed to
 * mlxsw_sp_vr_get(). An error pointer from that call triggers WARN_ON().
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing, and no matching mlxsw_sp_vr_put() is visible here - confirm
 * the reference handling against the full source.
 */
7550 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7552 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
7553 struct mlxsw_sp_vr *ul_vr;
7555 ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
7556 if (WARN_ON(IS_ERR(ul_vr)))
/* Accessor: return the underlay RIF ID stored in an IP-in-IP loopback
 * RIF.
 */
7562 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7564 return lb_rif->ul_rif_id;
/* Accessor: return the ifindex of the RIF's netdevice. rif->dev is
 * dereferenced without a NULL check on the visible line.
 */
7567 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
7569 return rif->dev->ifindex;
/* Accessor for the netdevice of a RIF.
 * NOTE(review): the body is on lines missing from this listing.
 */
7572 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
/* Create a router interface (RIF) for params->dev.
 *
 * Visible steps: pick the RIF type and ops for the device, get the
 * virtual router for the device's l3mdev table (RT_TABLE_MAIN when the
 * table ID is 0), allocate a free RIF index, allocate the RIF and store
 * it in the router's RIF table, get a FID through ops->fid_get(), run
 * ops->setup() and ops->configure(), register the RIF with the multicast
 * table of each protocol and allocate counters.
 *
 * Returns ERR_PTR() on failure. The visible unwind removes the RIF from
 * the multicast tables registered so far, deconfigures it, puts the FID,
 * clears the RIF table slot and puts the virtual router.
 *
 * NOTE(review): several guards, error labels, the success return and the
 * release of the RIF allocation are on lines missing from this listing.
 */
7577 static struct mlxsw_sp_rif *
7578 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
7579 const struct mlxsw_sp_rif_params *params,
7580 struct netlink_ext_ack *extack)
7582 u32 tb_id = l3mdev_fib_table(params->dev);
7583 const struct mlxsw_sp_rif_ops *ops;
7584 struct mlxsw_sp_fid *fid = NULL;
7585 enum mlxsw_sp_rif_type type;
7586 struct mlxsw_sp_rif *rif;
7587 struct mlxsw_sp_vr *vr;
7591 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
7592 ops = mlxsw_sp->rif_ops_arr[type];
7594 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
7596 return ERR_CAST(vr);
7599 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7601 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7602 goto err_rif_index_alloc;
7605 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
7611 mlxsw_sp->router->rifs[rif_index] = rif;
7612 rif->mlxsw_sp = mlxsw_sp;
7616 fid = ops->fid_get(rif, extack);
7625 ops->setup(rif, params);
7627 err = ops->configure(rif);
7631 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
7632 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
7634 goto err_mr_rif_add;
7637 mlxsw_sp_rif_counters_alloc(rif);
7642 for (i--; i >= 0; i--)
7643 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
7644 ops->deconfigure(rif);
7647 mlxsw_sp_fid_put(fid);
7649 mlxsw_sp->router->rifs[rif_index] = NULL;
7653 err_rif_index_alloc:
7655 mlxsw_sp_vr_put(mlxsw_sp, vr);
7656 return ERR_PTR(err);
/* Tear down a RIF.
 *
 * Calls mlxsw_sp_router_rif_gone_sync() first, then frees the RIF's
 * counters, removes it from each per-protocol multicast routing table of
 * its VR, runs the type-specific ops->deconfigure(), drops the FID, clears
 * the router->rifs[] slot and releases the VR.
 *
 * NOTE(review): the condition guarding the FID put (see the loopback
 * comment below) and the freeing of the RIF memory are not visible in this
 * listing.
 */
7659 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
7661 const struct mlxsw_sp_rif_ops *ops = rif->ops;
7662 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7663 struct mlxsw_sp_fid *fid = rif->fid;
7664 struct mlxsw_sp_vr *vr;
7667 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
7668 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7670 mlxsw_sp_rif_counters_free(rif);
7671 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7672 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
7673 ops->deconfigure(rif);
7675 /* Loopback RIFs are not associated with a FID. */
7676 mlxsw_sp_fid_put(fid);
7677 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
7681 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Destroy the RIF associated with @dev, if any, while holding the router
 * lock.
 *
 * NOTE(review): the handling of a failed lookup between the find and the
 * destroy call is not visible in this listing.
 */
7684 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
7685 struct net_device *dev)
7687 struct mlxsw_sp_rif *rif;
7689 mutex_lock(&mlxsw_sp->router->lock);
7690 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7693 mlxsw_sp_rif_destroy(rif);
7695 mutex_unlock(&mlxsw_sp->router->lock);
/* Fill the sub-port specific RIF parameters from a {port, VID} pair: the
 * VID, whether the port is a LAG member, and the LAG ID / local port
 * number.
 *
 * NOTE(review): the gaps in the embedded line numbers suggest lag_id and
 * system_port are assigned on alternative branches; the branch statements
 * themselves are not visible in this listing.
 */
7699 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
7700 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7702 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7704 params->vid = mlxsw_sp_port_vlan->vid;
7705 params->lag = mlxsw_sp_port->lagged;
7707 params->lag_id = mlxsw_sp_port->lag_id;
7709 params->system_port = mlxsw_sp_port->local_port;
/* Convert a generic RIF pointer to the sub-port RIF that embeds it as its
 * 'common' member. Only valid for RIFs that were allocated as
 * struct mlxsw_sp_rif_subport.
 */
7712 static struct mlxsw_sp_rif_subport *
7713 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
7715 return container_of(rif, struct mlxsw_sp_rif_subport, common);
/* Get a sub-port RIF for params->dev.
 *
 * Looks up an existing RIF for the netdev. The visible code either creates
 * a new RIF through mlxsw_sp_rif_create() or takes an extra reference on
 * the sub-port RIF's ref_count.
 *
 * NOTE(review): the condition selecting between the two paths and the
 * final return are not visible in this listing.
 */
7718 static struct mlxsw_sp_rif *
7719 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
7720 const struct mlxsw_sp_rif_params *params,
7721 struct netlink_ext_ack *extack)
7723 struct mlxsw_sp_rif_subport *rif_subport;
7724 struct mlxsw_sp_rif *rif;
7726 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
7728 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
7730 rif_subport = mlxsw_sp_rif_subport_rif(rif);
7731 refcount_inc(&rif_subport->ref_count);
/* Drop one reference on a sub-port RIF; the RIF is destroyed once
 * refcount_dec_and_test() reports that the count reached zero.
 */
7735 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
7737 struct mlxsw_sp_rif_subport *rif_subport;
7739 rif_subport = mlxsw_sp_rif_subport_rif(rif);
7740 if (!refcount_dec_and_test(&rif_subport->ref_count))
7743 mlxsw_sp_rif_destroy(rif);
/* Attach a {port, VID} to the router through a sub-port RIF.
 *
 * Gets (or creates) the sub-port RIF for the L3 netdev, takes a reference
 * on its FID, maps the {port, VID} to that FID, disables learning and puts
 * the VID into the forwarding STP state, then records the FID in the
 * port-VLAN. Each failing step unwinds the previous ones in reverse order
 * and finally drops the FID and RIF references.
 *
 * The caller is expected to hold the router lock (see
 * mlxsw_sp_port_vlan_router_join()).
 *
 * NOTE(review): the params initialiser, the error checks and the return
 * statements are not visible in this listing.
 */
7747 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
7748 struct net_device *l3_dev,
7749 struct netlink_ext_ack *extack)
7751 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7752 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
7753 struct mlxsw_sp_rif_params params = {
7756 u16 vid = mlxsw_sp_port_vlan->vid;
7757 struct mlxsw_sp_rif *rif;
7758 struct mlxsw_sp_fid *fid;
7761 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
7762 rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
7764 return PTR_ERR(rif);
7766 /* FID was already created, just take a reference */
7767 fid = rif->ops->fid_get(rif, extack);
7768 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
7770 goto err_fid_port_vid_map;
7772 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
7774 goto err_port_vid_learning_set;
7776 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
7777 BR_STATE_FORWARDING);
7779 goto err_port_vid_stp_set;
7781 mlxsw_sp_port_vlan->fid = fid;
7785 err_port_vid_stp_set:
7786 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
7787 err_port_vid_learning_set:
7788 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
7789 err_fid_port_vid_map:
7790 mlxsw_sp_fid_put(fid);
7791 mlxsw_sp_rif_subport_put(rif);
/* Detach a {port, VID} from the router; the mirror image of
 * __mlxsw_sp_port_vlan_router_join().
 *
 * Warns if the port-VLAN's FID is not of type RFID. Otherwise clears the
 * port-VLAN's FID pointer, puts the VID into the blocking STP state,
 * re-enables learning, unmaps the {port, VID} from the FID and drops the
 * FID and sub-port RIF references.
 *
 * NOTE(review): the statement executed when the WARN_ON fires is not
 * visible in this listing.
 */
7796 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7798 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7799 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
7800 struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
7801 u16 vid = mlxsw_sp_port_vlan->vid;
7803 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
7806 mlxsw_sp_port_vlan->fid = NULL;
7807 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
7808 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
7809 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
7810 mlxsw_sp_fid_put(fid);
7811 mlxsw_sp_rif_subport_put(rif);
/* Locked wrapper around __mlxsw_sp_port_vlan_router_join(): takes the
 * router lock, looks up the RIF of @l3_dev and performs the join.
 *
 * NOTE(review): how the lookup result gates the join, and the return
 * statement, are not visible in this listing.
 */
7815 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
7816 struct net_device *l3_dev,
7817 struct netlink_ext_ack *extack)
7819 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
7820 struct mlxsw_sp_rif *rif;
7823 mutex_lock(&mlxsw_sp->router->lock);
7824 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7828 err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
7831 mutex_unlock(&mlxsw_sp->router->lock);
/* Locked wrapper: run __mlxsw_sp_port_vlan_router_leave() with the router
 * lock held.
 */
7836 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7838 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
7840 mutex_lock(&mlxsw_sp->router->lock);
7841 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
7842 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle an address event for a {port, VID}.
 *
 * Resolves the port-VLAN for @vid on @port_dev (warning if there is none)
 * and then either joins it to the router or makes it leave.
 *
 * NOTE(review): the dispatch on @event that selects join versus leave is
 * not visible in this listing.
 */
7845 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
7846 struct net_device *port_dev,
7847 unsigned long event, u16 vid,
7848 struct netlink_ext_ack *extack)
7850 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
7851 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
7853 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
7854 if (WARN_ON(!mlxsw_sp_port_vlan))
7859 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
7862 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
/* Handle an address event on a physical port netdev.
 *
 * Ports that are enslaved to a bridge, a LAG or OVS take a separate early
 * path (its body is not visible in this listing). For all other ports the
 * event is handled for the port's default VID, with the port itself acting
 * as the L3 device.
 */
7869 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
7870 unsigned long event,
7871 struct netlink_ext_ack *extack)
7873 if (netif_is_bridge_port(port_dev) ||
7874 netif_is_lag_port(port_dev) ||
7875 netif_is_ovs_port(port_dev))
7878 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
7879 MLXSW_SP_DEFAULT_VID, extack);
/* Propagate an address event on a LAG (or a VLAN on top of it) to its
 * member ports: walks the lower devices of @lag_dev and, for every lower
 * device that is an mlxsw port, invokes the {port, VID} handler.
 *
 * NOTE(review): the remaining call arguments, the error handling inside
 * the loop and the return are not visible in this listing.
 */
7882 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
7883 struct net_device *lag_dev,
7884 unsigned long event, u16 vid,
7885 struct netlink_ext_ack *extack)
7887 struct net_device *port_dev;
7888 struct list_head *iter;
7891 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
7892 if (mlxsw_sp_port_dev_check(port_dev)) {
7893 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
/* Handle an address event on a LAG master netdev.
 *
 * A LAG that is itself a bridge port takes a separate early path (its body
 * is not visible in this listing). Otherwise the event is handled for the
 * default VID, with the LAG acting as the L3 device.
 */
7905 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
7906 unsigned long event,
7907 struct netlink_ext_ack *extack)
7909 if (netif_is_bridge_port(lag_dev))
7912 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
7913 MLXSW_SP_DEFAULT_VID, extack);
/* Handle an address event on a bridge (or a VLAN upper of a VLAN-aware
 * bridge).
 *
 * For a VLAN-aware bridge master the bridge's VLAN protocol is queried and
 * 802.1ad bridges are rejected with an extack message. A RIF is created
 * for @l3_dev on one path; on the other the existing RIF is looked up and
 * destroyed.
 *
 * NOTE(review): the dispatch on @event, the params initialiser, the error
 * checks and the return statements are not visible in this listing.
 */
7916 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
7917 struct net_device *l3_dev,
7918 unsigned long event,
7919 struct netlink_ext_ack *extack)
7921 struct mlxsw_sp_rif_params params = {
7924 struct mlxsw_sp_rif *rif;
7928 if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
7931 br_vlan_get_proto(l3_dev, &proto);
7932 if (proto == ETH_P_8021AD) {
7933 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
7937 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
7939 return PTR_ERR(rif);
7942 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7943 mlxsw_sp_rif_destroy(rif);
/* Handle an address event on a VLAN netdev by dispatching on the type of
 * its real (lower) device:
 *  - mlxsw port:        {port, VID} handler with the VLAN's VID;
 *  - LAG master:        LAG handler;
 *  - VLAN-aware bridge: bridge handler with the VLAN device as L3 device.
 *
 * A VLAN device that is itself a bridge port takes a separate early path.
 *
 * NOTE(review): that early path and the fall-through return are not
 * visible in this listing.
 */
7950 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
7951 struct net_device *vlan_dev,
7952 unsigned long event,
7953 struct netlink_ext_ack *extack)
7955 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
7956 u16 vid = vlan_dev_vlan_id(vlan_dev);
7958 if (netif_is_bridge_port(vlan_dev))
7961 if (mlxsw_sp_port_dev_check(real_dev))
7962 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
7963 event, vid, extack);
7964 else if (netif_is_lag_master(real_dev))
7965 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
7967 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
7968 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
/* Return true if @mac matches 00:00:5e:00:01:XX. The mask ignores the last
 * octet, so any value of XX matches.
 */
7974 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
7976 u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
7977 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
7979 return ether_addr_equal_masked(mac, vrrp4, mask);
/* Return true if @mac matches 00:00:5e:00:02:XX. The mask ignores the last
 * octet, so any value of XX matches.
 */
7982 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
7984 u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
7985 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
7987 return ether_addr_equal_masked(mac, vrrp6, mask);
/* Program or clear the VRRP ID of a RIF based on a VRRP MAC address.
 *
 * The VRRP ID is the last octet of @mac when @adding, and 0 otherwise.
 * The current RITR register contents for @rif_index are queried, the IPv4
 * or IPv6 VRRP ID field is updated according to the MAC prefix, and the
 * register is written back.
 *
 * NOTE(review): the action taken for non-VRRP MACs and the check of the
 * query result are not visible in this listing.
 */
7990 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7991 const u8 *mac, bool adding)
7993 char ritr_pl[MLXSW_REG_RITR_LEN];
7994 u8 vrrp_id = adding ? mac[5] : 0;
7997 if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
7998 !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8001 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8002 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8006 if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8007 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8009 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8011 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Direct a macvlan's MAC address to the router.
 *
 * Requires a RIF on the macvlan's lower device (an extack message is set
 * otherwise). Installs an FDB entry for the macvlan MAC in the RIF's FID,
 * programs the VRRP ID if applicable, and finally asks the RIF type's
 * optional fdb_del() callback to remove the MAC. On VRRP failure the FDB
 * entry is removed again.
 *
 * NOTE(review): the error checks and return statements are not visible in
 * this listing.
 */
8014 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8015 const struct net_device *macvlan_dev,
8016 struct netlink_ext_ack *extack)
8018 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8019 struct mlxsw_sp_rif *rif;
8022 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8024 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8028 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8029 mlxsw_sp_fid_index(rif->fid), true);
8033 err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8034 macvlan_dev->dev_addr, true);
8036 goto err_rif_vrrp_add;
8038 /* Make sure the bridge driver does not have this MAC pointing at
8041 if (rif->ops->fdb_del)
8042 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8047 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8048 mlxsw_sp_fid_index(rif->fid), false);
/* Undo mlxsw_sp_rif_macvlan_add(): clear the VRRP ID and remove the FDB
 * entry for the macvlan's MAC from the FID of the lower device's RIF.
 * Caller holds the router lock (see mlxsw_sp_rif_macvlan_del()).
 */
8052 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8053 const struct net_device *macvlan_dev)
8055 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8056 struct mlxsw_sp_rif *rif;
8058 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8059 /* If we do not have a RIF, then we already took care of
8060 * removing the macvlan's MAC during RIF deletion.
8064 mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8066 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8067 mlxsw_sp_fid_index(rif->fid), false);
/* Locked wrapper: run __mlxsw_sp_rif_macvlan_del() with the router lock
 * held.
 */
8070 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8071 const struct net_device *macvlan_dev)
8073 mutex_lock(&mlxsw_sp->router->lock);
8074 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8075 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle an address event on a macvlan netdev: one path adds the macvlan's
 * MAC to the router, the other removes it.
 *
 * NOTE(review): the dispatch on @event is not visible in this listing.
 */
8078 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8079 struct net_device *macvlan_dev,
8080 unsigned long event,
8081 struct netlink_ext_ack *extack)
8085 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8087 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
/* Validate that @dev_addr shares the MAC prefix used by the existing RIFs.
 *
 * macvlan and L3-master netdevs are exempt. Otherwise every slot of the
 * RIF table is scanned. Loopback (IPIP_LB) RIFs get special treatment. For
 * any other RIF that belongs to a different netdev, the netdev's MAC is
 * compared with @dev_addr under mlxsw_sp->mac_mask, and a mismatch sets
 * the "same prefix" extack message.
 *
 * NOTE(review): the statements under the exemption and loopback checks and
 * the return values are not visible in this listing.
 */
8094 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
8095 struct net_device *dev,
8096 const unsigned char *dev_addr,
8097 struct netlink_ext_ack *extack)
8099 struct mlxsw_sp_rif *rif;
8102 /* A RIF is not created for macvlan netdevs. Their MAC is used to
8105 if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
8108 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
8109 rif = mlxsw_sp->router->rifs[i];
8110 if (rif && rif->ops &&
8111 rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
8113 if (rif && rif->dev && rif->dev != dev &&
8114 !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
8115 mlxsw_sp->mac_mask)) {
8116 NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
/* Dispatch an address event to the handler matching the netdev type. The
 * types are tested in this order: mlxsw port, LAG master, bridge master,
 * VLAN device, macvlan.
 *
 * NOTE(review): the return for netdevs matching none of these is not
 * visible in this listing.
 */
8124 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
8125 struct net_device *dev,
8126 unsigned long event,
8127 struct netlink_ext_ack *extack)
8129 if (mlxsw_sp_port_dev_check(dev))
8130 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
8131 else if (netif_is_lag_master(dev))
8132 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
8133 else if (netif_is_bridge_master(dev))
8134 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
8136 else if (is_vlan_dev(dev))
8137 return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
8139 else if (netif_is_macvlan(dev))
8140 return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
/* IPv4 address notifier callback (registered as router->inetaddr_nb).
 *
 * NETDEV_UP is filtered out here. For other events the router lock is
 * taken, mlxsw_sp_rif_should_config() decides whether the netdev's RIF
 * needs (de)configuration, and if so the event is dispatched without an
 * extack. The result is converted with notifier_from_errno().
 *
 * NOTE(review): the early-exit statements are not visible in this listing.
 */
8146 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8147 unsigned long event, void *ptr)
8149 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8150 struct net_device *dev = ifa->ifa_dev->dev;
8151 struct mlxsw_sp_router *router;
8152 struct mlxsw_sp_rif *rif;
8155 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8156 if (event == NETDEV_UP)
8159 router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8160 mutex_lock(&router->lock);
8161 rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8162 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8165 err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8167 mutex_unlock(&router->lock);
8168 return notifier_from_errno(err);
/* IPv4 address validator callback.
 *
 * Resolves the mlxsw_sp instance below the netdev, then, under the router
 * lock: checks whether the RIF needs configuration, validates the netdev's
 * MAC against the existing RIFs and dispatches the event with the
 * validator's extack. The result is converted with notifier_from_errno().
 *
 * NOTE(review): the handling of a NULL mlxsw_sp and the intermediate error
 * checks are not visible in this listing.
 */
8171 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8172 unsigned long event, void *ptr)
8174 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8175 struct net_device *dev = ivi->ivi_dev->dev;
8176 struct mlxsw_sp *mlxsw_sp;
8177 struct mlxsw_sp_rif *rif;
8180 mlxsw_sp = mlxsw_sp_lower_get(dev);
8184 mutex_lock(&mlxsw_sp->router->lock);
8185 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8186 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8189 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8194 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8196 mutex_unlock(&mlxsw_sp->router->lock);
8197 return notifier_from_errno(err);
/* Deferred-work context for an IPv6 address event. It is filled in by the
 * notifier callback and consumed (and freed) by the work handler: the work
 * item itself, the driver instance, the affected netdev and the notifier
 * event code.
 */
8200 struct mlxsw_sp_inet6addr_event_work {
8201 struct work_struct work;
8202 struct mlxsw_sp *mlxsw_sp;
8203 struct net_device *dev;
8204 unsigned long event;
/* Work handler for deferred IPv6 address events.
 *
 * Unpacks the context, then under the router lock checks whether the
 * netdev's RIF needs (de)configuration and dispatches the event without an
 * extack. The dispatch result is ignored. The context is freed at the end.
 *
 * NOTE(review): lines between the unlock and the kfree() are not visible
 * in this listing.
 */
8207 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8209 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8210 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8211 struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8212 struct net_device *dev = inet6addr_work->dev;
8213 unsigned long event = inet6addr_work->event;
8214 struct mlxsw_sp_rif *rif;
8217 mutex_lock(&mlxsw_sp->router->lock);
8219 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8220 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8223 __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8225 mutex_unlock(&mlxsw_sp->router->lock);
8228 kfree(inet6addr_work);
/* IPv6 address notifier callback (registered as router->inet6addr_nb).
 *
 * NETDEV_UP is filtered out here. For other events a work item is
 * allocated with GFP_ATOMIC, filled with the driver instance, netdev and
 * event, and scheduled through mlxsw_core_schedule_work(). The handling
 * itself happens in mlxsw_sp_inet6addr_event_work().
 *
 * NOTE(review): the return values are not visible in this listing.
 */
8231 /* Called with rcu_read_lock() */
8232 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8233 unsigned long event, void *ptr)
8235 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8236 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8237 struct net_device *dev = if6->idev->dev;
8238 struct mlxsw_sp_router *router;
8240 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
8241 if (event == NETDEV_UP)
8244 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
8245 if (!inet6addr_work)
8248 router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
8249 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
8250 inet6addr_work->mlxsw_sp = router->mlxsw_sp;
8251 inet6addr_work->dev = dev;
8252 inet6addr_work->event = event;
8254 mlxsw_core_schedule_work(&inet6addr_work->work);
/* IPv6 address validator callback; the IPv6 counterpart of
 * mlxsw_sp_inetaddr_valid_event().
 *
 * Resolves the mlxsw_sp instance below the netdev, then, under the router
 * lock: checks whether the RIF needs configuration, validates the netdev's
 * MAC against the existing RIFs and dispatches the event with the
 * validator's extack. The result is converted with notifier_from_errno().
 *
 * NOTE(review): the handling of a NULL mlxsw_sp and the intermediate error
 * checks are not visible in this listing.
 */
8259 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
8260 unsigned long event, void *ptr)
8262 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
8263 struct net_device *dev = i6vi->i6vi_dev->dev;
8264 struct mlxsw_sp *mlxsw_sp;
8265 struct mlxsw_sp_rif *rif;
8268 mlxsw_sp = mlxsw_sp_lower_get(dev);
8272 mutex_lock(&mlxsw_sp->router->lock);
8273 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8274 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8277 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8282 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
8284 mutex_unlock(&mlxsw_sp->router->lock);
8285 return notifier_from_errno(err);
/* Update the MAC address and MTU of an existing RIF in hardware.
 *
 * Read-modify-write of the RITR register: queries the current contents for
 * @rif_index, overrides the MTU and interface MAC, sets the operation
 * field to MLXSW_REG_RITR_RIF_CREATE and writes the register back.
 *
 * NOTE(review): the check of the query result is not visible in this
 * listing.
 */
8288 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8289 const char *mac, int mtu)
8291 char ritr_pl[MLXSW_REG_RITR_LEN];
8294 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8295 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8299 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
8300 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
8301 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
8302 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Re-program a RIF after its netdev's MAC address or MTU changed.
 *
 * Removes the FDB entry for the cached old address (rif->addr), edits the
 * RIF with the netdev's current address, installs an FDB entry for the new
 * address and, when the MTU changed, propagates it to each per-protocol
 * multicast routing table of the RIF's VR. Finally the new address and MTU
 * are cached in the RIF. The visible error path restores the RIF and the
 * FDB entry from the cached rif->addr / rif->mtu.
 *
 * NOTE(review): the error checks, unwind labels and returns are not
 * visible in this listing.
 */
8306 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
8307 struct mlxsw_sp_rif *rif)
8309 struct net_device *dev = rif->dev;
8313 fid_index = mlxsw_sp_fid_index(rif->fid);
8315 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
8319 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
8324 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
8326 goto err_rif_fdb_op;
8328 if (rif->mtu != dev->mtu) {
8329 struct mlxsw_sp_vr *vr;
8332 /* The RIF is relevant only to its mr_table instance, as unlike
8333 * unicast routing, in multicast routing a RIF cannot be shared
8334 * between several multicast routing tables.
8336 vr = &mlxsw_sp->router->vrs[rif->vr_id];
8337 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8338 mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
8342 ether_addr_copy(rif->addr, dev->dev_addr);
8343 rif->mtu = dev->mtu;
8345 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
8350 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
8352 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
/* Validate a pending MAC address change on a RIF's netdev: the proposed
 * address (info->dev_addr) is checked against the existing RIFs with
 * mlxsw_sp_router_port_check_rif_addr(), using the extack carried by the
 * notifier info.
 */
8356 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
8357 struct netdev_notifier_pre_changeaddr_info *info)
8359 struct netlink_ext_ack *extack;
8361 extack = netdev_notifier_info_to_extack(&info->info);
8362 return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
8363 info->dev_addr, extack);
/* Netdevice event handler for netdevs that may back a RIF.
 *
 * Under the router lock the RIF of @dev is looked up, then:
 *  - NETDEV_CHANGEMTU / NETDEV_CHANGEADDR re-program the RIF;
 *  - NETDEV_PRE_CHANGEADDR validates the proposed address.
 *
 * NOTE(review): the handling of a missing mlxsw_sp or RIF, the switch
 * statement itself and the return are not visible in this listing.
 */
8366 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
8367 unsigned long event, void *ptr)
8369 struct mlxsw_sp *mlxsw_sp;
8370 struct mlxsw_sp_rif *rif;
8373 mlxsw_sp = mlxsw_sp_lower_get(dev);
8377 mutex_lock(&mlxsw_sp->router->lock);
8378 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8383 case NETDEV_CHANGEMTU:
8384 case NETDEV_CHANGEADDR:
8385 err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
8387 case NETDEV_PRE_CHANGEADDR:
8388 err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
8393 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle a netdev being enslaved to a VRF: replay a NETDEV_DOWN address
 * event for the existing RIF and then a NETDEV_UP event, so the RIF is
 * re-created in the new virtual router.
 *
 * NOTE(review): the condition guarding the NETDEV_DOWN replay is not
 * visible in this listing.
 */
8397 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
8398 struct net_device *l3_dev,
8399 struct netlink_ext_ack *extack)
8401 struct mlxsw_sp_rif *rif;
8403 /* If netdev is already associated with a RIF, then we need to
8404 * destroy it and create a new one with the new virtual router ID.
8406 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8408 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
8411 return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
/* Handle a netdev leaving a VRF: look up its RIF and replay a NETDEV_DOWN
 * address event for it.
 *
 * NOTE(review): the handling of a failed lookup is not visible in this
 * listing.
 */
8414 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
8415 struct net_device *l3_dev)
8417 struct mlxsw_sp_rif *rif;
8419 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8422 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
/* Netdevice CHANGEUPPER handler for VRF enslavement.
 *
 * Netdevs without an mlxsw_sp instance below them, and macvlans, take an
 * early path. Otherwise, under the router lock, NETDEV_CHANGEUPPER with
 * info->linking set joins the VRF (passing the notifier's extack) and the
 * alternative path leaves it. NETDEV_PRECHANGEUPPER has its own case.
 *
 * NOTE(review): the switch statement, the early return and the final
 * return are not visible in this listing.
 */
8425 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
8426 struct netdev_notifier_changeupper_info *info)
8428 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
8431 /* We do not create a RIF for a macvlan, but only use it to
8432 * direct more MAC addresses to the router.
8434 if (!mlxsw_sp || netif_is_macvlan(l3_dev))
8437 mutex_lock(&mlxsw_sp->router->lock);
8439 case NETDEV_PRECHANGEUPPER:
8441 case NETDEV_CHANGEUPPER:
8442 if (info->linking) {
8443 struct netlink_ext_ack *extack;
8445 extack = netdev_notifier_info_to_extack(&info->info);
8446 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
8448 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
8452 mutex_unlock(&mlxsw_sp->router->lock);
/* Upper-device walk callback: for a macvlan upper, remove the FDB entry
 * for its MAC from the FID of the RIF passed in priv->data.
 *
 * NOTE(review): the value returned for non-macvlan uppers is not visible
 * in this listing.
 */
8457 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
8458 struct netdev_nested_priv *priv)
8460 struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
8462 if (!netif_is_macvlan(dev))
8465 return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
8466 mlxsw_sp_fid_index(rif->fid), false);
/* Remove the router FDB entries of all macvlan uppers of a RIF's netdev.
 *
 * Only acts when the netdev is a macvlan port. In that case it logs a
 * warning and walks all upper devices with __mlxsw_sp_rif_macvlan_flush(),
 * passing the RIF as the walk's private data.
 *
 * NOTE(review): the value returned when the netdev is not a macvlan port
 * is not visible in this listing.
 */
8469 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
8471 struct netdev_nested_priv priv = {
8472 .data = (void *)rif,
8475 if (!netif_is_macvlan_port(rif->dev))
8478 netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
8479 return netdev_walk_all_upper_dev_rcu(rif->dev,
8480 __mlxsw_sp_rif_macvlan_flush, &priv);
/* ops->setup() for sub-port RIFs: initialise the reference count to 1 and
 * copy the VID, LAG flag and LAG ID / system port from @params.
 *
 * NOTE(review): the gaps in the embedded line numbers suggest lag_id and
 * system_port are assigned on alternative branches; the branch statements
 * are not visible in this listing.
 */
8483 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
8484 const struct mlxsw_sp_rif_params *params)
8486 struct mlxsw_sp_rif_subport *rif_subport;
8488 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8489 refcount_set(&rif_subport->ref_count, 1);
8490 rif_subport->vid = params->vid;
8491 rif_subport->lag = params->lag;
8493 rif_subport->lag_id = params->lag_id;
8495 rif_subport->system_port = params->system_port;
/* Create (@enable true) or destroy (@enable false) a sub-port RIF in
 * hardware.
 *
 * Packs a RITR register of type MLXSW_REG_RITR_SP_IF with the RIF index,
 * VR ID and the netdev's MTU and MAC, adds the sub-port fields (LAG flag,
 * and the LAG ID or system port depending on that flag) and writes the
 * register.
 *
 * NOTE(review): the trailing arguments of the sp_if pack call are not
 * visible in this listing.
 */
8498 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
8500 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8501 struct mlxsw_sp_rif_subport *rif_subport;
8502 char ritr_pl[MLXSW_REG_RITR_LEN];
8504 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8505 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
8506 rif->rif_index, rif->vr_id, rif->dev->mtu);
8507 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
8508 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
8509 rif_subport->lag ? rif_subport->lag_id :
8510 rif_subport->system_port,
8513 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* ops->configure() for sub-port RIFs: create the RIF in hardware, install
 * an FDB entry for the netdev's MAC in the RIF's FID and link the FID back
 * to the RIF. If the FDB step fails, the hardware RIF is destroyed again.
 *
 * NOTE(review): the error checks and returns are not visible in this
 * listing.
 */
8516 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
8520 err = mlxsw_sp_rif_subport_op(rif, true);
8524 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8525 mlxsw_sp_fid_index(rif->fid), true);
8527 goto err_rif_fdb_op;
8529 mlxsw_sp_fid_rif_set(rif->fid, rif);
8533 mlxsw_sp_rif_subport_op(rif, false);
/* ops->deconfigure() for sub-port RIFs: unlink the FID from the RIF,
 * remove the FDB entry for the netdev's MAC, flush the FDB entries of any
 * macvlan uppers and destroy the RIF in hardware.
 */
8537 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
8539 struct mlxsw_sp_fid *fid = rif->fid;
8541 mlxsw_sp_fid_rif_set(fid, NULL);
8542 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8543 mlxsw_sp_fid_index(fid), false);
8544 mlxsw_sp_rif_macvlan_flush(rif);
8545 mlxsw_sp_rif_subport_op(rif, false);
/* ops->fid_get() for sub-port RIFs: get the rFID keyed by the RIF index.
 * @extack is unused here.
 */
8548 static struct mlxsw_sp_fid *
8549 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
8550 struct netlink_ext_ack *extack)
8552 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
/* Operations for sub-port RIFs. These RIFs are allocated as
 * struct mlxsw_sp_rif_subport; no fdb_del callback is provided.
 */
8555 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
8556 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
8557 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
8558 .setup = mlxsw_sp_rif_subport_setup,
8559 .configure = mlxsw_sp_rif_subport_configure,
8560 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
8561 .fid_get = mlxsw_sp_rif_subport_fid_get,
/* Create or destroy (@enable) a VLAN/FID RIF in hardware: packs a RITR
 * register of the given interface @type with the RIF index, VR ID and the
 * netdev's MAC, sets the VID/FID field to @vid_fid and writes the
 * register.
 *
 * NOTE(review): the final argument of the pack call is not visible in
 * this listing.
 */
8564 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
8565 enum mlxsw_reg_ritr_if_type type,
8566 u16 vid_fid, bool enable)
8568 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8569 char ritr_pl[MLXSW_REG_RITR_LEN];
8571 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
8573 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
8574 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
8576 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Return the port number used for the router port: one past the core's
 * maximum port count. Note the result is narrowed to u8.
 */
8579 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
8581 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
/* ops->configure() for FID-based RIFs (shared by the FID and VLAN ops).
 *
 * Creates the RIF in hardware as a FID interface, enables multicast and
 * broadcast flooding towards the router port in the FID, installs an FDB
 * entry for the netdev's MAC and links the FID back to the RIF. On failure
 * the steps already done are undone in reverse order.
 *
 * NOTE(review): the error checks, one unwind label and the returns are
 * not visible in this listing.
 */
8584 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
8586 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8587 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
8590 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
8595 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8596 mlxsw_sp_router_port(mlxsw_sp), true);
8598 goto err_fid_mc_flood_set;
8600 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8601 mlxsw_sp_router_port(mlxsw_sp), true);
8603 goto err_fid_bc_flood_set;
8605 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8606 mlxsw_sp_fid_index(rif->fid), true);
8608 goto err_rif_fdb_op;
8610 mlxsw_sp_fid_rif_set(rif->fid, rif);
8614 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8615 mlxsw_sp_router_port(mlxsw_sp), false);
8616 err_fid_bc_flood_set:
8617 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8618 mlxsw_sp_router_port(mlxsw_sp), false);
8619 err_fid_mc_flood_set:
8620 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* ops->deconfigure() for FID-based RIFs; reverses
 * mlxsw_sp_rif_fid_configure(). Unlinks the FID from the RIF, removes the
 * FDB entry for the netdev's MAC, flushes macvlan upper FDB entries,
 * disables broadcast and multicast flooding to the router port and
 * destroys the RIF in hardware.
 */
8624 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
8626 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
8627 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8628 struct mlxsw_sp_fid *fid = rif->fid;
8630 mlxsw_sp_fid_rif_set(fid, NULL);
8631 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8632 mlxsw_sp_fid_index(fid), false);
8633 mlxsw_sp_rif_macvlan_flush(rif);
8634 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8635 mlxsw_sp_router_port(mlxsw_sp), false);
8636 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8637 mlxsw_sp_router_port(mlxsw_sp), false);
8638 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* ops->fid_get() for FID RIFs: get the 802.1D FID keyed by the ifindex of
 * the RIF's netdev. @extack is unused here.
 */
8641 static struct mlxsw_sp_fid *
8642 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
8643 struct netlink_ext_ack *extack)
8645 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
/* ops->fdb_del() for FID RIFs: look up the bridge port that has @mac in
 * the bridge FDB (VID 0) and send a SWITCHDEV_FDB_DEL_TO_BRIDGE
 * notification for it.
 *
 * NOTE(review): the handling of a failed lookup and the initialisation of
 * the notifier info are not visible in this listing.
 */
8648 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
8650 struct switchdev_notifier_fdb_info info;
8651 struct net_device *dev;
8653 dev = br_fdb_find_port(rif->dev, mac, 0);
8659 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
/* Operations for FID RIFs. They use the generic struct mlxsw_sp_rif and
 * provide no setup callback.
 */
8663 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
8664 .type = MLXSW_SP_RIF_TYPE_FID,
8665 .rif_size = sizeof(struct mlxsw_sp_rif),
8666 .configure = mlxsw_sp_rif_fid_configure,
8667 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
8668 .fid_get = mlxsw_sp_rif_fid_fid_get,
8669 .fdb_del = mlxsw_sp_rif_fid_fdb_del,
/* ops->fid_get() for VLAN RIFs: determine the VID and get the matching
 * 802.1Q FID.
 *
 * If the RIF's netdev is a VLAN device, the VID is the VLAN's ID and its
 * real device must be a bridge master (otherwise WARN and -EINVAL). On the
 * other path the VID is the PVID reported by br_vlan_get_pvid() for the
 * netdev; a lookup error or a zero PVID sets an extack message and yields
 * -EINVAL.
 */
8672 static struct mlxsw_sp_fid *
8673 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
8674 struct netlink_ext_ack *extack)
8676 struct net_device *br_dev;
8680 if (is_vlan_dev(rif->dev)) {
8681 vid = vlan_dev_vlan_id(rif->dev);
8682 br_dev = vlan_dev_real_dev(rif->dev);
8683 if (WARN_ON(!netif_is_bridge_master(br_dev)))
8684 return ERR_PTR(-EINVAL);
8686 err = br_vlan_get_pvid(rif->dev, &vid);
8687 if (err < 0 || !vid) {
8688 NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
8689 return ERR_PTR(-EINVAL);
8693 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
/* ops->fdb_del() for VLAN RIFs: resolve the bridge device (the real device
 * when the RIF's netdev is a VLAN device, the netdev itself otherwise),
 * look up the bridge port that has @mac in the FID's VID and send a
 * SWITCHDEV_FDB_DEL_TO_BRIDGE notification for it.
 *
 * NOTE(review): the handling of a failed lookup and the initialisation of
 * the notifier info are not visible in this listing.
 */
8696 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
8698 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
8699 struct switchdev_notifier_fdb_info info;
8700 struct net_device *br_dev;
8701 struct net_device *dev;
8703 br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
8704 dev = br_fdb_find_port(br_dev, mac, vid);
8710 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
/* Operations for VLAN RIFs. They reuse the FID RIF configure/deconfigure
 * callbacks; only the FID lookup and FDB deletion are VLAN specific.
 */
8714 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
8715 .type = MLXSW_SP_RIF_TYPE_VLAN,
8716 .rif_size = sizeof(struct mlxsw_sp_rif),
8717 .configure = mlxsw_sp_rif_fid_configure,
8718 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
8719 .fid_get = mlxsw_sp_rif_vlan_fid_get,
8720 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
/* Convert a generic RIF pointer to the loopback RIF that embeds it as its
 * 'common' member. Only valid for RIFs that were allocated as
 * struct mlxsw_sp_rif_ipip_lb.
 */
8723 static struct mlxsw_sp_rif_ipip_lb *
8724 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
8726 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
/* ops->setup() for loopback RIFs: recover the loopback-specific parameter
 * structure that embeds @params and copy its lb_config into the RIF.
 * @params must therefore be embedded in a
 * struct mlxsw_sp_rif_params_ipip_lb.
 */
8730 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
8731 const struct mlxsw_sp_rif_params *params)
8733 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
8734 struct mlxsw_sp_rif_ipip_lb *rif_lb;
8736 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
8738 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
8739 rif_lb->lb_config = params_lb->lb_config;
/* Spectrum-1 ops->configure() for loopback RIFs.
 *
 * Obtains the virtual router of the tunnel's underlay table, programs the
 * loopback RIF with that VR ID (and underlay RIF ID 0) and records both
 * IDs in the RIF. If programming fails, the VR is released again.
 *
 * NOTE(review): the error checks and returns are not visible in this
 * listing.
 */
8743 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
8745 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8746 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
8747 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8748 struct mlxsw_sp_vr *ul_vr;
8751 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
8753 return PTR_ERR(ul_vr);
8755 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
8757 goto err_loopback_op;
8759 lb_rif->ul_vr_id = ul_vr->id;
8760 lb_rif->ul_rif_id = 0;
8765 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
/* Spectrum-1 ops->deconfigure() for loopback RIFs: disable the loopback
 * RIF in hardware using the underlay VR recorded at configure time, then
 * release that VR.
 */
8769 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
8771 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8772 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8773 struct mlxsw_sp_vr *ul_vr;
8775 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
8776 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
8779 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
/* Spectrum-1 operations for loopback (IPIP) RIFs. These RIFs are allocated
 * as struct mlxsw_sp_rif_ipip_lb; no fid_get or fdb_del callbacks are
 * provided.
 */
8782 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
8783 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
8784 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
8785 .setup = mlxsw_sp_rif_ipip_lb_setup,
8786 .configure = mlxsw_sp1_rif_ipip_lb_configure,
8787 .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure,
/* Spectrum-1 RIF operations table, indexed by enum mlxsw_sp_rif_type.
 * mlxsw_sp_rif_create() selects an entry through mlxsw_sp->rif_ops_arr[].
 */
8790 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
8791 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
8792 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
8793 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
8794 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
/* Create or destroy (@enable) an underlay RIF in hardware: packs a RITR
 * register of type MLXSW_REG_RITR_LOOPBACK_IF with the RIF index, VR ID
 * and IP_MAX_MTU, selects the generic loopback protocol and writes the
 * register.
 */
8798 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
8800 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8801 char ritr_pl[MLXSW_REG_RITR_LEN];
8803 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
8804 ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
8805 mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
8806 MLXSW_REG_RITR_LOOPBACK_GENERIC);
8808 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Create an underlay RIF in virtual router @vr.
 *
 * Allocates a free RIF index and a plain struct mlxsw_sp_rif with no
 * netdev (NULL l3_dev), publishes it in router->rifs[] and programs it in
 * hardware. Returns ERR_PTR() when no index is free, on allocation failure
 * (-ENOMEM) or when programming fails. In the last case the rifs[] slot is
 * cleared again.
 *
 * NOTE(review): the error checks, the freeing of the RIF on failure and
 * the success return are not visible in this listing.
 */
8811 static struct mlxsw_sp_rif *
8812 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
8813 struct netlink_ext_ack *extack)
8815 struct mlxsw_sp_rif *ul_rif;
8819 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
8821 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8822 return ERR_PTR(err);
8825 ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
8827 return ERR_PTR(-ENOMEM);
8829 mlxsw_sp->router->rifs[rif_index] = ul_rif;
8830 ul_rif->mlxsw_sp = mlxsw_sp;
8831 err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
8838 mlxsw_sp->router->rifs[rif_index] = NULL;
8840 return ERR_PTR(err);
/* Tear down an underlay RIF: issue the RITR write with enable=false and
 * clear the RIF's slot in router->rifs[]. The return value of the register
 * write is not checked.
 */
8843 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
8845 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8847 mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
8848 mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
/* Get the underlay RIF of the VR obtained for table @tb_id.
 *
 * The VR comes from mlxsw_sp_vr_get(); its error is passed on with
 * ERR_CAST(). vr->ul_rif_refcnt is bumped with refcount_inc_not_zero() when
 * it is already non-zero (presumably the existing vr->ul_rif is then
 * returned - confirm). Otherwise a RIF is created with
 * mlxsw_sp_ul_rif_create(), stored in vr->ul_rif, and the refcount is set
 * to 1. If creation fails, the VR is released with mlxsw_sp_vr_put() and
 * the error is returned as an ERR_PTR().
 */
8852 static struct mlxsw_sp_rif *
8853 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
8854 struct netlink_ext_ack *extack)
8856 struct mlxsw_sp_vr *vr;
8859 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
8861 return ERR_CAST(vr);
8863 if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
8866 vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
8867 if (IS_ERR(vr->ul_rif)) {
8868 err = PTR_ERR(vr->ul_rif);
8869 goto err_ul_rif_create;
8873 refcount_set(&vr->ul_rif_refcnt, 1);
8878 mlxsw_sp_vr_put(mlxsw_sp, vr);
8879 return ERR_PTR(err);
/* Drop one reference on an underlay RIF.
 *
 * The owning VR is found through ul_rif->vr_id. The destroy and
 * mlxsw_sp_vr_put() calls below follow a refcount_dec_and_test() on
 * vr->ul_rif_refcnt, i.e. they belong to the last-reference path.
 */
8882 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
8884 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8885 struct mlxsw_sp_vr *vr;
8887 vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
8889 if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
8893 mlxsw_sp_ul_rif_destroy(ul_rif);
8894 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Locked wrapper around mlxsw_sp_ul_rif_get() for table @ul_tb_id.
 *
 * Runs under router->lock, passes a NULL extack, and on success stores the
 * RIF's index through *ul_rif_index. On failure err is set from
 * PTR_ERR() of the returned pointer.
 */
8897 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
8900 struct mlxsw_sp_rif *ul_rif;
8903 mutex_lock(&mlxsw_sp->router->lock);
8904 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
8905 if (IS_ERR(ul_rif)) {
8906 err = PTR_ERR(ul_rif);
8909 *ul_rif_index = ul_rif->rif_index;
8911 mutex_unlock(&mlxsw_sp->router->lock);
/* Locked counterpart of mlxsw_sp_router_ul_rif_get(): looks the RIF up by
 * @ul_rif_index in router->rifs[] under router->lock and releases it with
 * mlxsw_sp_ul_rif_put(). An empty table slot triggers WARN_ON().
 */
8915 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
8917 struct mlxsw_sp_rif *ul_rif;
8919 mutex_lock(&mlxsw_sp->router->lock);
8920 ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
8921 if (WARN_ON(!ul_rif))
8924 mlxsw_sp_ul_rif_put(ul_rif);
8926 mutex_unlock(&mlxsw_sp->router->lock);
/* mlxsw_sp2 configure callback for an IP-in-IP loopback RIF.
 *
 * Takes a reference on the underlay RIF of the tunnel device's underlay
 * table (mlxsw_sp_ipip_dev_ul_tb_id(rif->dev)), then calls
 * mlxsw_sp_rif_ipip_lb_op() with VR id 0 and that RIF's index. On success
 * the loopback RIF records ul_vr_id = 0 and ul_rif_id = the underlay RIF
 * index. If the loopback operation fails, the underlay RIF reference is
 * dropped again.
 */
8930 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
8932 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8933 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
8934 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8935 struct mlxsw_sp_rif *ul_rif;
8938 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
8940 return PTR_ERR(ul_rif);
8942 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
8944 goto err_loopback_op;
8946 lb_rif->ul_vr_id = 0;
8947 lb_rif->ul_rif_id = ul_rif->rif_index;
8952 mlxsw_sp_ul_rif_put(ul_rif);
/* mlxsw_sp2 deconfigure callback for an IP-in-IP loopback RIF.
 *
 * Looks up the underlay RIF by the index saved in lb_rif->ul_rif_id, calls
 * mlxsw_sp_rif_ipip_lb_op() with a final argument of false for that index,
 * and drops the underlay RIF reference with mlxsw_sp_ul_rif_put().
 */
8956 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
8958 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8959 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8960 struct mlxsw_sp_rif *ul_rif;
8962 ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
8963 mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
8964 mlxsw_sp_ul_rif_put(ul_rif);
/* RIF ops for the IPIP loopback RIF type, mlxsw_sp2 flavour: same type,
 * rif_size and setup callback as the mlxsw_sp1 ops, but with the
 * sp2-specific configure/deconfigure handlers.
 */
8967 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
8968 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
8969 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
8970 .setup = mlxsw_sp_rif_ipip_lb_setup,
8971 .configure = mlxsw_sp2_rif_ipip_lb_configure,
8972 .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure,
/* mlxsw_sp2 table of RIF ops, indexed by MLXSW_SP_RIF_TYPE_*. It differs
 * from the mlxsw_sp1 table only in the IPIP loopback entry.
 */
8975 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
8976 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
8977 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
8978 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
8979 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
/* Allocate router->rifs: a zero-initialised (kcalloc) array of RIF
 * pointers with one slot per RIF reported by the MAX_RIFS core resource.
 * The allocation result is checked for NULL.
 */
8982 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
8984 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8986 mlxsw_sp->router->rifs = kcalloc(max_rifs,
8987 sizeof(struct mlxsw_sp_rif *),
8989 if (!mlxsw_sp->router->rifs)
/* Free router->rifs. Every slot is expected to be NULL by now; a slot that
 * is still populated triggers WARN_ON_ONCE() but is not released here.
 */
8995 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
8999 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
9000 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
9002 kfree(mlxsw_sp->router->rifs);
/* Pack the TIGCR register with the arguments (true, 0) and write it.
 * Returns the result of mlxsw_reg_write().
 * NOTE(review): the meaning of the two pack arguments is defined by
 * mlxsw_reg_tigcr_pack() and is not visible here.
 */
9006 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
9008 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
9010 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
9011 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
/* Initialise IP-in-IP state of the router: install the ipip ops array,
 * initialise the (empty) ipip_list, run the ECN encap and decap init
 * helpers, and finish by configuring TIGCR, whose result is returned.
 */
9014 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
9018 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
9019 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
9021 err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
9024 err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
9028 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
/* Nothing is freed here; this only warns if router->ipip_list still has
 * entries at teardown time.
 */
9031 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
9033 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
/* Flush callback handed to register_fib_notifier() by
 * mlxsw_sp_router_init(). @nb is the fib_nb member embedded in
 * struct mlxsw_sp_router; the router is recovered from it with
 * container_of(). The ordered work queue is flushed first, then the
 * device FIB is flushed via mlxsw_sp_router_fib_flush().
 */
9036 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
9038 struct mlxsw_sp_router *router;
9040 /* Flush pending FIB notifications and then flush the device's
9041 * table before requesting another dump. The FIB notification
9042 * block is unregistered, so no need to take RTNL.
9044 mlxsw_core_flush_owq();
9045 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
9046 mlxsw_sp_router_fib_flush(router->mlxsw_sp);
9049 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable outer header class @header in the RECR2 payload @recr2_pl. */
9050 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
9052 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
/* Enable outer header field @field in the RECR2 payload @recr2_pl. */
9055 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
9057 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
/* Fill the IPv4 part of the RECR2 multipath-hash payload.
 *
 * Enables the IPv4 header classes (both the non-TCP/UDP and the TCP/UDP
 * one) and the source/destination IP fields. The remaining calls enable
 * the TCP/UDP-over-IPv4 header plus the protocol, source-port and
 * destination-port fields. only_l3 is true when the netns
 * sysctl_fib_multipath_hash_policy is zero.
 * NOTE(review): presumably only_l3 gates the L4 part - confirm.
 */
9060 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
9062 struct net *net = mlxsw_sp_net(mlxsw_sp);
9063 bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
9065 mlxsw_sp_mp_hash_header_set(recr2_pl,
9066 MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
9067 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
9068 mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
9069 mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
9072 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
9073 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
9074 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
9075 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
/* Fill the IPv6 part of the RECR2 multipath-hash payload.
 *
 * Enables the IPv6 header classes (non-TCP/UDP and TCP/UDP), the
 * source/destination IP fields and the next-header field. The later calls
 * enable the flow-label field and, separately, the TCP/UDP-over-IPv6
 * header with the source- and destination-port fields. only_l3 is true
 * when ip6_multipath_hash_policy() returns zero for the netns.
 * NOTE(review): presumably only_l3 selects between the flow label and the
 * L4 ports - confirm.
 */
9078 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
9080 bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
9082 mlxsw_sp_mp_hash_header_set(recr2_pl,
9083 MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
9084 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
9085 mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
9086 mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
9087 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
9089 mlxsw_sp_mp_hash_field_set(recr2_pl,
9090 MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
9092 mlxsw_sp_mp_hash_header_set(recr2_pl,
9093 MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
9094 mlxsw_sp_mp_hash_field_set(recr2_pl,
9095 MLXSW_REG_RECR2_TCP_UDP_SPORT);
9096 mlxsw_sp_mp_hash_field_set(recr2_pl,
9097 MLXSW_REG_RECR2_TCP_UDP_DPORT);
/* Program the multipath hash (RECR2 register).
 *
 * The seed is jhash() over the device base MAC with an initial value of 0.
 * The payload is packed with that seed, extended with the IPv4 and IPv6
 * header/field selections, and written. Returns the result of
 * mlxsw_reg_write().
 */
9101 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
9103 char recr2_pl[MLXSW_REG_RECR2_LEN];
9106 seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
9107 mlxsw_reg_recr2_pack(recr2_pl, seed);
9108 mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
9109 mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
9111 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
/* Second definition of mlxsw_sp_mp_hash_init() with the same signature.
 * NOTE(review): presumably the variant built when CONFIG_IP_ROUTE_MULTIPATH
 * is not set - confirm; its body is not visible here.
 */
9114 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
/* Program the RDPM register (DSCP to priority map).
 *
 * The payload is zeroed, then for every DSCP value i below
 * MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT the entry is packed with
 * rt_tos2priority(i << 2). Returns the result of mlxsw_reg_write().
 */
9120 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
9122 char rdpm_pl[MLXSW_REG_RDPM_LEN];
9125 MLXSW_REG_ZERO(rdpm, rdpm_pl);
9127 /* HW is determining switch priority based on DSCP-bits, but the
9128 * kernel is still doing that based on the ToS. Since there's a
9129 * mismatch in bits we need to make sure to translate the right
9130 * value ToS would observe, skipping the 2 least-significant ECN bits.
9132 for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
9133 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
9135 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
/* Write the router general configuration (RGCR register).
 *
 * The MAX_RIFS core resource is checked for validity before it is read.
 * RGCR is packed with (true, true), the maximum number of router
 * interfaces, and the "usp" bit taken from the netns
 * sysctl_ip_fwd_update_priority. Returns the result of mlxsw_reg_write().
 */
9138 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
9140 struct net *net = mlxsw_sp_net(mlxsw_sp);
9141 bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
9142 char rgcr_pl[MLXSW_REG_RGCR_LEN];
9145 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
9147 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
9149 mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
9150 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
9151 mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
9152 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* Counterpart of __mlxsw_sp_router_init(): writes RGCR packed with
 * (false, false). The result of the register write is ignored.
 */
9155 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
9157 char rgcr_pl[MLXSW_REG_RGCR_LEN];
9159 mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
9160 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* "Basic" low-level router ops. mlxsw_sp_router_init() installs them for
 * IPv6 always, and for IPv4 whenever the XM ops are not selected.
 * fib_entry_op_ctx_size is the size of the private context these ops
 * need; mlxsw_sp_router_ll_op_ctx_init() reads it when sizing the shared
 * op context.
 */
9163 static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = {
9164 .init = mlxsw_sp_router_ll_basic_init,
9165 .ralta_write = mlxsw_sp_router_ll_basic_ralta_write,
9166 .ralst_write = mlxsw_sp_router_ll_basic_ralst_write,
9167 .raltb_write = mlxsw_sp_router_ll_basic_raltb_write,
9168 .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic),
9169 .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack,
9170 .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack,
9171 .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack,
9172 .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack,
9173 .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack,
9174 .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit,
9175 .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed,
/* Allocate the router's shared low-level operation context.
 *
 * Walks the per-protocol ll ops and compares each fib_entry_op_ctx_size
 * against max_size (presumably keeping the largest - confirm). The context
 * is then kzalloc'ed as sizeof(*router->ll_op_ctx) plus max_size bytes,
 * and its fib_entry_priv_list is initialised empty. The allocation result
 * is checked for NULL.
 */
9178 static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router)
9180 size_t max_size = 0;
9183 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
9184 size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size;
9186 if (size > max_size)
9189 router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size,
9191 if (!router->ll_op_ctx)
9193 INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list);
/* Free the shared low-level operation context. Warns if its
 * fib_entry_priv_list is not empty; the list entries are not freed here.
 */
9197 static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
9199 WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
9200 kfree(router->ll_op_ctx);
/* Acquire the router's generic loopback RIF: takes an underlay RIF
 * reference for RT_TABLE_MAIN through mlxsw_sp_router_ul_rif_get() and
 * remembers the returned index in router->lb_rif_index.
 * mlxsw_sp_lb_rif_fini() releases it using that saved index.
 */
9203 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
9208 /* Create a generic loopback RIF associated with the main table
9209 * (default VRF). Any table can be used, but the main table exists
9210 * anyway, so we do not waste resources.
9212 err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
9217 mlxsw_sp->router->lb_rif_index = lb_rif_index;
/* Release the loopback RIF reference taken by mlxsw_sp_lb_rif_init(),
 * identified by the index saved in router->lb_rif_index.
 */
9222 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
9224 mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
/* Bring up the router for @mlxsw_sp.
 *
 * Allocates struct mlxsw_sp_router, links it to @mlxsw_sp, and initialises
 * the sub-components in a fixed order: XM, low-level op context, RGCR,
 * RIF table, IPIP, nexthop hash tables, LPM, multicast routing, VRs,
 * loopback RIF, neighbours, multipath hash, DSCP map and FIB event work.
 * The inetaddr, inet6addr, netevent, nexthop and FIB notifiers are
 * registered last. @extack is passed to the FIB notifier registration.
 *
 * On failure the error path below unwinds in exactly the reverse order of
 * the steps above; mlxsw_sp_router_fini() performs the same sequence for a
 * fully initialised router.
 */
9227 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
9228 struct netlink_ext_ack *extack)
9230 struct mlxsw_sp_router *router;
9233 router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
9236 mutex_init(&router->lock);
9237 mlxsw_sp->router = router;
9238 router->mlxsw_sp = mlxsw_sp;
9240 err = mlxsw_sp_router_xm_init(mlxsw_sp);
/* IPv4 uses the XM low-level ops when mlxsw_sp_router_xm_ipv4_is_supported()
 * reports support and the basic ops otherwise; IPv6 always uses the basic
 * ops.
 */
9244 router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = mlxsw_sp_router_xm_ipv4_is_supported(mlxsw_sp) ?
9245 &mlxsw_sp_router_ll_xm_ops :
9246 &mlxsw_sp_router_ll_basic_ops;
9247 router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops;
/* Must follow the proto_ll_ops assignment: the context size is derived
 * from those ops.
 */
9249 err = mlxsw_sp_router_ll_op_ctx_init(router);
9251 goto err_ll_op_ctx_init;
9253 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
9254 err = __mlxsw_sp_router_init(mlxsw_sp);
9256 goto err_router_init;
9258 err = mlxsw_sp_rifs_init(mlxsw_sp);
9262 err = mlxsw_sp_ipips_init(mlxsw_sp);
9264 goto err_ipips_init;
9266 err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
9267 &mlxsw_sp_nexthop_ht_params);
9269 goto err_nexthop_ht_init;
9271 err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
9272 &mlxsw_sp_nexthop_group_ht_params);
9274 goto err_nexthop_group_ht_init;
9276 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
9277 err = mlxsw_sp_lpm_init(mlxsw_sp);
9281 err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
9285 err = mlxsw_sp_vrs_init(mlxsw_sp);
9289 err = mlxsw_sp_lb_rif_init(mlxsw_sp);
9291 goto err_lb_rif_init;
9293 err = mlxsw_sp_neigh_init(mlxsw_sp);
9295 goto err_neigh_init;
9297 err = mlxsw_sp_mp_hash_init(mlxsw_sp);
9299 goto err_mp_hash_init;
9301 err = mlxsw_sp_dscp_init(mlxsw_sp);
/* FIB event work, queue and lock are set up before any notifier is
 * registered below.
 */
9305 INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work);
9306 INIT_LIST_HEAD(&router->fib_event_queue);
9307 spin_lock_init(&router->fib_event_queue_lock);
9309 router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
9310 err = register_inetaddr_notifier(&router->inetaddr_nb);
9312 goto err_register_inetaddr_notifier;
9314 router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
9315 err = register_inet6addr_notifier(&router->inet6addr_nb);
9317 goto err_register_inet6addr_notifier;
9319 mlxsw_sp->router->netevent_nb.notifier_call =
9320 mlxsw_sp_router_netevent_event;
9321 err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
9323 goto err_register_netevent_notifier;
9325 mlxsw_sp->router->nexthop_nb.notifier_call =
9326 mlxsw_sp_nexthop_obj_event;
9327 err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
9328 &mlxsw_sp->router->nexthop_nb,
9331 goto err_register_nexthop_notifier;
/* The FIB notifier is registered last, with
 * mlxsw_sp_router_fib_dump_flush() as its flush callback.
 */
9333 mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
9334 err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
9335 &mlxsw_sp->router->fib_nb,
9336 mlxsw_sp_router_fib_dump_flush, extack);
9338 goto err_register_fib_notifier;
/* Error unwinding: each label undoes everything that was set up before
 * the step that failed, newest first.
 */
9342 err_register_fib_notifier:
9343 unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
9344 &mlxsw_sp->router->nexthop_nb);
9345 err_register_nexthop_notifier:
9346 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
9347 err_register_netevent_notifier:
9348 unregister_inet6addr_notifier(&router->inet6addr_nb);
9349 err_register_inet6addr_notifier:
9350 unregister_inetaddr_notifier(&router->inetaddr_nb);
9351 err_register_inetaddr_notifier:
/* Flush the ordered work queue, then check that no FIB events are left
 * queued before the rest is torn down.
 */
9352 mlxsw_core_flush_owq();
9353 WARN_ON(!list_empty(&router->fib_event_queue));
9356 mlxsw_sp_neigh_fini(mlxsw_sp);
9358 mlxsw_sp_lb_rif_fini(mlxsw_sp);
9360 mlxsw_sp_vrs_fini(mlxsw_sp);
9362 mlxsw_sp_mr_fini(mlxsw_sp);
9364 mlxsw_sp_lpm_fini(mlxsw_sp);
9366 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
9367 err_nexthop_group_ht_init:
9368 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
9369 err_nexthop_ht_init:
9370 mlxsw_sp_ipips_fini(mlxsw_sp);
9372 mlxsw_sp_rifs_fini(mlxsw_sp);
9374 __mlxsw_sp_router_fini(mlxsw_sp);
9376 mlxsw_sp_router_ll_op_ctx_fini(router);
9378 mlxsw_sp_router_xm_fini(mlxsw_sp);
9380 mutex_destroy(&mlxsw_sp->router->lock);
9381 kfree(mlxsw_sp->router);
/* Tear down the router of @mlxsw_sp.
 *
 * Mirrors the error path of mlxsw_sp_router_init() for a fully initialised
 * router: unregister the notifiers (FIB first, inetaddr last), flush the
 * ordered work queue, then finalise the sub-components in reverse order of
 * their initialisation and free the router structure.
 */
9385 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
9387 unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
9388 &mlxsw_sp->router->fib_nb);
9389 unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
9390 &mlxsw_sp->router->nexthop_nb);
9391 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
9392 unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
9393 unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
/* With all notifiers unregistered, flush the ordered work queue; the FIB
 * event queue is expected to be empty afterwards.
 */
9394 mlxsw_core_flush_owq();
9395 WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
9396 mlxsw_sp_neigh_fini(mlxsw_sp);
9397 mlxsw_sp_lb_rif_fini(mlxsw_sp);
9398 mlxsw_sp_vrs_fini(mlxsw_sp);
9399 mlxsw_sp_mr_fini(mlxsw_sp);
9400 mlxsw_sp_lpm_fini(mlxsw_sp);
9401 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
9402 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
9403 mlxsw_sp_ipips_fini(mlxsw_sp);
9404 mlxsw_sp_rifs_fini(mlxsw_sp);
9405 __mlxsw_sp_router_fini(mlxsw_sp);
9406 mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
9407 mlxsw_sp_router_xm_fini(mlxsw_sp);
9408 mutex_destroy(&mlxsw_sp->router->lock);
9409 kfree(mlxsw_sp->router);