Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6-block.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <net/netevent.h>
19 #include <net/neighbour.h>
20 #include <net/arp.h>
21 #include <net/ip_fib.h>
22 #include <net/ip6_fib.h>
23 #include <net/fib_rules.h>
24 #include <net/ip_tunnels.h>
25 #include <net/l3mdev.h>
26 #include <net/addrconf.h>
27 #include <net/ndisc.h>
28 #include <net/ipv6.h>
29 #include <net/fib_notifier.h>
30 #include <net/switchdev.h>
31
32 #include "spectrum.h"
33 #include "core.h"
34 #include "reg.h"
35 #include "spectrum_cnt.h"
36 #include "spectrum_dpipe.h"
37 #include "spectrum_ipip.h"
38 #include "spectrum_mr.h"
39 #include "spectrum_mr_tcam.h"
40 #include "spectrum_router.h"
41 #include "spectrum_span.h"
42
/* Forward declarations of types whose full definitions appear further down
 * in this file; they let earlier structures hold pointers to them.
 */
43 struct mlxsw_sp_fib;
44 struct mlxsw_sp_vr;
45 struct mlxsw_sp_lpm_tree;
46 struct mlxsw_sp_rif_ops;
47
/* Per-device router state, reached throughout this file via
 * mlxsw_sp->router.
 */
48 struct mlxsw_sp_router {
49         struct mlxsw_sp *mlxsw_sp;
50         struct mlxsw_sp_rif **rifs;
51         struct mlxsw_sp_vr *vrs; /* Array scanned up to the MAX_VRS resource */
52         struct rhashtable neigh_ht;
53         struct rhashtable nexthop_group_ht;
54         struct rhashtable nexthop_ht;
55         struct list_head nexthop_list;
56         struct {
57                 /* One tree for each protocol: IPv4 and IPv6 */
58                 struct mlxsw_sp_lpm_tree *proto_trees[2];
59                 struct mlxsw_sp_lpm_tree *trees; /* Array of tree_count entries */
60                 unsigned int tree_count;
61         } lpm;
62         struct {
63                 struct delayed_work dw;
64                 unsigned long interval; /* ms */
65         } neighs_update;
66         struct delayed_work nexthop_probe_dw;
67 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
68         struct list_head nexthop_neighs_list;
69         struct list_head ipip_list;
70         bool aborted;
71         struct notifier_block fib_nb;
72         struct notifier_block netevent_nb;
73         const struct mlxsw_sp_rif_ops **rif_ops_arr;
74         const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
75 };
76
/* State common to every router interface (RIF). Type-specific RIF structures
 * in this file embed it as their first member, named "common".
 */
77 struct mlxsw_sp_rif {
78         struct list_head nexthop_list;
79         struct list_head neigh_list;
80         struct net_device *dev;
81         struct mlxsw_sp_fid *fid;
82         unsigned char addr[ETH_ALEN];
83         int mtu;
84         u16 rif_index; /* Index used when packing the RITR register */
85         u16 vr_id;
86         const struct mlxsw_sp_rif_ops *ops;
87         struct mlxsw_sp *mlxsw_sp;
88
        /* Per-direction counter index; only meaningful while the matching
         * *_valid flag is true (set after a successful counter allocation,
         * cleared when the counter is freed).
         */
89         unsigned int counter_ingress;
90         bool counter_ingress_valid;
91         unsigned int counter_egress;
92         bool counter_egress_valid;
93 };
94
/* Parameters describing a RIF; handed to the RIF type's ->setup() callback
 * (see struct mlxsw_sp_rif_ops).
 */
95 struct mlxsw_sp_rif_params {
96         struct net_device *dev;
        /* NOTE(review): presumably 'lag' selects which union member is
         * meaningful - confirm against the callers.
         */
97         union {
98                 u16 system_port;
99                 u16 lag_id;
100         };
101         u16 vid;
102         bool lag;
103 };
104
/* Sub-port RIF: the common RIF state followed by the same port/LAG, VID and
 * 'lag' fields that struct mlxsw_sp_rif_params carries.
 */
105 struct mlxsw_sp_rif_subport {
106         struct mlxsw_sp_rif common;
107         union {
108                 u16 system_port;
109                 u16 lag_id;
110         };
111         u16 vid;
112         bool lag;
113 };
114
/* IP-in-IP loopback RIF: the common RIF state plus the loopback
 * configuration.
 */
115 struct mlxsw_sp_rif_ipip_lb {
116         struct mlxsw_sp_rif common;
117         struct mlxsw_sp_rif_ipip_lb_config lb_config;
118         u16 ul_vr_id; /* Reserved for Spectrum-2. */
119 };
120
/* Creation parameters for an IP-in-IP loopback RIF: the generic RIF
 * parameters extended with the loopback configuration.
 */
121 struct mlxsw_sp_rif_params_ipip_lb {
122         struct mlxsw_sp_rif_params common;
123         struct mlxsw_sp_rif_ipip_lb_config lb_config;
124 };
125
/* Per-RIF-type operations table; each RIF points at one through
 * mlxsw_sp_rif::ops.
 */
126 struct mlxsw_sp_rif_ops {
127         enum mlxsw_sp_rif_type type;
        /* NOTE(review): presumably the allocation size of the type-specific
         * RIF structure - confirm at the allocation site.
         */
128         size_t rif_size;
129
130         void (*setup)(struct mlxsw_sp_rif *rif,
131                       const struct mlxsw_sp_rif_params *params);
132         int (*configure)(struct mlxsw_sp_rif *rif);
133         void (*deconfigure)(struct mlxsw_sp_rif *rif);
134         struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
135                                          struct netlink_ext_ack *extack);
136         void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
137 };
138
/* Prototypes for helpers that are defined later in this file but are already
 * called by mlxsw_sp_fib_create() / mlxsw_sp_fib_destroy() above their
 * definitions.
 */
139 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
140 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
141                                   struct mlxsw_sp_lpm_tree *lpm_tree);
142 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
143                                      const struct mlxsw_sp_fib *fib,
144                                      u8 tree_id);
145 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
146                                        const struct mlxsw_sp_fib *fib);
147
148 static unsigned int *
149 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
150                            enum mlxsw_sp_rif_counter_dir dir)
151 {
152         switch (dir) {
153         case MLXSW_SP_RIF_COUNTER_EGRESS:
154                 return &rif->counter_egress;
155         case MLXSW_SP_RIF_COUNTER_INGRESS:
156                 return &rif->counter_ingress;
157         }
158         return NULL;
159 }
160
161 static bool
162 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
163                                enum mlxsw_sp_rif_counter_dir dir)
164 {
165         switch (dir) {
166         case MLXSW_SP_RIF_COUNTER_EGRESS:
167                 return rif->counter_egress_valid;
168         case MLXSW_SP_RIF_COUNTER_INGRESS:
169                 return rif->counter_ingress_valid;
170         }
171         return false;
172 }
173
174 static void
175 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
176                                enum mlxsw_sp_rif_counter_dir dir,
177                                bool valid)
178 {
179         switch (dir) {
180         case MLXSW_SP_RIF_COUNTER_EGRESS:
181                 rif->counter_egress_valid = valid;
182                 break;
183         case MLXSW_SP_RIF_COUNTER_INGRESS:
184                 rif->counter_ingress_valid = valid;
185                 break;
186         }
187 }
188
189 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
190                                      unsigned int counter_index, bool enable,
191                                      enum mlxsw_sp_rif_counter_dir dir)
192 {
193         char ritr_pl[MLXSW_REG_RITR_LEN];
194         bool is_egress = false;
195         int err;
196
197         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
198                 is_egress = true;
199         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
200         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
201         if (err)
202                 return err;
203
204         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
205                                     is_egress);
206         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207 }
208
209 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
210                                    struct mlxsw_sp_rif *rif,
211                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
212 {
213         char ricnt_pl[MLXSW_REG_RICNT_LEN];
214         unsigned int *p_counter_index;
215         bool valid;
216         int err;
217
218         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
219         if (!valid)
220                 return -EINVAL;
221
222         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
223         if (!p_counter_index)
224                 return -EINVAL;
225         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
226                              MLXSW_REG_RICNT_OPCODE_NOP);
227         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
228         if (err)
229                 return err;
230         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
231         return 0;
232 }
233
234 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
235                                       unsigned int counter_index)
236 {
237         char ricnt_pl[MLXSW_REG_RICNT_LEN];
238
239         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
240                              MLXSW_REG_RICNT_OPCODE_CLEAR);
241         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
242 }
243
244 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
245                                struct mlxsw_sp_rif *rif,
246                                enum mlxsw_sp_rif_counter_dir dir)
247 {
248         unsigned int *p_counter_index;
249         int err;
250
251         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252         if (!p_counter_index)
253                 return -EINVAL;
254         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
255                                      p_counter_index);
256         if (err)
257                 return err;
258
259         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
260         if (err)
261                 goto err_counter_clear;
262
263         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
264                                         *p_counter_index, true, dir);
265         if (err)
266                 goto err_counter_edit;
267         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
268         return 0;
269
270 err_counter_edit:
271 err_counter_clear:
272         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
273                               *p_counter_index);
274         return err;
275 }
276
277 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
278                                struct mlxsw_sp_rif *rif,
279                                enum mlxsw_sp_rif_counter_dir dir)
280 {
281         unsigned int *p_counter_index;
282
283         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
284                 return;
285
286         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
287         if (WARN_ON(!p_counter_index))
288                 return;
289         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
290                                   *p_counter_index, false, dir);
291         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
292                               *p_counter_index);
293         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
294 }
295
296 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
297 {
298         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
299         struct devlink *devlink;
300
301         devlink = priv_to_devlink(mlxsw_sp->core);
302         if (!devlink_dpipe_table_counter_enabled(devlink,
303                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
304                 return;
305         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
306 }
307
308 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
309 {
310         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
311
312         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
313 }
314
/* Number of bits in the prefix usage bitmap: the bit width of an IPv6
 * address plus one.
 */
315 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
316
/* Bitmap with one bit per prefix length; the helpers below set and clear the
 * bit whose number equals the prefix length.
 */
317 struct mlxsw_sp_prefix_usage {
318         DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
319 };
320
/* Iterate @prefix over every bit that is set in @prefix_usage. */
321 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
322         for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
323
324 static bool
325 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
326                          struct mlxsw_sp_prefix_usage *prefix_usage2)
327 {
328         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
329 }
330
331 static void
332 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
333                           struct mlxsw_sp_prefix_usage *prefix_usage2)
334 {
335         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
336 }
337
338 static void
339 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
340                           unsigned char prefix_len)
341 {
342         set_bit(prefix_len, prefix_usage->b);
343 }
344
345 static void
346 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
347                             unsigned char prefix_len)
348 {
349         clear_bit(prefix_len, prefix_usage->b);
350 }
351
/* Key identifying a FIB node: an address buffer sized for an IPv6 address
 * plus a prefix length.
 */
352 struct mlxsw_sp_fib_key {
353         unsigned char addr[sizeof(struct in6_addr)];
354         unsigned char prefix_len;
355 };
356
/* Kind of a FIB entry; stored in mlxsw_sp_fib_entry::type. */
357 enum mlxsw_sp_fib_entry_type {
358         MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
359         MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
360         MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
361
362         /* This is a special case of local delivery, where a packet should be
363          * decapsulated on reception. Note that there is no corresponding ENCAP,
364          * because that's a type of next hop, not of FIB entry. (There can be
365          * several next hops in a REMOTE entry, and some of them may be
366          * encapsulating entries.)
367          */
368         MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
369         MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
370 };
371
/* Forward declaration; struct mlxsw_sp_fib_entry below only stores a pointer
 * to a nexthop group.
 */
372 struct mlxsw_sp_nexthop_group;
373
/* A node of a FIB, identified by 'key' and pointing back at its owning FIB.
 * NOTE(review): presumably 'ht_node' links it into mlxsw_sp_fib::ht and
 * 'list' into mlxsw_sp_fib::node_list - confirm at the insertion sites.
 */
374 struct mlxsw_sp_fib_node {
375         struct list_head entry_list;
376         struct list_head list;
377         struct rhash_head ht_node;
378         struct mlxsw_sp_fib *fib;
379         struct mlxsw_sp_fib_key key;
380 };
381
/* Decapsulation state embedded in struct mlxsw_sp_fib_entry (as 'decap'). */
382 struct mlxsw_sp_fib_entry_decap {
383         struct mlxsw_sp_ipip_entry *ipip_entry;
384         u32 tunnel_index;
385 };
386
/* Protocol-independent part of a FIB entry; the IPv4 and IPv6 entry
 * structures below embed it as their first member, named "common".
 */
387 struct mlxsw_sp_fib_entry {
388         struct list_head list;
389         struct mlxsw_sp_fib_node *fib_node;
390         enum mlxsw_sp_fib_entry_type type;
391         struct list_head nexthop_group_node;
392         struct mlxsw_sp_nexthop_group *nh_group;
393         struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
394 };
395
/* IPv4 FIB entry: the common entry followed by IPv4-specific fields. */
396 struct mlxsw_sp_fib4_entry {
397         struct mlxsw_sp_fib_entry common;
398         u32 tb_id;
399         u32 prio;
400         u8 tos;
401         u8 type;
402 };
403
/* IPv6 FIB entry: the common entry plus a list head and a counter.
 * NOTE(review): presumably rt6_list holds struct mlxsw_sp_rt6 items and nrt6
 * is their number - confirm where the list is populated.
 */
404 struct mlxsw_sp_fib6_entry {
405         struct mlxsw_sp_fib_entry common;
406         struct list_head rt6_list;
407         unsigned int nrt6;
408 };
409
/* List item wrapping a pointer to a kernel IPv6 route (struct fib6_info). */
410 struct mlxsw_sp_rt6 {
411         struct list_head list;
412         struct fib6_info *rt;
413 };
414
/* One LPM tree slot. A slot whose ref_count is zero is treated as unused and
 * may be picked by mlxsw_sp_lpm_tree_find_unused().
 */
415 struct mlxsw_sp_lpm_tree {
416         u8 id; /* tree ID */
417         unsigned int ref_count; /* Managed by mlxsw_sp_lpm_tree_hold()/_put() */
418         enum mlxsw_sp_l3proto proto;
419         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; /* Zeroed on create */
420         struct mlxsw_sp_prefix_usage prefix_usage; /* Copied from the request on create */
421 };
422
/* A FIB of one L3 protocol inside one virtual router; created by
 * mlxsw_sp_fib_create() and torn down by mlxsw_sp_fib_destroy().
 */
423 struct mlxsw_sp_fib {
424         struct rhashtable ht; /* Initialised with mlxsw_sp_fib_ht_params */
425         struct list_head node_list; /* Expected to be empty on destroy */
426         struct mlxsw_sp_vr *vr;
427         struct mlxsw_sp_lpm_tree *lpm_tree; /* The FIB holds a reference on it */
428         enum mlxsw_sp_l3proto proto;
429 };
430
/* A virtual router slot. It counts as "in use" while any of fib4, fib6 or an
 * mr_table entry is non-NULL (see mlxsw_sp_vr_is_used()).
 */
431 struct mlxsw_sp_vr {
432         u16 id; /* virtual router ID */
433         u32 tb_id; /* kernel fib table id */
434         unsigned int rif_count;
435         struct mlxsw_sp_fib *fib4;
436         struct mlxsw_sp_fib *fib6;
437         struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
438 };
439
/* Declared here so mlxsw_sp_fib_create() can reference it.
 * NOTE(review): the initialised definition is presumably further down in
 * this file - not visible from here.
 */
440 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
441
442 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
443                                                 struct mlxsw_sp_vr *vr,
444                                                 enum mlxsw_sp_l3proto proto)
445 {
446         struct mlxsw_sp_lpm_tree *lpm_tree;
447         struct mlxsw_sp_fib *fib;
448         int err;
449
450         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
451         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
452         if (!fib)
453                 return ERR_PTR(-ENOMEM);
454         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
455         if (err)
456                 goto err_rhashtable_init;
457         INIT_LIST_HEAD(&fib->node_list);
458         fib->proto = proto;
459         fib->vr = vr;
460         fib->lpm_tree = lpm_tree;
461         mlxsw_sp_lpm_tree_hold(lpm_tree);
462         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
463         if (err)
464                 goto err_lpm_tree_bind;
465         return fib;
466
467 err_lpm_tree_bind:
468         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
469 err_rhashtable_init:
470         kfree(fib);
471         return ERR_PTR(err);
472 }
473
474 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
475                                  struct mlxsw_sp_fib *fib)
476 {
477         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
478         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
479         WARN_ON(!list_empty(&fib->node_list));
480         rhashtable_destroy(&fib->ht);
481         kfree(fib);
482 }
483
484 static struct mlxsw_sp_lpm_tree *
485 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
486 {
487         static struct mlxsw_sp_lpm_tree *lpm_tree;
488         int i;
489
490         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
491                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
492                 if (lpm_tree->ref_count == 0)
493                         return lpm_tree;
494         }
495         return NULL;
496 }
497
498 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
499                                    struct mlxsw_sp_lpm_tree *lpm_tree)
500 {
501         char ralta_pl[MLXSW_REG_RALTA_LEN];
502
503         mlxsw_reg_ralta_pack(ralta_pl, true,
504                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
505                              lpm_tree->id);
506         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
507 }
508
509 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
510                                    struct mlxsw_sp_lpm_tree *lpm_tree)
511 {
512         char ralta_pl[MLXSW_REG_RALTA_LEN];
513
514         mlxsw_reg_ralta_pack(ralta_pl, false,
515                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
516                              lpm_tree->id);
517         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
518 }
519
520 static int
521 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
522                                   struct mlxsw_sp_prefix_usage *prefix_usage,
523                                   struct mlxsw_sp_lpm_tree *lpm_tree)
524 {
525         char ralst_pl[MLXSW_REG_RALST_LEN];
526         u8 root_bin = 0;
527         u8 prefix;
528         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
529
530         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
531                 root_bin = prefix;
532
533         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
534         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
535                 if (prefix == 0)
536                         continue;
537                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
538                                          MLXSW_REG_RALST_BIN_NO_CHILD);
539                 last_prefix = prefix;
540         }
541         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
542 }
543
544 static struct mlxsw_sp_lpm_tree *
545 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
546                          struct mlxsw_sp_prefix_usage *prefix_usage,
547                          enum mlxsw_sp_l3proto proto)
548 {
549         struct mlxsw_sp_lpm_tree *lpm_tree;
550         int err;
551
552         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
553         if (!lpm_tree)
554                 return ERR_PTR(-EBUSY);
555         lpm_tree->proto = proto;
556         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
557         if (err)
558                 return ERR_PTR(err);
559
560         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
561                                                 lpm_tree);
562         if (err)
563                 goto err_left_struct_set;
564         memcpy(&lpm_tree->prefix_usage, prefix_usage,
565                sizeof(lpm_tree->prefix_usage));
566         memset(&lpm_tree->prefix_ref_count, 0,
567                sizeof(lpm_tree->prefix_ref_count));
568         lpm_tree->ref_count = 1;
569         return lpm_tree;
570
571 err_left_struct_set:
572         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
573         return ERR_PTR(err);
574 }
575
/* Destroy @lpm_tree; this consists solely of mlxsw_sp_lpm_tree_free(). */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	struct mlxsw_sp_lpm_tree *tree = lpm_tree;

	mlxsw_sp_lpm_tree_free(mlxsw_sp, tree);
}
581
582 static struct mlxsw_sp_lpm_tree *
583 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
584                       struct mlxsw_sp_prefix_usage *prefix_usage,
585                       enum mlxsw_sp_l3proto proto)
586 {
587         struct mlxsw_sp_lpm_tree *lpm_tree;
588         int i;
589
590         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
591                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
592                 if (lpm_tree->ref_count != 0 &&
593                     lpm_tree->proto == proto &&
594                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
595                                              prefix_usage)) {
596                         mlxsw_sp_lpm_tree_hold(lpm_tree);
597                         return lpm_tree;
598                 }
599         }
600         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
601 }
602
603 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
604 {
605         lpm_tree->ref_count++;
606 }
607
608 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
609                                   struct mlxsw_sp_lpm_tree *lpm_tree)
610 {
611         if (--lpm_tree->ref_count == 0)
612                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
613 }
614
615 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
616
617 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
618 {
619         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
620         struct mlxsw_sp_lpm_tree *lpm_tree;
621         u64 max_trees;
622         int err, i;
623
624         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
625                 return -EIO;
626
627         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
628         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
629         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
630                                              sizeof(struct mlxsw_sp_lpm_tree),
631                                              GFP_KERNEL);
632         if (!mlxsw_sp->router->lpm.trees)
633                 return -ENOMEM;
634
635         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
636                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
637                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
638         }
639
640         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
641                                          MLXSW_SP_L3_PROTO_IPV4);
642         if (IS_ERR(lpm_tree)) {
643                 err = PTR_ERR(lpm_tree);
644                 goto err_ipv4_tree_get;
645         }
646         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
647
648         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
649                                          MLXSW_SP_L3_PROTO_IPV6);
650         if (IS_ERR(lpm_tree)) {
651                 err = PTR_ERR(lpm_tree);
652                 goto err_ipv6_tree_get;
653         }
654         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
655
656         return 0;
657
658 err_ipv6_tree_get:
659         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
660         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
661 err_ipv4_tree_get:
662         kfree(mlxsw_sp->router->lpm.trees);
663         return err;
664 }
665
666 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
667 {
668         struct mlxsw_sp_lpm_tree *lpm_tree;
669
670         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
671         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
672
673         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
674         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
675
676         kfree(mlxsw_sp->router->lpm.trees);
677 }
678
679 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
680 {
681         return !!vr->fib4 || !!vr->fib6 ||
682                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
683                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
684 }
685
686 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
687 {
688         struct mlxsw_sp_vr *vr;
689         int i;
690
691         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
692                 vr = &mlxsw_sp->router->vrs[i];
693                 if (!mlxsw_sp_vr_is_used(vr))
694                         return vr;
695         }
696         return NULL;
697 }
698
699 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
700                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
701 {
702         char raltb_pl[MLXSW_REG_RALTB_LEN];
703
704         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
705                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
706                              tree_id);
707         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
708 }
709
710 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
711                                        const struct mlxsw_sp_fib *fib)
712 {
713         char raltb_pl[MLXSW_REG_RALTB_LEN];
714
715         /* Bind to tree 0 which is default */
716         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
717                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
718         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
719 }
720
721 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
722 {
723         /* For our purpose, squash main, default and local tables into one */
724         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
725                 tb_id = RT_TABLE_MAIN;
726         return tb_id;
727 }
728
729 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
730                                             u32 tb_id)
731 {
732         struct mlxsw_sp_vr *vr;
733         int i;
734
735         tb_id = mlxsw_sp_fix_tb_id(tb_id);
736
737         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
738                 vr = &mlxsw_sp->router->vrs[i];
739                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
740                         return vr;
741         }
742         return NULL;
743 }
744
745 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
746                                 u16 *vr_id)
747 {
748         struct mlxsw_sp_vr *vr;
749
750         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
751         if (!vr)
752                 return -ESRCH;
753         *vr_id = vr->id;
754
755         return 0;
756 }
757
758 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
759                                             enum mlxsw_sp_l3proto proto)
760 {
761         switch (proto) {
762         case MLXSW_SP_L3_PROTO_IPV4:
763                 return vr->fib4;
764         case MLXSW_SP_L3_PROTO_IPV6:
765                 return vr->fib6;
766         }
767         return NULL;
768 }
769
770 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
771                                               u32 tb_id,
772                                               struct netlink_ext_ack *extack)
773 {
774         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
775         struct mlxsw_sp_fib *fib4;
776         struct mlxsw_sp_fib *fib6;
777         struct mlxsw_sp_vr *vr;
778         int err;
779
780         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
781         if (!vr) {
782                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
783                 return ERR_PTR(-EBUSY);
784         }
785         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
786         if (IS_ERR(fib4))
787                 return ERR_CAST(fib4);
788         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
789         if (IS_ERR(fib6)) {
790                 err = PTR_ERR(fib6);
791                 goto err_fib6_create;
792         }
793         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
794                                              MLXSW_SP_L3_PROTO_IPV4);
795         if (IS_ERR(mr4_table)) {
796                 err = PTR_ERR(mr4_table);
797                 goto err_mr4_table_create;
798         }
799         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
800                                              MLXSW_SP_L3_PROTO_IPV6);
801         if (IS_ERR(mr6_table)) {
802                 err = PTR_ERR(mr6_table);
803                 goto err_mr6_table_create;
804         }
805
806         vr->fib4 = fib4;
807         vr->fib6 = fib6;
808         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
809         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
810         vr->tb_id = tb_id;
811         return vr;
812
813 err_mr6_table_create:
814         mlxsw_sp_mr_table_destroy(mr4_table);
815 err_mr4_table_create:
816         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
817 err_fib6_create:
818         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
819         return ERR_PTR(err);
820 }
821
822 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
823                                 struct mlxsw_sp_vr *vr)
824 {
825         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
826         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
827         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
828         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
829         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
830         vr->fib6 = NULL;
831         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
832         vr->fib4 = NULL;
833 }
834
835 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
836                                            struct netlink_ext_ack *extack)
837 {
838         struct mlxsw_sp_vr *vr;
839
840         tb_id = mlxsw_sp_fix_tb_id(tb_id);
841         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
842         if (!vr)
843                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
844         return vr;
845 }
846
847 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
848 {
849         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
850             list_empty(&vr->fib6->node_list) &&
851             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
852             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
853                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
854 }
855
856 static bool
857 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
858                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
859 {
860         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
861
862         if (!mlxsw_sp_vr_is_used(vr))
863                 return false;
864         if (fib->lpm_tree->id == tree_id)
865                 return true;
866         return false;
867 }
868
869 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
870                                         struct mlxsw_sp_fib *fib,
871                                         struct mlxsw_sp_lpm_tree *new_tree)
872 {
873         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
874         int err;
875
876         fib->lpm_tree = new_tree;
877         mlxsw_sp_lpm_tree_hold(new_tree);
878         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
879         if (err)
880                 goto err_tree_bind;
881         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
882         return 0;
883
884 err_tree_bind:
885         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
886         fib->lpm_tree = old_tree;
887         return err;
888 }
889
890 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
891                                          struct mlxsw_sp_fib *fib,
892                                          struct mlxsw_sp_lpm_tree *new_tree)
893 {
894         enum mlxsw_sp_l3proto proto = fib->proto;
895         struct mlxsw_sp_lpm_tree *old_tree;
896         u8 old_id, new_id = new_tree->id;
897         struct mlxsw_sp_vr *vr;
898         int i, err;
899
900         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
901         old_id = old_tree->id;
902
903         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
904                 vr = &mlxsw_sp->router->vrs[i];
905                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
906                         continue;
907                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
908                                                    mlxsw_sp_vr_fib(vr, proto),
909                                                    new_tree);
910                 if (err)
911                         goto err_tree_replace;
912         }
913
914         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
915                sizeof(new_tree->prefix_ref_count));
916         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
917         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
918
919         return 0;
920
921 err_tree_replace:
922         for (i--; i >= 0; i--) {
923                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
924                         continue;
925                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
926                                              mlxsw_sp_vr_fib(vr, proto),
927                                              old_tree);
928         }
929         return err;
930 }
931
932 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
933 {
934         struct mlxsw_sp_vr *vr;
935         u64 max_vrs;
936         int i;
937
938         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
939                 return -EIO;
940
941         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
942         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
943                                         GFP_KERNEL);
944         if (!mlxsw_sp->router->vrs)
945                 return -ENOMEM;
946
947         for (i = 0; i < max_vrs; i++) {
948                 vr = &mlxsw_sp->router->vrs[i];
949                 vr->id = i;
950         }
951
952         return 0;
953 }
954
955 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
956
957 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
958 {
959         /* At this stage we're guaranteed not to have new incoming
960          * FIB notifications and the work queue is free from FIBs
961          * sitting on top of mlxsw netdevs. However, we can still
962          * have other FIBs queued. Flush the queue before flushing
963          * the device's tables. No need for locks, as we're the only
964          * writer.
965          */
966         mlxsw_core_flush_owq();
967         mlxsw_sp_router_fib_flush(mlxsw_sp);
968         kfree(mlxsw_sp->router->vrs);
969 }
970
971 static struct net_device *
972 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
973 {
974         struct ip_tunnel *tun = netdev_priv(ol_dev);
975         struct net *net = dev_net(ol_dev);
976
977         return __dev_get_by_index(net, tun->parms.link);
978 }
979
980 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
981 {
982         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
983
984         if (d)
985                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
986         else
987                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
988 }
989
990 static struct mlxsw_sp_rif *
991 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
992                     const struct mlxsw_sp_rif_params *params,
993                     struct netlink_ext_ack *extack);
994
995 static struct mlxsw_sp_rif_ipip_lb *
996 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
997                                 enum mlxsw_sp_ipip_type ipipt,
998                                 struct net_device *ol_dev,
999                                 struct netlink_ext_ack *extack)
1000 {
1001         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1002         const struct mlxsw_sp_ipip_ops *ipip_ops;
1003         struct mlxsw_sp_rif *rif;
1004
1005         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1006         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1007                 .common.dev = ol_dev,
1008                 .common.lag = false,
1009                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1010         };
1011
1012         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1013         if (IS_ERR(rif))
1014                 return ERR_CAST(rif);
1015         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1016 }
1017
1018 static struct mlxsw_sp_ipip_entry *
1019 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1020                           enum mlxsw_sp_ipip_type ipipt,
1021                           struct net_device *ol_dev)
1022 {
1023         const struct mlxsw_sp_ipip_ops *ipip_ops;
1024         struct mlxsw_sp_ipip_entry *ipip_entry;
1025         struct mlxsw_sp_ipip_entry *ret = NULL;
1026
1027         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1028         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1029         if (!ipip_entry)
1030                 return ERR_PTR(-ENOMEM);
1031
1032         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1033                                                             ol_dev, NULL);
1034         if (IS_ERR(ipip_entry->ol_lb)) {
1035                 ret = ERR_CAST(ipip_entry->ol_lb);
1036                 goto err_ol_ipip_lb_create;
1037         }
1038
1039         ipip_entry->ipipt = ipipt;
1040         ipip_entry->ol_dev = ol_dev;
1041
1042         switch (ipip_ops->ul_proto) {
1043         case MLXSW_SP_L3_PROTO_IPV4:
1044                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1045                 break;
1046         case MLXSW_SP_L3_PROTO_IPV6:
1047                 WARN_ON(1);
1048                 break;
1049         }
1050
1051         return ipip_entry;
1052
1053 err_ol_ipip_lb_create:
1054         kfree(ipip_entry);
1055         return ret;
1056 }
1057
1058 static void
1059 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1060 {
1061         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1062         kfree(ipip_entry);
1063 }
1064
1065 static bool
1066 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1067                                   const enum mlxsw_sp_l3proto ul_proto,
1068                                   union mlxsw_sp_l3addr saddr,
1069                                   u32 ul_tb_id,
1070                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1071 {
1072         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1073         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1074         union mlxsw_sp_l3addr tun_saddr;
1075
1076         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1077                 return false;
1078
1079         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1080         return tun_ul_tb_id == ul_tb_id &&
1081                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1082 }
1083
1084 static int
1085 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1086                               struct mlxsw_sp_fib_entry *fib_entry,
1087                               struct mlxsw_sp_ipip_entry *ipip_entry)
1088 {
1089         u32 tunnel_index;
1090         int err;
1091
1092         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1093                                   1, &tunnel_index);
1094         if (err)
1095                 return err;
1096
1097         ipip_entry->decap_fib_entry = fib_entry;
1098         fib_entry->decap.ipip_entry = ipip_entry;
1099         fib_entry->decap.tunnel_index = tunnel_index;
1100         return 0;
1101 }
1102
1103 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1104                                           struct mlxsw_sp_fib_entry *fib_entry)
1105 {
1106         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1107         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1108         fib_entry->decap.ipip_entry = NULL;
1109         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1110                            1, fib_entry->decap.tunnel_index);
1111 }
1112
1113 static struct mlxsw_sp_fib_node *
1114 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1115                          size_t addr_len, unsigned char prefix_len);
1116 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1117                                      struct mlxsw_sp_fib_entry *fib_entry);
1118
1119 static void
1120 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1121                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1122 {
1123         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1124
1125         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1126         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1127
1128         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1129 }
1130
1131 static void
1132 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1133                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1134                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1135 {
1136         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1137                                           ipip_entry))
1138                 return;
1139         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1140
1141         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1142                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1143 }
1144
1145 static struct mlxsw_sp_fib_entry *
1146 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1147                                      enum mlxsw_sp_l3proto proto,
1148                                      const union mlxsw_sp_l3addr *addr,
1149                                      enum mlxsw_sp_fib_entry_type type)
1150 {
1151         struct mlxsw_sp_fib_entry *fib_entry;
1152         struct mlxsw_sp_fib_node *fib_node;
1153         unsigned char addr_prefix_len;
1154         struct mlxsw_sp_fib *fib;
1155         struct mlxsw_sp_vr *vr;
1156         const void *addrp;
1157         size_t addr_len;
1158         u32 addr4;
1159
1160         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1161         if (!vr)
1162                 return NULL;
1163         fib = mlxsw_sp_vr_fib(vr, proto);
1164
1165         switch (proto) {
1166         case MLXSW_SP_L3_PROTO_IPV4:
1167                 addr4 = be32_to_cpu(addr->addr4);
1168                 addrp = &addr4;
1169                 addr_len = 4;
1170                 addr_prefix_len = 32;
1171                 break;
1172         case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1173         default:
1174                 WARN_ON(1);
1175                 return NULL;
1176         }
1177
1178         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1179                                             addr_prefix_len);
1180         if (!fib_node || list_empty(&fib_node->entry_list))
1181                 return NULL;
1182
1183         fib_entry = list_first_entry(&fib_node->entry_list,
1184                                      struct mlxsw_sp_fib_entry, list);
1185         if (fib_entry->type != type)
1186                 return NULL;
1187
1188         return fib_entry;
1189 }
1190
1191 /* Given an IPIP entry, find the corresponding decap route. */
1192 static struct mlxsw_sp_fib_entry *
1193 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1194                                struct mlxsw_sp_ipip_entry *ipip_entry)
1195 {
1196         static struct mlxsw_sp_fib_node *fib_node;
1197         const struct mlxsw_sp_ipip_ops *ipip_ops;
1198         struct mlxsw_sp_fib_entry *fib_entry;
1199         unsigned char saddr_prefix_len;
1200         union mlxsw_sp_l3addr saddr;
1201         struct mlxsw_sp_fib *ul_fib;
1202         struct mlxsw_sp_vr *ul_vr;
1203         const void *saddrp;
1204         size_t saddr_len;
1205         u32 ul_tb_id;
1206         u32 saddr4;
1207
1208         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1209
1210         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1211         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1212         if (!ul_vr)
1213                 return NULL;
1214
1215         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1216         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1217                                            ipip_entry->ol_dev);
1218
1219         switch (ipip_ops->ul_proto) {
1220         case MLXSW_SP_L3_PROTO_IPV4:
1221                 saddr4 = be32_to_cpu(saddr.addr4);
1222                 saddrp = &saddr4;
1223                 saddr_len = 4;
1224                 saddr_prefix_len = 32;
1225                 break;
1226         case MLXSW_SP_L3_PROTO_IPV6:
1227                 WARN_ON(1);
1228                 return NULL;
1229         }
1230
1231         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1232                                             saddr_prefix_len);
1233         if (!fib_node || list_empty(&fib_node->entry_list))
1234                 return NULL;
1235
1236         fib_entry = list_first_entry(&fib_node->entry_list,
1237                                      struct mlxsw_sp_fib_entry, list);
1238         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1239                 return NULL;
1240
1241         return fib_entry;
1242 }
1243
1244 static struct mlxsw_sp_ipip_entry *
1245 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1246                            enum mlxsw_sp_ipip_type ipipt,
1247                            struct net_device *ol_dev)
1248 {
1249         struct mlxsw_sp_ipip_entry *ipip_entry;
1250
1251         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1252         if (IS_ERR(ipip_entry))
1253                 return ipip_entry;
1254
1255         list_add_tail(&ipip_entry->ipip_list_node,
1256                       &mlxsw_sp->router->ipip_list);
1257
1258         return ipip_entry;
1259 }
1260
1261 static void
1262 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1263                             struct mlxsw_sp_ipip_entry *ipip_entry)
1264 {
1265         list_del(&ipip_entry->ipip_list_node);
1266         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1267 }
1268
1269 static bool
1270 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1271                                   const struct net_device *ul_dev,
1272                                   enum mlxsw_sp_l3proto ul_proto,
1273                                   union mlxsw_sp_l3addr ul_dip,
1274                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1275 {
1276         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1277         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1278
1279         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1280                 return false;
1281
1282         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1283                                                  ul_tb_id, ipip_entry);
1284 }
1285
1286 /* Given decap parameters, find the corresponding IPIP entry. */
1287 static struct mlxsw_sp_ipip_entry *
1288 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1289                                   const struct net_device *ul_dev,
1290                                   enum mlxsw_sp_l3proto ul_proto,
1291                                   union mlxsw_sp_l3addr ul_dip)
1292 {
1293         struct mlxsw_sp_ipip_entry *ipip_entry;
1294
1295         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1296                             ipip_list_node)
1297                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1298                                                       ul_proto, ul_dip,
1299                                                       ipip_entry))
1300                         return ipip_entry;
1301
1302         return NULL;
1303 }
1304
1305 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1306                                       const struct net_device *dev,
1307                                       enum mlxsw_sp_ipip_type *p_type)
1308 {
1309         struct mlxsw_sp_router *router = mlxsw_sp->router;
1310         const struct mlxsw_sp_ipip_ops *ipip_ops;
1311         enum mlxsw_sp_ipip_type ipipt;
1312
1313         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1314                 ipip_ops = router->ipip_ops_arr[ipipt];
1315                 if (dev->type == ipip_ops->dev_type) {
1316                         if (p_type)
1317                                 *p_type = ipipt;
1318                         return true;
1319                 }
1320         }
1321         return false;
1322 }
1323
1324 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1325                                 const struct net_device *dev)
1326 {
1327         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1328 }
1329
1330 static struct mlxsw_sp_ipip_entry *
1331 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1332                                    const struct net_device *ol_dev)
1333 {
1334         struct mlxsw_sp_ipip_entry *ipip_entry;
1335
1336         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1337                             ipip_list_node)
1338                 if (ipip_entry->ol_dev == ol_dev)
1339                         return ipip_entry;
1340
1341         return NULL;
1342 }
1343
1344 static struct mlxsw_sp_ipip_entry *
1345 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1346                                    const struct net_device *ul_dev,
1347                                    struct mlxsw_sp_ipip_entry *start)
1348 {
1349         struct mlxsw_sp_ipip_entry *ipip_entry;
1350
1351         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1352                                         ipip_list_node);
1353         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1354                                      ipip_list_node) {
1355                 struct net_device *ipip_ul_dev =
1356                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1357
1358                 if (ipip_ul_dev == ul_dev)
1359                         return ipip_entry;
1360         }
1361
1362         return NULL;
1363 }
1364
1365 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1366                                 const struct net_device *dev)
1367 {
1368         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1369 }
1370
1371 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1372                                                 const struct net_device *ol_dev,
1373                                                 enum mlxsw_sp_ipip_type ipipt)
1374 {
1375         const struct mlxsw_sp_ipip_ops *ops
1376                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1377
1378         /* For deciding whether decap should be offloaded, we don't care about
1379          * overlay protocol, so ask whether either one is supported.
1380          */
1381         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1382                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1383 }
1384
1385 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1386                                                 struct net_device *ol_dev)
1387 {
1388         struct mlxsw_sp_ipip_entry *ipip_entry;
1389         enum mlxsw_sp_l3proto ul_proto;
1390         enum mlxsw_sp_ipip_type ipipt;
1391         union mlxsw_sp_l3addr saddr;
1392         u32 ul_tb_id;
1393
1394         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1395         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1396                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1397                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1398                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1399                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1400                                                           saddr, ul_tb_id,
1401                                                           NULL)) {
1402                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1403                                                                 ol_dev);
1404                         if (IS_ERR(ipip_entry))
1405                                 return PTR_ERR(ipip_entry);
1406                 }
1407         }
1408
1409         return 0;
1410 }
1411
/* Handle unregistration of tunnel netdevice @ol_dev: destroy its IPIP entry,
 * if one exists.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1421
/* The tunnel netdevice of @ipip_entry is up: if a matching trap route for
 * the tunnel's local address exists, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_route;

	decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_route)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1433
1434 static int
1435 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1436                         struct mlxsw_sp_vr *ul_vr, bool enable)
1437 {
1438         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1439         struct mlxsw_sp_rif *rif = &lb_rif->common;
1440         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1441         char ritr_pl[MLXSW_REG_RITR_LEN];
1442         u32 saddr4;
1443
1444         switch (lb_cf.ul_protocol) {
1445         case MLXSW_SP_L3_PROTO_IPV4:
1446                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1447                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1448                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1449                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1450                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1451                             ul_vr->id, saddr4, lb_cf.okey);
1452                 break;
1453
1454         case MLXSW_SP_L3_PROTO_IPV6:
1455                 return -EAFNOSUPPORT;
1456         }
1457
1458         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1459 }
1460
1461 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1462                                                  struct net_device *ol_dev)
1463 {
1464         struct mlxsw_sp_ipip_entry *ipip_entry;
1465         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1466         struct mlxsw_sp_vr *ul_vr;
1467         int err = 0;
1468
1469         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1470         if (ipip_entry) {
1471                 lb_rif = ipip_entry->ol_lb;
1472                 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1473                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1474                 if (err)
1475                         goto out;
1476                 lb_rif->common.mtu = ol_dev->mtu;
1477         }
1478
1479 out:
1480         return err;
1481 }
1482
/* Handle tunnel netdevice @ol_dev going up; nothing is done when the device
 * has no IPIP entry.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1492
1493 static void
1494 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1495                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1496 {
1497         if (ipip_entry->decap_fib_entry)
1498                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1499 }
1500
/* Handle tunnel netdevice @ol_dev going down; nothing is done when the
 * device has no IPIP entry.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1510
1511 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1512                                          struct mlxsw_sp_rif *old_rif,
1513                                          struct mlxsw_sp_rif *new_rif);
1514 static int
1515 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1516                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1517                                  bool keep_encap,
1518                                  struct netlink_ext_ack *extack)
1519 {
1520         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1521         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1522
1523         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1524                                                      ipip_entry->ipipt,
1525                                                      ipip_entry->ol_dev,
1526                                                      extack);
1527         if (IS_ERR(new_lb_rif))
1528                 return PTR_ERR(new_lb_rif);
1529         ipip_entry->ol_lb = new_lb_rif;
1530
1531         if (keep_encap)
1532                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1533                                              &new_lb_rif->common);
1534
1535         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1536
1537         return 0;
1538 }
1539
1540 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1541                                         struct mlxsw_sp_rif *rif);
1542
1543 /**
1544  * Update the offload related to an IPIP entry. This always updates decap, and
1545  * in addition to that it also:
1546  * @recreate_loopback: recreates the associated loopback RIF
1547  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1548  *              relevant when recreate_loopback is true.
1549  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1550  *                   is only relevant when recreate_loopback is false.
1551  */
1552 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1553                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1554                                         bool recreate_loopback,
1555                                         bool keep_encap,
1556                                         bool update_nexthops,
1557                                         struct netlink_ext_ack *extack)
1558 {
1559         int err;
1560
1561         /* RIFs can't be edited, so to update loopback, we need to destroy and
1562          * recreate it. That creates a window of opportunity where RALUE and
1563          * RATR registers end up referencing a RIF that's already gone. RATRs
1564          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1565          * of RALUE, demote the decap route back.
1566          */
1567         if (ipip_entry->decap_fib_entry)
1568                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1569
1570         if (recreate_loopback) {
1571                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1572                                                        keep_encap, extack);
1573                 if (err)
1574                         return err;
1575         } else if (update_nexthops) {
1576                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1577                                             &ipip_entry->ol_lb->common);
1578         }
1579
1580         if (ipip_entry->ol_dev->flags & IFF_UP)
1581                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1582
1583         return 0;
1584 }
1585
1586 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1587                                                 struct net_device *ol_dev,
1588                                                 struct netlink_ext_ack *extack)
1589 {
1590         struct mlxsw_sp_ipip_entry *ipip_entry =
1591                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1592         enum mlxsw_sp_l3proto ul_proto;
1593         union mlxsw_sp_l3addr saddr;
1594         u32 ul_tb_id;
1595
1596         if (!ipip_entry)
1597                 return 0;
1598
1599         /* For flat configuration cases, moving overlay to a different VRF might
1600          * cause local address conflict, and the conflicting tunnels need to be
1601          * demoted.
1602          */
1603         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1604         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1605         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1606         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1607                                                  saddr, ul_tb_id,
1608                                                  ipip_entry)) {
1609                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1610                 return 0;
1611         }
1612
1613         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1614                                                    true, false, false, extack);
1615 }
1616
1617 static int
1618 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1619                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1620                                      struct net_device *ul_dev,
1621                                      struct netlink_ext_ack *extack)
1622 {
1623         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1624                                                    true, true, false, extack);
1625 }
1626
1627 static int
1628 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1629                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1630                                     struct net_device *ul_dev)
1631 {
1632         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1633                                                    false, false, true, NULL);
1634 }
1635
1636 static int
1637 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1638                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1639                                       struct net_device *ul_dev)
1640 {
1641         /* A down underlay device causes encapsulated packets to not be
1642          * forwarded, but decap still works. So refresh next hops without
1643          * touching anything else.
1644          */
1645         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1646                                                    false, false, true, NULL);
1647 }
1648
1649 static int
1650 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1651                                         struct net_device *ol_dev,
1652                                         struct netlink_ext_ack *extack)
1653 {
1654         const struct mlxsw_sp_ipip_ops *ipip_ops;
1655         struct mlxsw_sp_ipip_entry *ipip_entry;
1656         int err;
1657
1658         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1659         if (!ipip_entry)
1660                 /* A change might make a tunnel eligible for offloading, but
1661                  * that is currently not implemented. What falls to slow path
1662                  * stays there.
1663                  */
1664                 return 0;
1665
1666         /* A change might make a tunnel not eligible for offloading. */
1667         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1668                                                  ipip_entry->ipipt)) {
1669                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1670                 return 0;
1671         }
1672
1673         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1674         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1675         return err;
1676 }
1677
1678 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1679                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1680 {
1681         struct net_device *ol_dev = ipip_entry->ol_dev;
1682
1683         if (ol_dev->flags & IFF_UP)
1684                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1685         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1686 }
1687
1688 /* The configuration where several tunnels have the same local address in the
1689  * same underlay table needs special treatment in the HW. That is currently not
1690  * implemented in the driver. This function finds and demotes the first tunnel
1691  * with a given source address, except the one passed in in the argument
1692  * `except'.
1693  */
1694 bool
1695 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1696                                      enum mlxsw_sp_l3proto ul_proto,
1697                                      union mlxsw_sp_l3addr saddr,
1698                                      u32 ul_tb_id,
1699                                      const struct mlxsw_sp_ipip_entry *except)
1700 {
1701         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1702
1703         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1704                                  ipip_list_node) {
1705                 if (ipip_entry != except &&
1706                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1707                                                       ul_tb_id, ipip_entry)) {
1708                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1709                         return true;
1710                 }
1711         }
1712
1713         return false;
1714 }
1715
1716 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1717                                                      struct net_device *ul_dev)
1718 {
1719         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1720
1721         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1722                                  ipip_list_node) {
1723                 struct net_device *ipip_ul_dev =
1724                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1725
1726                 if (ipip_ul_dev == ul_dev)
1727                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1728         }
1729 }
1730
1731 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1732                                      struct net_device *ol_dev,
1733                                      unsigned long event,
1734                                      struct netdev_notifier_info *info)
1735 {
1736         struct netdev_notifier_changeupper_info *chup;
1737         struct netlink_ext_ack *extack;
1738
1739         switch (event) {
1740         case NETDEV_REGISTER:
1741                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1742         case NETDEV_UNREGISTER:
1743                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1744                 return 0;
1745         case NETDEV_UP:
1746                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1747                 return 0;
1748         case NETDEV_DOWN:
1749                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1750                 return 0;
1751         case NETDEV_CHANGEUPPER:
1752                 chup = container_of(info, typeof(*chup), info);
1753                 extack = info->extack;
1754                 if (netif_is_l3_master(chup->upper_dev))
1755                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1756                                                                     ol_dev,
1757                                                                     extack);
1758                 return 0;
1759         case NETDEV_CHANGE:
1760                 extack = info->extack;
1761                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1762                                                                ol_dev, extack);
1763         case NETDEV_CHANGEMTU:
1764                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1765         }
1766         return 0;
1767 }
1768
1769 static int
1770 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1771                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1772                                    struct net_device *ul_dev,
1773                                    unsigned long event,
1774                                    struct netdev_notifier_info *info)
1775 {
1776         struct netdev_notifier_changeupper_info *chup;
1777         struct netlink_ext_ack *extack;
1778
1779         switch (event) {
1780         case NETDEV_CHANGEUPPER:
1781                 chup = container_of(info, typeof(*chup), info);
1782                 extack = info->extack;
1783                 if (netif_is_l3_master(chup->upper_dev))
1784                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1785                                                                     ipip_entry,
1786                                                                     ul_dev,
1787                                                                     extack);
1788                 break;
1789
1790         case NETDEV_UP:
1791                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1792                                                            ul_dev);
1793         case NETDEV_DOWN:
1794                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1795                                                              ipip_entry,
1796                                                              ul_dev);
1797         }
1798         return 0;
1799 }
1800
1801 int
1802 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1803                                  struct net_device *ul_dev,
1804                                  unsigned long event,
1805                                  struct netdev_notifier_info *info)
1806 {
1807         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1808         int err;
1809
1810         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1811                                                                 ul_dev,
1812                                                                 ipip_entry))) {
1813                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1814                                                          ul_dev, event, info);
1815                 if (err) {
1816                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1817                                                                  ul_dev);
1818                         return err;
1819                 }
1820         }
1821
1822         return 0;
1823 }
1824
1825 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1826                                       enum mlxsw_sp_l3proto ul_proto,
1827                                       const union mlxsw_sp_l3addr *ul_sip,
1828                                       u32 tunnel_index)
1829 {
1830         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1831         struct mlxsw_sp_fib_entry *fib_entry;
1832         int err;
1833
1834         /* It is valid to create a tunnel with a local IP and only later
1835          * assign this IP address to a local interface
1836          */
1837         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1838                                                          ul_proto, ul_sip,
1839                                                          type);
1840         if (!fib_entry)
1841                 return 0;
1842
1843         fib_entry->decap.tunnel_index = tunnel_index;
1844         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1845
1846         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1847         if (err)
1848                 goto err_fib_entry_update;
1849
1850         return 0;
1851
1852 err_fib_entry_update:
1853         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1854         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855         return err;
1856 }
1857
1858 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1859                                       enum mlxsw_sp_l3proto ul_proto,
1860                                       const union mlxsw_sp_l3addr *ul_sip)
1861 {
1862         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1863         struct mlxsw_sp_fib_entry *fib_entry;
1864
1865         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1866                                                          ul_proto, ul_sip,
1867                                                          type);
1868         if (!fib_entry)
1869                 return;
1870
1871         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1872         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1873 }
1874
/* Hash table key of a neighbour entry: the address of the kernel neighbour
 * itself.
 */
struct mlxsw_sp_neigh_key {
        struct neighbour *n;
};
1878
1879 struct mlxsw_sp_neigh_entry {
1880         struct list_head rif_list_node;
1881         struct rhash_head ht_node;
1882         struct mlxsw_sp_neigh_key key;
1883         u16 rif;
1884         bool connected;
1885         unsigned char ha[ETH_ALEN];
1886         struct list_head nexthop_list; /* list of nexthops using
1887                                         * this neigh entry
1888                                         */
1889         struct list_head nexthop_neighs_list_node;
1890         unsigned int counter_index;
1891         bool counter_valid;
1892 };
1893
1894 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1895         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1896         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1897         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1898 };
1899
1900 struct mlxsw_sp_neigh_entry *
1901 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1902                         struct mlxsw_sp_neigh_entry *neigh_entry)
1903 {
1904         if (!neigh_entry) {
1905                 if (list_empty(&rif->neigh_list))
1906                         return NULL;
1907                 else
1908                         return list_first_entry(&rif->neigh_list,
1909                                                 typeof(*neigh_entry),
1910                                                 rif_list_node);
1911         }
1912         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1913                 return NULL;
1914         return list_next_entry(neigh_entry, rif_list_node);
1915 }
1916
1917 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1918 {
1919         return neigh_entry->key.n->tbl->family;
1920 }
1921
1922 unsigned char *
1923 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1924 {
1925         return neigh_entry->ha;
1926 }
1927
1928 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1929 {
1930         struct neighbour *n;
1931
1932         n = neigh_entry->key.n;
1933         return ntohl(*((__be32 *) n->primary_key));
1934 }
1935
1936 struct in6_addr *
1937 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1938 {
1939         struct neighbour *n;
1940
1941         n = neigh_entry->key.n;
1942         return (struct in6_addr *) &n->primary_key;
1943 }
1944
1945 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1946                                struct mlxsw_sp_neigh_entry *neigh_entry,
1947                                u64 *p_counter)
1948 {
1949         if (!neigh_entry->counter_valid)
1950                 return -EINVAL;
1951
1952         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1953                                          p_counter, NULL);
1954 }
1955
1956 static struct mlxsw_sp_neigh_entry *
1957 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1958                            u16 rif)
1959 {
1960         struct mlxsw_sp_neigh_entry *neigh_entry;
1961
1962         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1963         if (!neigh_entry)
1964                 return NULL;
1965
1966         neigh_entry->key.n = n;
1967         neigh_entry->rif = rif;
1968         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1969
1970         return neigh_entry;
1971 }
1972
/* Release the memory of a neighbour entry. Nothing else is torn down here,
 * so the entry must already be unlinked from any list or table.
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
        kfree(neigh_entry);
}
1977
1978 static int
1979 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1980                             struct mlxsw_sp_neigh_entry *neigh_entry)
1981 {
1982         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1983                                       &neigh_entry->ht_node,
1984                                       mlxsw_sp_neigh_ht_params);
1985 }
1986
1987 static void
1988 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1989                             struct mlxsw_sp_neigh_entry *neigh_entry)
1990 {
1991         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1992                                &neigh_entry->ht_node,
1993                                mlxsw_sp_neigh_ht_params);
1994 }
1995
1996 static bool
1997 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1998                                     struct mlxsw_sp_neigh_entry *neigh_entry)
1999 {
2000         struct devlink *devlink;
2001         const char *table_name;
2002
2003         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2004         case AF_INET:
2005                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2006                 break;
2007         case AF_INET6:
2008                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2009                 break;
2010         default:
2011                 WARN_ON(1);
2012                 return false;
2013         }
2014
2015         devlink = priv_to_devlink(mlxsw_sp->core);
2016         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2017 }
2018
2019 static void
2020 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2021                              struct mlxsw_sp_neigh_entry *neigh_entry)
2022 {
2023         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2024                 return;
2025
2026         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2027                 return;
2028
2029         neigh_entry->counter_valid = true;
2030 }
2031
2032 static void
2033 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2034                             struct mlxsw_sp_neigh_entry *neigh_entry)
2035 {
2036         if (!neigh_entry->counter_valid)
2037                 return;
2038         mlxsw_sp_flow_counter_free(mlxsw_sp,
2039                                    neigh_entry->counter_index);
2040         neigh_entry->counter_valid = false;
2041 }
2042
2043 static struct mlxsw_sp_neigh_entry *
2044 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2045 {
2046         struct mlxsw_sp_neigh_entry *neigh_entry;
2047         struct mlxsw_sp_rif *rif;
2048         int err;
2049
2050         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2051         if (!rif)
2052                 return ERR_PTR(-EINVAL);
2053
2054         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2055         if (!neigh_entry)
2056                 return ERR_PTR(-ENOMEM);
2057
2058         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2059         if (err)
2060                 goto err_neigh_entry_insert;
2061
2062         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2063         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2064
2065         return neigh_entry;
2066
2067 err_neigh_entry_insert:
2068         mlxsw_sp_neigh_entry_free(neigh_entry);
2069         return ERR_PTR(err);
2070 }
2071
2072 static void
2073 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2074                              struct mlxsw_sp_neigh_entry *neigh_entry)
2075 {
2076         list_del(&neigh_entry->rif_list_node);
2077         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2078         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2079         mlxsw_sp_neigh_entry_free(neigh_entry);
2080 }
2081
2082 static struct mlxsw_sp_neigh_entry *
2083 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2084 {
2085         struct mlxsw_sp_neigh_key key;
2086
2087         key.n = n;
2088         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2089                                       &key, mlxsw_sp_neigh_ht_params);
2090 }
2091
2092 static void
2093 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2094 {
2095         unsigned long interval;
2096
2097 #if IS_ENABLED(CONFIG_IPV6)
2098         interval = min_t(unsigned long,
2099                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2100                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2101 #else
2102         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2103 #endif
2104         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2105 }
2106
2107 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2108                                                    char *rauhtd_pl,
2109                                                    int ent_index)
2110 {
2111         struct net_device *dev;
2112         struct neighbour *n;
2113         __be32 dipn;
2114         u32 dip;
2115         u16 rif;
2116
2117         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2118
2119         if (!mlxsw_sp->router->rifs[rif]) {
2120                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2121                 return;
2122         }
2123
2124         dipn = htonl(dip);
2125         dev = mlxsw_sp->router->rifs[rif]->dev;
2126         n = neigh_lookup(&arp_tbl, &dipn, dev);
2127         if (!n)
2128                 return;
2129
2130         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2131         neigh_event_send(n, NULL);
2132         neigh_release(n);
2133 }
2134
2135 #if IS_ENABLED(CONFIG_IPV6)
2136 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2137                                                    char *rauhtd_pl,
2138                                                    int rec_index)
2139 {
2140         struct net_device *dev;
2141         struct neighbour *n;
2142         struct in6_addr dip;
2143         u16 rif;
2144
2145         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2146                                          (char *) &dip);
2147
2148         if (!mlxsw_sp->router->rifs[rif]) {
2149                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2150                 return;
2151         }
2152
2153         dev = mlxsw_sp->router->rifs[rif]->dev;
2154         n = neigh_lookup(&nd_tbl, &dip, dev);
2155         if (!n)
2156                 return;
2157
2158         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2159         neigh_event_send(n, NULL);
2160         neigh_release(n);
2161 }
2162 #else
2163 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2164                                                    char *rauhtd_pl,
2165                                                    int rec_index)
2166 {
2167 }
2168 #endif
2169
2170 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2171                                                    char *rauhtd_pl,
2172                                                    int rec_index)
2173 {
2174         u8 num_entries;
2175         int i;
2176
2177         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2178                                                                 rec_index);
2179         /* Hardware starts counting at 0, so add 1. */
2180         num_entries++;
2181
2182         /* Each record consists of several neighbour entries. */
2183         for (i = 0; i < num_entries; i++) {
2184                 int ent_index;
2185
2186                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2187                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2188                                                        ent_index);
2189         }
2190
2191 }
2192
/* Process one IPv6 record of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        /* An IPv6 record holds exactly one entry, so the record index is
         * the entry index as well.
         */
        mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2201
2202 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2203                                               char *rauhtd_pl, int rec_index)
2204 {
2205         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2206         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2207                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2208                                                        rec_index);
2209                 break;
2210         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2211                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2212                                                        rec_index);
2213                 break;
2214         }
2215 }
2216
2217 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2218 {
2219         u8 num_rec, last_rec_index, num_entries;
2220
2221         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2222         last_rec_index = num_rec - 1;
2223
2224         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2225                 return false;
2226         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2227             MLXSW_REG_RAUHTD_TYPE_IPV6)
2228                 return true;
2229
2230         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2231                                                                 last_rec_index);
2232         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2233                 return true;
2234         return false;
2235 }
2236
2237 static int
2238 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2239                                        char *rauhtd_pl,
2240                                        enum mlxsw_reg_rauhtd_type type)
2241 {
2242         int i, num_rec;
2243         int err;
2244
2245         /* Make sure the neighbour's netdev isn't removed in the
2246          * process.
2247          */
2248         rtnl_lock();
2249         do {
2250                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2251                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2252                                       rauhtd_pl);
2253                 if (err) {
2254                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2255                         break;
2256                 }
2257                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2258                 for (i = 0; i < num_rec; i++)
2259                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2260                                                           i);
2261         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2262         rtnl_unlock();
2263
2264         return err;
2265 }
2266
2267 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2268 {
2269         enum mlxsw_reg_rauhtd_type type;
2270         char *rauhtd_pl;
2271         int err;
2272
2273         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2274         if (!rauhtd_pl)
2275                 return -ENOMEM;
2276
2277         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2278         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2279         if (err)
2280                 goto out;
2281
2282         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2283         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2284 out:
2285         kfree(rauhtd_pl);
2286         return err;
2287 }
2288
2289 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2290 {
2291         struct mlxsw_sp_neigh_entry *neigh_entry;
2292
2293         /* Take RTNL mutex here to prevent lists from changes */
2294         rtnl_lock();
2295         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2296                             nexthop_neighs_list_node)
2297                 /* If this neigh have nexthops, make the kernel think this neigh
2298                  * is active regardless of the traffic.
2299                  */
2300                 neigh_event_send(neigh_entry->key.n, NULL);
2301         rtnl_unlock();
2302 }
2303
2304 static void
2305 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2306 {
2307         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2308
2309         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2310                                msecs_to_jiffies(interval));
2311 }
2312
2313 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2314 {
2315         struct mlxsw_sp_router *router;
2316         int err;
2317
2318         router = container_of(work, struct mlxsw_sp_router,
2319                               neighs_update.dw.work);
2320         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2321         if (err)
2322                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2323
2324         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2325
2326         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2327 }
2328
2329 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2330 {
2331         struct mlxsw_sp_neigh_entry *neigh_entry;
2332         struct mlxsw_sp_router *router;
2333
2334         router = container_of(work, struct mlxsw_sp_router,
2335                               nexthop_probe_dw.work);
2336         /* Iterate over nexthop neighbours, find those who are unresolved and
2337          * send arp on them. This solves the chicken-egg problem when
2338          * the nexthop wouldn't get offloaded until the neighbor is resolved
2339          * but it wouldn't get resolved ever in case traffic is flowing in HW
2340          * using different nexthop.
2341          *
2342          * Take RTNL mutex here to prevent lists from changes.
2343          */
2344         rtnl_lock();
2345         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2346                             nexthop_neighs_list_node)
2347                 if (!neigh_entry->connected)
2348                         neigh_event_send(neigh_entry->key.n, NULL);
2349         rtnl_unlock();
2350
2351         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2352                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2353 }
2354
2355 static void
2356 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2357                               struct mlxsw_sp_neigh_entry *neigh_entry,
2358                               bool removing);
2359
2360 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2361 {
2362         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2363                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2364 }
2365
2366 static void
2367 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2368                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2369                                 enum mlxsw_reg_rauht_op op)
2370 {
2371         struct neighbour *n = neigh_entry->key.n;
2372         u32 dip = ntohl(*((__be32 *) n->primary_key));
2373         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2374
2375         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2376                               dip);
2377         if (neigh_entry->counter_valid)
2378                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2379                                              neigh_entry->counter_index);
2380         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2381 }
2382
2383 static void
2384 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2385                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2386                                 enum mlxsw_reg_rauht_op op)
2387 {
2388         struct neighbour *n = neigh_entry->key.n;
2389         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2390         const char *dip = n->primary_key;
2391
2392         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2393                               dip);
2394         if (neigh_entry->counter_valid)
2395                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2396                                              neigh_entry->counter_index);
2397         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2398 }
2399
2400 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2401 {
2402         struct neighbour *n = neigh_entry->key.n;
2403
2404         /* Packets with a link-local destination address are trapped
2405          * after LPM lookup and never reach the neighbour table, so
2406          * there is no need to program such neighbours to the device.
2407          */
2408         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2409             IPV6_ADDR_LINKLOCAL)
2410                 return true;
2411         return false;
2412 }
2413
2414 static void
2415 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2416                             struct mlxsw_sp_neigh_entry *neigh_entry,
2417                             bool adding)
2418 {
2419         if (!adding && !neigh_entry->connected)
2420                 return;
2421         neigh_entry->connected = adding;
2422         if (neigh_entry->key.n->tbl->family == AF_INET) {
2423                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2424                                                 mlxsw_sp_rauht_op(adding));
2425         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2426                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2427                         return;
2428                 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2429                                                 mlxsw_sp_rauht_op(adding));
2430         } else {
2431                 WARN_ON_ONCE(1);
2432         }
2433 }
2434
/* Allocate (adding) or free (!adding) the neighbour entry's counter, then
 * rewrite the entry to the device as an add operation.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_neigh_entry *neigh_entry,
                                    bool adding)
{
        if (!adding)
                mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
        else
                mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);

        /* The entry is always re-added, regardless of the counter change. */
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2446
/* Work item used to defer handling of a netevent notification to a work
 * queue. It is allocated by the notifier and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n; /* set only for neighbour update events */
};
2452
/* Deferred handler for a neighbour update event.
 *
 * Snapshots the neighbour's hardware address, NUD state and dead flag, then,
 * under RTNL, looks up (or creates) the matching driver neighbour entry,
 * updates it and the nexthops that use it, and destroys the entry when it
 * ends up disconnected with no nexthops. The neighbour reference and the
 * work item are released on every path.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_netevent_work *net_work =
                container_of(work, struct mlxsw_sp_netevent_work, work);
        struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = net_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        mlxsw_sp_span_respin(mlxsw_sp);

        /* The neighbour counts as connected when it is valid and not dead. */
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        /* Not connected and not tracked by the driver: nothing to do. */
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        /* Use the address snapshot taken under the neighbour lock. */
        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        /* Drop the entry once it is disconnected and no nexthop uses it. */
        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        neigh_release(n);
        kfree(net_work);
}
2499
2500 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2501
2502 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2503 {
2504         struct mlxsw_sp_netevent_work *net_work =
2505                 container_of(work, struct mlxsw_sp_netevent_work, work);
2506         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2507
2508         mlxsw_sp_mp_hash_init(mlxsw_sp);
2509         kfree(net_work);
2510 }
2511
2512 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2513
2514 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2515 {
2516         struct mlxsw_sp_netevent_work *net_work =
2517                 container_of(work, struct mlxsw_sp_netevent_work, work);
2518         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2519
2520         __mlxsw_sp_router_init(mlxsw_sp);
2521         kfree(net_work);
2522 }
2523
2524 static int mlxsw_sp_router_schedule_work(struct net *net,
2525                                          struct notifier_block *nb,
2526                                          void (*cb)(struct work_struct *))
2527 {
2528         struct mlxsw_sp_netevent_work *net_work;
2529         struct mlxsw_sp_router *router;
2530
2531         if (!net_eq(net, &init_net))
2532                 return NOTIFY_DONE;
2533
2534         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2535         if (!net_work)
2536                 return NOTIFY_BAD;
2537
2538         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2539         INIT_WORK(&net_work->work, cb);
2540         net_work->mlxsw_sp = router->mlxsw_sp;
2541         mlxsw_core_schedule_work(&net_work->work);
2542         return NOTIFY_DONE;
2543 }
2544
/* Netevent notifier callback of the router.
 *
 * - NETEVENT_DELAY_PROBE_TIME_UPDATE: for a per-device IPv4/IPv6 neighbour
 *   parameter change on a device that resolves to an mlxsw port, store the
 *   new DELAY_PROBE_TIME (in milliseconds) as the neighbour update interval.
 * - NETEVENT_NEIGH_UPDATE: for an IPv4/IPv6 neighbour on a device that
 *   resolves to an mlxsw port, schedule mlxsw_sp_router_neigh_event_work()
 *   with a reference held on the neighbour.
 * - NETEVENT_IPV4/IPV6_MPATH_HASH_UPDATE and
 *   NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: schedule the matching deferred
 *   handler through mlxsw_sp_router_schedule_work().
 *
 * Returns NOTIFY_DONE, or NOTIFY_BAD when a work item cannot be allocated.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                                          unsigned long event, void *ptr)
{
        struct mlxsw_sp_netevent_work *net_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || (p->tbl->family != AF_INET &&
                                p->tbl->family != AF_INET6))
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router->neighs_update.interval = interval;

                /* Pairs with mlxsw_sp_port_lower_dev_hold() above. */
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                /* Only IPv4 and IPv6 neighbours are handled. */
                if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
                if (!net_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
                net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                net_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destructed until we drop the reference in delayed
                 * work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&net_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_IPV4_MPATH_HASH_UPDATE:
        case NETEVENT_IPV6_MPATH_HASH_UPDATE:
                /* ptr is passed on as the struct net argument. */
                return mlxsw_sp_router_schedule_work(ptr, nb,
                                mlxsw_sp_router_mp_hash_event_work);

        case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
                return mlxsw_sp_router_schedule_work(ptr, nb,
                                mlxsw_sp_router_update_priority_work);
        }

        return NOTIFY_DONE;
}
2617
2618 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2619 {
2620         int err;
2621
2622         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2623                               &mlxsw_sp_neigh_ht_params);
2624         if (err)
2625                 return err;
2626
2627         /* Initialize the polling interval according to the default
2628          * table.
2629          */
2630         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2631
2632         /* Create the delayed works for the activity_update */
2633         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2634                           mlxsw_sp_router_neighs_update_work);
2635         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2636                           mlxsw_sp_router_probe_unresolved_nexthops);
2637         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2638         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2639         return 0;
2640 }
2641
2642 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2643 {
2644         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2645         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2646         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2647 }
2648
2649 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2650                                          struct mlxsw_sp_rif *rif)
2651 {
2652         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2653
2654         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2655                                  rif_list_node) {
2656                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2657                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2658         }
2659 }
2660
/* Kind of a nexthop; selects how its adjacency entries are written. */
enum mlxsw_sp_nexthop_type {
        MLXSW_SP_NEXTHOP_TYPE_ETH, /* updated via mlxsw_sp_nexthop_update() */
        MLXSW_SP_NEXTHOP_TYPE_IPIP, /* updated via mlxsw_sp_nexthop_ipip_update() */
};
2665
/* Lookup key of a nexthop in the router's nexthop hash table. */
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};
2669
/* A single nexthop, stored inline in the nexthops array of its group. */
struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct list_head router_list_node; /* member of router->nexthop_list */
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this belongs to
                                                */
        struct rhash_head ht_node; /* node in router->nexthop_ht */
        struct mlxsw_sp_nexthop_key key; /* key in router->nexthop_ht */
        /* Gateway address; compared as a struct in6_addr for IPv6 groups. */
        unsigned char gw_addr[sizeof(struct in6_addr)];
        int ifindex;
        int nh_weight;
        int norm_nh_weight;
        /* Number of consecutive adjacency entries written for this nexthop. */
        int num_adj_entries;
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        enum mlxsw_sp_nexthop_type type;
        /* Which member is used depends on type (ETH vs. IPIP). */
        union {
                struct mlxsw_sp_neigh_entry *neigh_entry;
                struct mlxsw_sp_ipip_entry *ipip_entry;
        };
        unsigned int counter_index; /* meaningful only when counter_valid */
        bool counter_valid;
};
2702
2703 struct mlxsw_sp_nexthop_group {
2704         void *priv;
2705         struct rhash_head ht_node;
2706         struct list_head fib_list; /* list of fib entries that use this group */
2707         struct neigh_table *neigh_tbl;
2708         u8 adj_index_valid:1,
2709            gateway:1; /* routes using the group use a gateway */
2710         u32 adj_index;
2711         u16 ecmp_size;
2712         u16 count;
2713         int sum_norm_weight;
2714         struct mlxsw_sp_nexthop nexthops[0];
2715 #define nh_rif  nexthops[0].rif
2716 };
2717
2718 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2719                                     struct mlxsw_sp_nexthop *nh)
2720 {
2721         struct devlink *devlink;
2722
2723         devlink = priv_to_devlink(mlxsw_sp->core);
2724         if (!devlink_dpipe_table_counter_enabled(devlink,
2725                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2726                 return;
2727
2728         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2729                 return;
2730
2731         nh->counter_valid = true;
2732 }
2733
2734 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2735                                    struct mlxsw_sp_nexthop *nh)
2736 {
2737         if (!nh->counter_valid)
2738                 return;
2739         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2740         nh->counter_valid = false;
2741 }
2742
2743 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2744                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2745 {
2746         if (!nh->counter_valid)
2747                 return -EINVAL;
2748
2749         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2750                                          p_counter, NULL);
2751 }
2752
2753 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2754                                                struct mlxsw_sp_nexthop *nh)
2755 {
2756         if (!nh) {
2757                 if (list_empty(&router->nexthop_list))
2758                         return NULL;
2759                 else
2760                         return list_first_entry(&router->nexthop_list,
2761                                                 typeof(*nh), router_list_node);
2762         }
2763         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2764                 return NULL;
2765         return list_next_entry(nh, router_list_node);
2766 }
2767
2768 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2769 {
2770         return nh->offloaded;
2771 }
2772
2773 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2774 {
2775         if (!nh->offloaded)
2776                 return NULL;
2777         return nh->neigh_entry->ha;
2778 }
2779
2780 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2781                              u32 *p_adj_size, u32 *p_adj_hash_index)
2782 {
2783         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2784         u32 adj_hash_index = 0;
2785         int i;
2786
2787         if (!nh->offloaded || !nh_grp->adj_index_valid)
2788                 return -EINVAL;
2789
2790         *p_adj_index = nh_grp->adj_index;
2791         *p_adj_size = nh_grp->ecmp_size;
2792
2793         for (i = 0; i < nh_grp->count; i++) {
2794                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2795
2796                 if (nh_iter == nh)
2797                         break;
2798                 if (nh_iter->offloaded)
2799                         adj_hash_index += nh_iter->num_adj_entries;
2800         }
2801
2802         *p_adj_hash_index = adj_hash_index;
2803         return 0;
2804 }
2805
2806 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2807 {
2808         return nh->rif;
2809 }
2810
2811 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2812 {
2813         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2814         int i;
2815
2816         for (i = 0; i < nh_grp->count; i++) {
2817                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2818
2819                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2820                         return true;
2821         }
2822         return false;
2823 }
2824
2825 static struct fib_info *
2826 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2827 {
2828         return nh_grp->priv;
2829 }
2830
/* Lookup key for the nexthop group hash table. proto selects which union
 * member is used: fi for IPv4 and fib6_entry for IPv6.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
        enum mlxsw_sp_l3proto proto;
        union {
                struct fib_info *fi;
                struct mlxsw_sp_fib6_entry *fib6_entry;
        };
};
2838
2839 static bool
2840 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2841                                     const struct in6_addr *gw, int ifindex,
2842                                     int weight)
2843 {
2844         int i;
2845
2846         for (i = 0; i < nh_grp->count; i++) {
2847                 const struct mlxsw_sp_nexthop *nh;
2848
2849                 nh = &nh_grp->nexthops[i];
2850                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2851                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2852                         return true;
2853         }
2854
2855         return false;
2856 }
2857
2858 static bool
2859 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2860                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2861 {
2862         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2863
2864         if (nh_grp->count != fib6_entry->nrt6)
2865                 return false;
2866
2867         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2868                 struct in6_addr *gw;
2869                 int ifindex, weight;
2870
2871                 ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2872                 weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2873                 gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2874                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2875                                                          weight))
2876                         return false;
2877         }
2878
2879         return true;
2880 }
2881
2882 static int
2883 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2884 {
2885         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2886         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2887
2888         switch (cmp_arg->proto) {
2889         case MLXSW_SP_L3_PROTO_IPV4:
2890                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2891         case MLXSW_SP_L3_PROTO_IPV6:
2892                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2893                                                     cmp_arg->fib6_entry);
2894         default:
2895                 WARN_ON(1);
2896                 return 1;
2897         }
2898 }
2899
2900 static int
2901 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2902 {
2903         return nh_grp->neigh_tbl->family;
2904 }
2905
2906 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2907 {
2908         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2909         const struct mlxsw_sp_nexthop *nh;
2910         struct fib_info *fi;
2911         unsigned int val;
2912         int i;
2913
2914         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2915         case AF_INET:
2916                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2917                 return jhash(&fi, sizeof(fi), seed);
2918         case AF_INET6:
2919                 val = nh_grp->count;
2920                 for (i = 0; i < nh_grp->count; i++) {
2921                         nh = &nh_grp->nexthops[i];
2922                         val ^= nh->ifindex;
2923                 }
2924                 return jhash(&val, sizeof(val), seed);
2925         default:
2926                 WARN_ON(1);
2927                 return 0;
2928         }
2929 }
2930
2931 static u32
2932 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2933 {
2934         unsigned int val = fib6_entry->nrt6;
2935         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2936         struct net_device *dev;
2937
2938         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2939                 dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2940                 val ^= dev->ifindex;
2941         }
2942
2943         return jhash(&val, sizeof(val), seed);
2944 }
2945
2946 static u32
2947 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2948 {
2949         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2950
2951         switch (cmp_arg->proto) {
2952         case MLXSW_SP_L3_PROTO_IPV4:
2953                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2954         case MLXSW_SP_L3_PROTO_IPV6:
2955                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2956         default:
2957                 WARN_ON(1);
2958                 return 0;
2959         }
2960 }
2961
/* Parameters of the nexthop group hash table. Custom hash and compare
 * callbacks are used because the lookup key (struct
 * mlxsw_sp_nexthop_group_cmp_arg) is not stored inside the group.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .hashfn      = mlxsw_sp_nexthop_group_hash,
        .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
        .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2968
2969 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2970                                          struct mlxsw_sp_nexthop_group *nh_grp)
2971 {
2972         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2973             !nh_grp->gateway)
2974                 return 0;
2975
2976         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2977                                       &nh_grp->ht_node,
2978                                       mlxsw_sp_nexthop_group_ht_params);
2979 }
2980
2981 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2982                                           struct mlxsw_sp_nexthop_group *nh_grp)
2983 {
2984         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2985             !nh_grp->gateway)
2986                 return;
2987
2988         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2989                                &nh_grp->ht_node,
2990                                mlxsw_sp_nexthop_group_ht_params);
2991 }
2992
2993 static struct mlxsw_sp_nexthop_group *
2994 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2995                                struct fib_info *fi)
2996 {
2997         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2998
2999         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3000         cmp_arg.fi = fi;
3001         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3002                                       &cmp_arg,
3003                                       mlxsw_sp_nexthop_group_ht_params);
3004 }
3005
3006 static struct mlxsw_sp_nexthop_group *
3007 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3008                                struct mlxsw_sp_fib6_entry *fib6_entry)
3009 {
3010         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3011
3012         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3013         cmp_arg.fib6_entry = fib6_entry;
3014         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3015                                       &cmp_arg,
3016                                       mlxsw_sp_nexthop_group_ht_params);
3017 }
3018
/* Parameters of the nexthop hash table: nexthops are keyed by the
 * fixed-size struct mlxsw_sp_nexthop_key embedded in each nexthop.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
3024
3025 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3026                                    struct mlxsw_sp_nexthop *nh)
3027 {
3028         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3029                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3030 }
3031
3032 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3033                                     struct mlxsw_sp_nexthop *nh)
3034 {
3035         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3036                                mlxsw_sp_nexthop_ht_params);
3037 }
3038
3039 static struct mlxsw_sp_nexthop *
3040 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3041                         struct mlxsw_sp_nexthop_key key)
3042 {
3043         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3044                                       mlxsw_sp_nexthop_ht_params);
3045 }
3046
3047 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3048                                              const struct mlxsw_sp_fib *fib,
3049                                              u32 adj_index, u16 ecmp_size,
3050                                              u32 new_adj_index,
3051                                              u16 new_ecmp_size)
3052 {
3053         char raleu_pl[MLXSW_REG_RALEU_LEN];
3054
3055         mlxsw_reg_raleu_pack(raleu_pl,
3056                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
3057                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
3058                              new_ecmp_size);
3059         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3060 }
3061
3062 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3063                                           struct mlxsw_sp_nexthop_group *nh_grp,
3064                                           u32 old_adj_index, u16 old_ecmp_size)
3065 {
3066         struct mlxsw_sp_fib_entry *fib_entry;
3067         struct mlxsw_sp_fib *fib = NULL;
3068         int err;
3069
3070         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3071                 if (fib == fib_entry->fib_node->fib)
3072                         continue;
3073                 fib = fib_entry->fib_node->fib;
3074                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3075                                                         old_adj_index,
3076                                                         old_ecmp_size,
3077                                                         nh_grp->adj_index,
3078                                                         nh_grp->ecmp_size);
3079                 if (err)
3080                         return err;
3081         }
3082         return 0;
3083 }
3084
3085 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3086                                      struct mlxsw_sp_nexthop *nh)
3087 {
3088         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3089         char ratr_pl[MLXSW_REG_RATR_LEN];
3090
3091         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3092                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
3093                             adj_index, neigh_entry->rif);
3094         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3095         if (nh->counter_valid)
3096                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3097         else
3098                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3099
3100         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3101 }
3102
3103 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3104                             struct mlxsw_sp_nexthop *nh)
3105 {
3106         int i;
3107
3108         for (i = 0; i < nh->num_adj_entries; i++) {
3109                 int err;
3110
3111                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3112                 if (err)
3113                         return err;
3114         }
3115
3116         return 0;
3117 }
3118
3119 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3120                                           u32 adj_index,
3121                                           struct mlxsw_sp_nexthop *nh)
3122 {
3123         const struct mlxsw_sp_ipip_ops *ipip_ops;
3124
3125         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3126         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3127 }
3128
3129 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3130                                         u32 adj_index,
3131                                         struct mlxsw_sp_nexthop *nh)
3132 {
3133         int i;
3134
3135         for (i = 0; i < nh->num_adj_entries; i++) {
3136                 int err;
3137
3138                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3139                                                      nh);
3140                 if (err)
3141                         return err;
3142         }
3143
3144         return 0;
3145 }
3146
3147 static int
3148 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3149                               struct mlxsw_sp_nexthop_group *nh_grp,
3150                               bool reallocate)
3151 {
3152         u32 adj_index = nh_grp->adj_index; /* base */
3153         struct mlxsw_sp_nexthop *nh;
3154         int i;
3155         int err;
3156
3157         for (i = 0; i < nh_grp->count; i++) {
3158                 nh = &nh_grp->nexthops[i];
3159
3160                 if (!nh->should_offload) {
3161                         nh->offloaded = 0;
3162                         continue;
3163                 }
3164
3165                 if (nh->update || reallocate) {
3166                         switch (nh->type) {
3167                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3168                                 err = mlxsw_sp_nexthop_update
3169                                             (mlxsw_sp, adj_index, nh);
3170                                 break;
3171                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3172                                 err = mlxsw_sp_nexthop_ipip_update
3173                                             (mlxsw_sp, adj_index, nh);
3174                                 break;
3175                         }
3176                         if (err)
3177                                 return err;
3178                         nh->update = 0;
3179                         nh->offloaded = 1;
3180                 }
3181                 adj_index += nh->num_adj_entries;
3182         }
3183         return 0;
3184 }
3185
3186 static bool
3187 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3188                                  const struct mlxsw_sp_fib_entry *fib_entry);
3189
3190 static int
3191 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3192                                     struct mlxsw_sp_nexthop_group *nh_grp)
3193 {
3194         struct mlxsw_sp_fib_entry *fib_entry;
3195         int err;
3196
3197         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3198                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3199                                                       fib_entry))
3200                         continue;
3201                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3202                 if (err)
3203                         return err;
3204         }
3205         return 0;
3206 }
3207
3208 static void
3209 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3210                                    enum mlxsw_reg_ralue_op op, int err);
3211
3212 static void
3213 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3214 {
3215         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3216         struct mlxsw_sp_fib_entry *fib_entry;
3217
3218         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3219                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3220                                                       fib_entry))
3221                         continue;
3222                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3223         }
3224 }
3225
/* Round *p_adj_grp_size up to a valid adjacency group size.
 *
 * Valid sizes are 1-64, 512, 1024, 2048 and 4096. Sizes up to 64 are left
 * untouched; anything larger becomes the smallest of the big sizes that can
 * hold it, with 4096 used for every request above 2048.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 rounded = 512;

	if (*p_adj_grp_size <= 64)
		return;

	/* The big sizes are consecutive powers of two: keep doubling until
	 * the request fits or the largest size is reached.
	 */
	while (rounded < 4096 && rounded < *p_adj_grp_size)
		rounded <<= 1;

	*p_adj_grp_size = rounded;
}
3242
/* Set *p_adj_grp_size to the largest of 512, 1024, 2048 and 4096 that does
 * not exceed @alloc_size. When @alloc_size is below 512 the value is left
 * as it is.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	u16 size = 4096;

	if (alloc_size < 512)
		return;

	/* Halve from the largest size until it fits in the allocation. */
	while (size > alloc_size)
		size >>= 1;

	*p_adj_grp_size = size;
}
3255
3256 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3257                                      u16 *p_adj_grp_size)
3258 {
3259         unsigned int alloc_size;
3260         int err;
3261
3262         /* Round up the requested group size to the next size supported
3263          * by the device and make sure the request can be satisfied.
3264          */
3265         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3266         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3267                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3268                                               *p_adj_grp_size, &alloc_size);
3269         if (err)
3270                 return err;
3271         /* It is possible the allocation results in more allocated
3272          * entries than requested. Try to use as much of them as
3273          * possible.
3274          */
3275         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3276
3277         return 0;
3278 }
3279
3280 static void
3281 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3282 {
3283         int i, g = 0, sum_norm_weight = 0;
3284         struct mlxsw_sp_nexthop *nh;
3285
3286         for (i = 0; i < nh_grp->count; i++) {
3287                 nh = &nh_grp->nexthops[i];
3288
3289                 if (!nh->should_offload)
3290                         continue;
3291                 if (g > 0)
3292                         g = gcd(nh->nh_weight, g);
3293                 else
3294                         g = nh->nh_weight;
3295         }
3296
3297         for (i = 0; i < nh_grp->count; i++) {
3298                 nh = &nh_grp->nexthops[i];
3299
3300                 if (!nh->should_offload)
3301                         continue;
3302                 nh->norm_nh_weight = nh->nh_weight / g;
3303                 sum_norm_weight += nh->norm_nh_weight;
3304         }
3305
3306         nh_grp->sum_norm_weight = sum_norm_weight;
3307 }
3308
3309 static void
3310 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3311 {
3312         int total = nh_grp->sum_norm_weight;
3313         u16 ecmp_size = nh_grp->ecmp_size;
3314         int i, weight = 0, lower_bound = 0;
3315
3316         for (i = 0; i < nh_grp->count; i++) {
3317                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3318                 int upper_bound;
3319
3320                 if (!nh->should_offload)
3321                         continue;
3322                 weight += nh->norm_nh_weight;
3323                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3324                 nh->num_adj_entries = upper_bound - lower_bound;
3325                 lower_bound = upper_bound;
3326         }
3327 }
3328
3329 static void
3330 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3331                                struct mlxsw_sp_nexthop_group *nh_grp)
3332 {
3333         u16 ecmp_size, old_ecmp_size;
3334         struct mlxsw_sp_nexthop *nh;
3335         bool offload_change = false;
3336         u32 adj_index;
3337         bool old_adj_index_valid;
3338         u32 old_adj_index;
3339         int i;
3340         int err;
3341
3342         if (!nh_grp->gateway) {
3343                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3344                 return;
3345         }
3346
3347         for (i = 0; i < nh_grp->count; i++) {
3348                 nh = &nh_grp->nexthops[i];
3349
3350                 if (nh->should_offload != nh->offloaded) {
3351                         offload_change = true;
3352                         if (nh->should_offload)
3353                                 nh->update = 1;
3354                 }
3355         }
3356         if (!offload_change) {
3357                 /* Nothing was added or removed, so no need to reallocate. Just
3358                  * update MAC on existing adjacency indexes.
3359                  */
3360                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3361                 if (err) {
3362                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3363                         goto set_trap;
3364                 }
3365                 return;
3366         }
3367         mlxsw_sp_nexthop_group_normalize(nh_grp);
3368         if (!nh_grp->sum_norm_weight)
3369                 /* No neigh of this group is connected so we just set
3370                  * the trap and let everthing flow through kernel.
3371                  */
3372                 goto set_trap;
3373
3374         ecmp_size = nh_grp->sum_norm_weight;
3375         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3376         if (err)
3377                 /* No valid allocation size available. */
3378                 goto set_trap;
3379
3380         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3381                                   ecmp_size, &adj_index);
3382         if (err) {
3383                 /* We ran out of KVD linear space, just set the
3384                  * trap and let everything flow through kernel.
3385                  */
3386                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3387                 goto set_trap;
3388         }
3389         old_adj_index_valid = nh_grp->adj_index_valid;
3390         old_adj_index = nh_grp->adj_index;
3391         old_ecmp_size = nh_grp->ecmp_size;
3392         nh_grp->adj_index_valid = 1;
3393         nh_grp->adj_index = adj_index;
3394         nh_grp->ecmp_size = ecmp_size;
3395         mlxsw_sp_nexthop_group_rebalance(nh_grp);
3396         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3397         if (err) {
3398                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3399                 goto set_trap;
3400         }
3401
3402         if (!old_adj_index_valid) {
3403                 /* The trap was set for fib entries, so we have to call
3404                  * fib entry update to unset it and use adjacency index.
3405                  */
3406                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3407                 if (err) {
3408                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3409                         goto set_trap;
3410                 }
3411                 return;
3412         }
3413
3414         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3415                                              old_adj_index, old_ecmp_size);
3416         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3417                            old_ecmp_size, old_adj_index);
3418         if (err) {
3419                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3420                 goto set_trap;
3421         }
3422
3423         /* Offload state within the group changed, so update the flags. */
3424         mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3425
3426         return;
3427
3428 set_trap:
3429         old_adj_index_valid = nh_grp->adj_index_valid;
3430         nh_grp->adj_index_valid = 0;
3431         for (i = 0; i < nh_grp->count; i++) {
3432                 nh = &nh_grp->nexthops[i];
3433                 nh->offloaded = 0;
3434         }
3435         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3436         if (err)
3437                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3438         if (old_adj_index_valid)
3439                 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3440                                    nh_grp->ecmp_size, nh_grp->adj_index);
3441 }
3442
3443 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3444                                             bool removing)
3445 {
3446         if (!removing)
3447                 nh->should_offload = 1;
3448         else
3449                 nh->should_offload = 0;
3450         nh->update = 1;
3451 }
3452
3453 static void
3454 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3455                               struct mlxsw_sp_neigh_entry *neigh_entry,
3456                               bool removing)
3457 {
3458         struct mlxsw_sp_nexthop *nh;
3459
3460         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3461                             neigh_list_node) {
3462                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3463                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3464         }
3465 }
3466
3467 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3468                                       struct mlxsw_sp_rif *rif)
3469 {
3470         if (nh->rif)
3471                 return;
3472
3473         nh->rif = rif;
3474         list_add(&nh->rif_list_node, &rif->nexthop_list);
3475 }
3476
3477 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3478 {
3479         if (!nh->rif)
3480                 return;
3481
3482         list_del(&nh->rif_list_node);
3483         nh->rif = NULL;
3484 }
3485
3486 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3487                                        struct mlxsw_sp_nexthop *nh)
3488 {
3489         struct mlxsw_sp_neigh_entry *neigh_entry;
3490         struct neighbour *n;
3491         u8 nud_state, dead;
3492         int err;
3493
3494         if (!nh->nh_grp->gateway || nh->neigh_entry)
3495                 return 0;
3496
3497         /* Take a reference of neigh here ensuring that neigh would
3498          * not be destructed before the nexthop entry is finished.
3499          * The reference is taken either in neigh_lookup() or
3500          * in neigh_create() in case n is not found.
3501          */
3502         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3503         if (!n) {
3504                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3505                                  nh->rif->dev);
3506                 if (IS_ERR(n))
3507                         return PTR_ERR(n);
3508                 neigh_event_send(n, NULL);
3509         }
3510         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3511         if (!neigh_entry) {
3512                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3513                 if (IS_ERR(neigh_entry)) {
3514                         err = -EINVAL;
3515                         goto err_neigh_entry_create;
3516                 }
3517         }
3518
3519         /* If that is the first nexthop connected to that neigh, add to
3520          * nexthop_neighs_list
3521          */
3522         if (list_empty(&neigh_entry->nexthop_list))
3523                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3524                               &mlxsw_sp->router->nexthop_neighs_list);
3525
3526         nh->neigh_entry = neigh_entry;
3527         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3528         read_lock_bh(&n->lock);
3529         nud_state = n->nud_state;
3530         dead = n->dead;
3531         read_unlock_bh(&n->lock);
3532         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3533
3534         return 0;
3535
3536 err_neigh_entry_create:
3537         neigh_release(n);
3538         return err;
3539 }
3540
3541 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3542                                         struct mlxsw_sp_nexthop *nh)
3543 {
3544         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3545         struct neighbour *n;
3546
3547         if (!neigh_entry)
3548                 return;
3549         n = neigh_entry->key.n;
3550
3551         __mlxsw_sp_nexthop_neigh_update(nh, true);
3552         list_del(&nh->neigh_list_node);
3553         nh->neigh_entry = NULL;
3554
3555         /* If that is the last nexthop connected to that neigh, remove from
3556          * nexthop_neighs_list
3557          */
3558         if (list_empty(&neigh_entry->nexthop_list))
3559                 list_del(&neigh_entry->nexthop_neighs_list_node);
3560
3561         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3562                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3563
3564         neigh_release(n);
3565 }
3566
3567 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3568 {
3569         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3570
3571         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3572 }
3573
3574 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3575                                        struct mlxsw_sp_nexthop *nh,
3576                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3577 {
3578         bool removing;
3579
3580         if (!nh->nh_grp->gateway || nh->ipip_entry)
3581                 return;
3582
3583         nh->ipip_entry = ipip_entry;
3584         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3585         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3586         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3587 }
3588
3589 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3590                                        struct mlxsw_sp_nexthop *nh)
3591 {
3592         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3593
3594         if (!ipip_entry)
3595                 return;
3596
3597         __mlxsw_sp_nexthop_neigh_update(nh, true);
3598         nh->ipip_entry = NULL;
3599 }
3600
3601 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3602                                         const struct fib_nh *fib_nh,
3603                                         enum mlxsw_sp_ipip_type *p_ipipt)
3604 {
3605         struct net_device *dev = fib_nh->nh_dev;
3606
3607         return dev &&
3608                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3609                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3610 }
3611
3612 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3613                                        struct mlxsw_sp_nexthop *nh)
3614 {
3615         switch (nh->type) {
3616         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3617                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3618                 mlxsw_sp_nexthop_rif_fini(nh);
3619                 break;
3620         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3621                 mlxsw_sp_nexthop_rif_fini(nh);
3622                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3623                 break;
3624         }
3625 }
3626
3627 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3628                                        struct mlxsw_sp_nexthop *nh,
3629                                        struct fib_nh *fib_nh)
3630 {
3631         const struct mlxsw_sp_ipip_ops *ipip_ops;
3632         struct net_device *dev = fib_nh->nh_dev;
3633         struct mlxsw_sp_ipip_entry *ipip_entry;
3634         struct mlxsw_sp_rif *rif;
3635         int err;
3636
3637         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3638         if (ipip_entry) {
3639                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3640                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3641                                           MLXSW_SP_L3_PROTO_IPV4)) {
3642                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3643                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3644                         return 0;
3645                 }
3646         }
3647
3648         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3649         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3650         if (!rif)
3651                 return 0;
3652
3653         mlxsw_sp_nexthop_rif_init(nh, rif);
3654         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3655         if (err)
3656                 goto err_neigh_init;
3657
3658         return 0;
3659
3660 err_neigh_init:
3661         mlxsw_sp_nexthop_rif_fini(nh);
3662         return err;
3663 }
3664
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to
 * the protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3670
3671 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3672                                   struct mlxsw_sp_nexthop_group *nh_grp,
3673                                   struct mlxsw_sp_nexthop *nh,
3674                                   struct fib_nh *fib_nh)
3675 {
3676         struct net_device *dev = fib_nh->nh_dev;
3677         struct in_device *in_dev;
3678         int err;
3679
3680         nh->nh_grp = nh_grp;
3681         nh->key.fib_nh = fib_nh;
3682 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3683         nh->nh_weight = fib_nh->nh_weight;
3684 #else
3685         nh->nh_weight = 1;
3686 #endif
3687         memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3688         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3689         if (err)
3690                 return err;
3691
3692         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3693         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3694
3695         if (!dev)
3696                 return 0;
3697
3698         in_dev = __in_dev_get_rtnl(dev);
3699         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3700             fib_nh->nh_flags & RTNH_F_LINKDOWN)
3701                 return 0;
3702
3703         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3704         if (err)
3705                 goto err_nexthop_neigh_init;
3706
3707         return 0;
3708
3709 err_nexthop_neigh_init:
3710         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3711         return err;
3712 }
3713
3714 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3715                                    struct mlxsw_sp_nexthop *nh)
3716 {
3717         mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3718         list_del(&nh->router_list_node);
3719         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3720         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3721 }
3722
3723 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3724                                     unsigned long event, struct fib_nh *fib_nh)
3725 {
3726         struct mlxsw_sp_nexthop_key key;
3727         struct mlxsw_sp_nexthop *nh;
3728
3729         if (mlxsw_sp->router->aborted)
3730                 return;
3731
3732         key.fib_nh = fib_nh;
3733         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3734         if (WARN_ON_ONCE(!nh))
3735                 return;
3736
3737         switch (event) {
3738         case FIB_EVENT_NH_ADD:
3739                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3740                 break;
3741         case FIB_EVENT_NH_DEL:
3742                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3743                 break;
3744         }
3745
3746         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3747 }
3748
3749 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3750                                         struct mlxsw_sp_rif *rif)
3751 {
3752         struct mlxsw_sp_nexthop *nh;
3753         bool removing;
3754
3755         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3756                 switch (nh->type) {
3757                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3758                         removing = false;
3759                         break;
3760                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3761                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3762                         break;
3763                 default:
3764                         WARN_ON(1);
3765                         continue;
3766                 }
3767
3768                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3769                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3770         }
3771 }
3772
3773 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3774                                          struct mlxsw_sp_rif *old_rif,
3775                                          struct mlxsw_sp_rif *new_rif)
3776 {
3777         struct mlxsw_sp_nexthop *nh;
3778
3779         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3780         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3781                 nh->rif = new_rif;
3782         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3783 }
3784
3785 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3786                                            struct mlxsw_sp_rif *rif)
3787 {
3788         struct mlxsw_sp_nexthop *nh, *tmp;
3789
3790         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3791                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3792                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3793         }
3794 }
3795
3796 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3797                                    const struct fib_info *fi)
3798 {
3799         return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3800                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3801 }
3802
3803 static struct mlxsw_sp_nexthop_group *
3804 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3805 {
3806         struct mlxsw_sp_nexthop_group *nh_grp;
3807         struct mlxsw_sp_nexthop *nh;
3808         struct fib_nh *fib_nh;
3809         size_t alloc_size;
3810         int i;
3811         int err;
3812
3813         alloc_size = sizeof(*nh_grp) +
3814                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3815         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3816         if (!nh_grp)
3817                 return ERR_PTR(-ENOMEM);
3818         nh_grp->priv = fi;
3819         INIT_LIST_HEAD(&nh_grp->fib_list);
3820         nh_grp->neigh_tbl = &arp_tbl;
3821
3822         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3823         nh_grp->count = fi->fib_nhs;
3824         fib_info_hold(fi);
3825         for (i = 0; i < nh_grp->count; i++) {
3826                 nh = &nh_grp->nexthops[i];
3827                 fib_nh = &fi->fib_nh[i];
3828                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3829                 if (err)
3830                         goto err_nexthop4_init;
3831         }
3832         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3833         if (err)
3834                 goto err_nexthop_group_insert;
3835         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3836         return nh_grp;
3837
3838 err_nexthop_group_insert:
3839 err_nexthop4_init:
3840         for (i--; i >= 0; i--) {
3841                 nh = &nh_grp->nexthops[i];
3842                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3843         }
3844         fib_info_put(fi);
3845         kfree(nh_grp);
3846         return ERR_PTR(err);
3847 }
3848
3849 static void
3850 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3851                                 struct mlxsw_sp_nexthop_group *nh_grp)
3852 {
3853         struct mlxsw_sp_nexthop *nh;
3854         int i;
3855
3856         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3857         for (i = 0; i < nh_grp->count; i++) {
3858                 nh = &nh_grp->nexthops[i];
3859                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3860         }
3861         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3862         WARN_ON_ONCE(nh_grp->adj_index_valid);
3863         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3864         kfree(nh_grp);
3865 }
3866
3867 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3868                                        struct mlxsw_sp_fib_entry *fib_entry,
3869                                        struct fib_info *fi)
3870 {
3871         struct mlxsw_sp_nexthop_group *nh_grp;
3872
3873         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3874         if (!nh_grp) {
3875                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3876                 if (IS_ERR(nh_grp))
3877                         return PTR_ERR(nh_grp);
3878         }
3879         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3880         fib_entry->nh_group = nh_grp;
3881         return 0;
3882 }
3883
3884 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3885                                         struct mlxsw_sp_fib_entry *fib_entry)
3886 {
3887         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3888
3889         list_del(&fib_entry->nexthop_group_node);
3890         if (!list_empty(&nh_grp->fib_list))
3891                 return;
3892         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3893 }
3894
3895 static bool
3896 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3897 {
3898         struct mlxsw_sp_fib4_entry *fib4_entry;
3899
3900         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3901                                   common);
3902         return !fib4_entry->tos;
3903 }
3904
3905 static bool
3906 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3907 {
3908         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3909
3910         switch (fib_entry->fib_node->fib->proto) {
3911         case MLXSW_SP_L3_PROTO_IPV4:
3912                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3913                         return false;
3914                 break;
3915         case MLXSW_SP_L3_PROTO_IPV6:
3916                 break;
3917         }
3918
3919         switch (fib_entry->type) {
3920         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3921                 return !!nh_group->adj_index_valid;
3922         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3923                 return !!nh_group->nh_rif;
3924         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3925         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3926                 return true;
3927         default:
3928                 return false;
3929         }
3930 }
3931
3932 static struct mlxsw_sp_nexthop *
3933 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3934                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3935 {
3936         int i;
3937
3938         for (i = 0; i < nh_grp->count; i++) {
3939                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3940                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3941
3942                 if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3943                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3944                                     &rt->fib6_nh.nh_gw))
3945                         return nh;
3946                 continue;
3947         }
3948
3949         return NULL;
3950 }
3951
3952 static void
3953 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3954 {
3955         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3956         int i;
3957
3958         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3959             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3960             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3961                 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3962                 return;
3963         }
3964
3965         for (i = 0; i < nh_grp->count; i++) {
3966                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3967
3968                 if (nh->offloaded)
3969                         nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3970                 else
3971                         nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3972         }
3973 }
3974
3975 static void
3976 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3977 {
3978         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3979         int i;
3980
3981         if (!list_is_singular(&nh_grp->fib_list))
3982                 return;
3983
3984         for (i = 0; i < nh_grp->count; i++) {
3985                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3986
3987                 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3988         }
3989 }
3990
3991 static void
3992 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3993 {
3994         struct mlxsw_sp_fib6_entry *fib6_entry;
3995         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3996
3997         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3998                                   common);
3999
4000         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
4001                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4002                                  list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4003                 return;
4004         }
4005
4006         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4007                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4008                 struct mlxsw_sp_nexthop *nh;
4009
4010                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4011                 if (nh && nh->offloaded)
4012                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4013                 else
4014                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4015         }
4016 }
4017
4018 static void
4019 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4020 {
4021         struct mlxsw_sp_fib6_entry *fib6_entry;
4022         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4023
4024         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4025                                   common);
4026         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4027                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4028
4029                 rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4030         }
4031 }
4032
4033 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4034 {
4035         switch (fib_entry->fib_node->fib->proto) {
4036         case MLXSW_SP_L3_PROTO_IPV4:
4037                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
4038                 break;
4039         case MLXSW_SP_L3_PROTO_IPV6:
4040                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
4041                 break;
4042         }
4043 }
4044
4045 static void
4046 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4047 {
4048         switch (fib_entry->fib_node->fib->proto) {
4049         case MLXSW_SP_L3_PROTO_IPV4:
4050                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4051                 break;
4052         case MLXSW_SP_L3_PROTO_IPV6:
4053                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4054                 break;
4055         }
4056 }
4057
4058 static void
4059 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4060                                    enum mlxsw_reg_ralue_op op, int err)
4061 {
4062         switch (op) {
4063         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4064                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4065         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4066                 if (err)
4067                         return;
4068                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4069                         mlxsw_sp_fib_entry_offload_set(fib_entry);
4070                 else
4071                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
4072                 return;
4073         default:
4074                 return;
4075         }
4076 }
4077
4078 static void
4079 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4080                               const struct mlxsw_sp_fib_entry *fib_entry,
4081                               enum mlxsw_reg_ralue_op op)
4082 {
4083         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4084         enum mlxsw_reg_ralxx_protocol proto;
4085         u32 *p_dip;
4086
4087         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4088
4089         switch (fib->proto) {
4090         case MLXSW_SP_L3_PROTO_IPV4:
4091                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4092                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4093                                       fib_entry->fib_node->key.prefix_len,
4094                                       *p_dip);
4095                 break;
4096         case MLXSW_SP_L3_PROTO_IPV6:
4097                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4098                                       fib_entry->fib_node->key.prefix_len,
4099                                       fib_entry->fib_node->key.addr);
4100                 break;
4101         }
4102 }
4103
4104 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4105                                         struct mlxsw_sp_fib_entry *fib_entry,
4106                                         enum mlxsw_reg_ralue_op op)
4107 {
4108         char ralue_pl[MLXSW_REG_RALUE_LEN];
4109         enum mlxsw_reg_ralue_trap_action trap_action;
4110         u16 trap_id = 0;
4111         u32 adjacency_index = 0;
4112         u16 ecmp_size = 0;
4113
4114         /* In case the nexthop group adjacency index is valid, use it
4115          * with provided ECMP size. Otherwise, setup trap and pass
4116          * traffic to kernel.
4117          */
4118         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4119                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4120                 adjacency_index = fib_entry->nh_group->adj_index;
4121                 ecmp_size = fib_entry->nh_group->ecmp_size;
4122         } else {
4123                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4124                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4125         }
4126
4127         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4128         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4129                                         adjacency_index, ecmp_size);
4130         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4131 }
4132
4133 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4134                                        struct mlxsw_sp_fib_entry *fib_entry,
4135                                        enum mlxsw_reg_ralue_op op)
4136 {
4137         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4138         enum mlxsw_reg_ralue_trap_action trap_action;
4139         char ralue_pl[MLXSW_REG_RALUE_LEN];
4140         u16 trap_id = 0;
4141         u16 rif_index = 0;
4142
4143         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4144                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4145                 rif_index = rif->rif_index;
4146         } else {
4147                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4148                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4149         }
4150
4151         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4152         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4153                                        rif_index);
4154         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4155 }
4156
4157 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4158                                       struct mlxsw_sp_fib_entry *fib_entry,
4159                                       enum mlxsw_reg_ralue_op op)
4160 {
4161         char ralue_pl[MLXSW_REG_RALUE_LEN];
4162
4163         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4164         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4165         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4166 }
4167
4168 static int
4169 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4170                                  struct mlxsw_sp_fib_entry *fib_entry,
4171                                  enum mlxsw_reg_ralue_op op)
4172 {
4173         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4174         const struct mlxsw_sp_ipip_ops *ipip_ops;
4175
4176         if (WARN_ON(!ipip_entry))
4177                 return -EINVAL;
4178
4179         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4180         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4181                                       fib_entry->decap.tunnel_index);
4182 }
4183
4184 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4185                                            struct mlxsw_sp_fib_entry *fib_entry,
4186                                            enum mlxsw_reg_ralue_op op)
4187 {
4188         char ralue_pl[MLXSW_REG_RALUE_LEN];
4189
4190         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4191         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4192                                            fib_entry->decap.tunnel_index);
4193         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4194 }
4195
4196 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4197                                    struct mlxsw_sp_fib_entry *fib_entry,
4198                                    enum mlxsw_reg_ralue_op op)
4199 {
4200         switch (fib_entry->type) {
4201         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4202                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4203         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4204                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4205         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4206                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4207         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4208                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4209                                                         fib_entry, op);
4210         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4211                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4212         }
4213         return -EINVAL;
4214 }
4215
4216 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4217                                  struct mlxsw_sp_fib_entry *fib_entry,
4218                                  enum mlxsw_reg_ralue_op op)
4219 {
4220         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4221
4222         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4223
4224         return err;
4225 }
4226
4227 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4228                                      struct mlxsw_sp_fib_entry *fib_entry)
4229 {
4230         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4231                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4232 }
4233
4234 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4235                                   struct mlxsw_sp_fib_entry *fib_entry)
4236 {
4237         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4238                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4239 }
4240
/* Choose the driver entry type for an IPv4 route from the kernel route type
 * in @fen_info and store it in @fib_entry->type.
 *
 * Returns 0 on success, the result of mlxsw_sp_fib_entry_decap_init() for
 * IP-in-IP decap entries, or -EINVAL for route types not handled here.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
                             const struct fib_entry_notifier_info *fen_info,
                             struct mlxsw_sp_fib_entry *fib_entry)
{
        union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
        u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
        struct net_device *dev = fen_info->fi->fib_dev;
        struct mlxsw_sp_ipip_entry *ipip_entry;
        struct fib_info *fi = fen_info->fi;

        switch (fen_info->type) {
        case RTN_LOCAL:
                /* A local route becomes an IP-in-IP decap entry when a
                 * matching IPIP entry is found and its overlay device is up.
                 */
                ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
                                                 MLXSW_SP_L3_PROTO_IPV4, dip);
                if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
                        return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
                                                             fib_entry,
                                                             ipip_entry);
                }
                /* Otherwise it becomes an NVE decap entry when the NVE code
                 * reports the address as a decap address in this table.
                 */
                if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
                                                     dip.addr4)) {
                        u32 t_index;

                        t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
                        fib_entry->decap.tunnel_index = t_index;
                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
                        return 0;
                }
                /* Plain local route: handled like broadcast below. */
                /* fall through */
        case RTN_BROADCAST:
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
                return 0;
        case RTN_UNREACHABLE: /* fall through */
        case RTN_BLACKHOLE: /* fall through */
        case RTN_PROHIBIT:
                /* Packets hitting these routes need to be trapped, but
                 * can do so with a lower priority than packets directed
                 * at the host, so use action type local instead of trap.
                 */
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
                return 0;
        case RTN_UNICAST:
                /* Unicast routes are remote when mlxsw_sp_fi_is_gateway()
                 * says so, local otherwise.
                 */
                if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
                else
                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
                return 0;
        default:
                return -EINVAL;
        }
}
4294
4295 static struct mlxsw_sp_fib4_entry *
4296 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4297                            struct mlxsw_sp_fib_node *fib_node,
4298                            const struct fib_entry_notifier_info *fen_info)
4299 {
4300         struct mlxsw_sp_fib4_entry *fib4_entry;
4301         struct mlxsw_sp_fib_entry *fib_entry;
4302         int err;
4303
4304         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4305         if (!fib4_entry)
4306                 return ERR_PTR(-ENOMEM);
4307         fib_entry = &fib4_entry->common;
4308
4309         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4310         if (err)
4311                 goto err_fib4_entry_type_set;
4312
4313         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4314         if (err)
4315                 goto err_nexthop4_group_get;
4316
4317         fib4_entry->prio = fen_info->fi->fib_priority;
4318         fib4_entry->tb_id = fen_info->tb_id;
4319         fib4_entry->type = fen_info->type;
4320         fib4_entry->tos = fen_info->tos;
4321
4322         fib_entry->fib_node = fib_node;
4323
4324         return fib4_entry;
4325
4326 err_nexthop4_group_get:
4327 err_fib4_entry_type_set:
4328         kfree(fib4_entry);
4329         return ERR_PTR(err);
4330 }
4331
4332 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4333                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4334 {
4335         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4336         kfree(fib4_entry);
4337 }
4338
4339 static struct mlxsw_sp_fib4_entry *
4340 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4341                            const struct fib_entry_notifier_info *fen_info)
4342 {
4343         struct mlxsw_sp_fib4_entry *fib4_entry;
4344         struct mlxsw_sp_fib_node *fib_node;
4345         struct mlxsw_sp_fib *fib;
4346         struct mlxsw_sp_vr *vr;
4347
4348         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4349         if (!vr)
4350                 return NULL;
4351         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4352
4353         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4354                                             sizeof(fen_info->dst),
4355                                             fen_info->dst_len);
4356         if (!fib_node)
4357                 return NULL;
4358
4359         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4360                 if (fib4_entry->tb_id == fen_info->tb_id &&
4361                     fib4_entry->tos == fen_info->tos &&
4362                     fib4_entry->type == fen_info->type &&
4363                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4364                     fen_info->fi) {
4365                         return fib4_entry;
4366                 }
4367         }
4368
4369         return NULL;
4370 }
4371
/* Hash table parameters for FIB nodes: a node is keyed by its embedded
 * struct mlxsw_sp_fib_key and linked into the table through its ht_node
 * member.
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
        .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
        .key_len = sizeof(struct mlxsw_sp_fib_key),
        .automatic_shrinking = true,
};
4378
4379 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4380                                     struct mlxsw_sp_fib_node *fib_node)
4381 {
4382         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4383                                       mlxsw_sp_fib_ht_params);
4384 }
4385
4386 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4387                                      struct mlxsw_sp_fib_node *fib_node)
4388 {
4389         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4390                                mlxsw_sp_fib_ht_params);
4391 }
4392
4393 static struct mlxsw_sp_fib_node *
4394 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4395                          size_t addr_len, unsigned char prefix_len)
4396 {
4397         struct mlxsw_sp_fib_key key;
4398
4399         memset(&key, 0, sizeof(key));
4400         memcpy(key.addr, addr, addr_len);
4401         key.prefix_len = prefix_len;
4402         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4403 }
4404
4405 static struct mlxsw_sp_fib_node *
4406 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4407                          size_t addr_len, unsigned char prefix_len)
4408 {
4409         struct mlxsw_sp_fib_node *fib_node;
4410
4411         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4412         if (!fib_node)
4413                 return NULL;
4414
4415         INIT_LIST_HEAD(&fib_node->entry_list);
4416         list_add(&fib_node->list, &fib->node_list);
4417         memcpy(fib_node->key.addr, addr, addr_len);
4418         fib_node->key.prefix_len = prefix_len;
4419
4420         return fib_node;
4421 }
4422
4423 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4424 {
4425         list_del(&fib_node->list);
4426         WARN_ON(!list_empty(&fib_node->entry_list));
4427         kfree(fib_node);
4428 }
4429
4430 static bool
4431 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4432                                  const struct mlxsw_sp_fib_entry *fib_entry)
4433 {
4434         return list_first_entry(&fib_node->entry_list,
4435                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4436 }
4437
4438 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4439                                       struct mlxsw_sp_fib_node *fib_node)
4440 {
4441         struct mlxsw_sp_prefix_usage req_prefix_usage;
4442         struct mlxsw_sp_fib *fib = fib_node->fib;
4443         struct mlxsw_sp_lpm_tree *lpm_tree;
4444         int err;
4445
4446         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4447         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4448                 goto out;
4449
4450         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4451         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4452         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4453                                          fib->proto);
4454         if (IS_ERR(lpm_tree))
4455                 return PTR_ERR(lpm_tree);
4456
4457         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4458         if (err)
4459                 goto err_lpm_tree_replace;
4460
4461 out:
4462         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4463         return 0;
4464
4465 err_lpm_tree_replace:
4466         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4467         return err;
4468 }
4469
4470 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4471                                          struct mlxsw_sp_fib_node *fib_node)
4472 {
4473         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4474         struct mlxsw_sp_prefix_usage req_prefix_usage;
4475         struct mlxsw_sp_fib *fib = fib_node->fib;
4476         int err;
4477
4478         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4479                 return;
4480         /* Try to construct a new LPM tree from the current prefix usage
4481          * minus the unused one. If we fail, continue using the old one.
4482          */
4483         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4484         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4485                                     fib_node->key.prefix_len);
4486         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4487                                          fib->proto);
4488         if (IS_ERR(lpm_tree))
4489                 return;
4490
4491         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4492         if (err)
4493                 goto err_lpm_tree_replace;
4494
4495         return;
4496
4497 err_lpm_tree_replace:
4498         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4499 }
4500
4501 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4502                                   struct mlxsw_sp_fib_node *fib_node,
4503                                   struct mlxsw_sp_fib *fib)
4504 {
4505         int err;
4506
4507         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4508         if (err)
4509                 return err;
4510         fib_node->fib = fib;
4511
4512         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4513         if (err)
4514                 goto err_fib_lpm_tree_link;
4515
4516         return 0;
4517
4518 err_fib_lpm_tree_link:
4519         fib_node->fib = NULL;
4520         mlxsw_sp_fib_node_remove(fib, fib_node);
4521         return err;
4522 }
4523
4524 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4525                                    struct mlxsw_sp_fib_node *fib_node)
4526 {
4527         struct mlxsw_sp_fib *fib = fib_node->fib;
4528
4529         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4530         fib_node->fib = NULL;
4531         mlxsw_sp_fib_node_remove(fib, fib_node);
4532 }
4533
4534 static struct mlxsw_sp_fib_node *
4535 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4536                       size_t addr_len, unsigned char prefix_len,
4537                       enum mlxsw_sp_l3proto proto)
4538 {
4539         struct mlxsw_sp_fib_node *fib_node;
4540         struct mlxsw_sp_fib *fib;
4541         struct mlxsw_sp_vr *vr;
4542         int err;
4543
4544         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4545         if (IS_ERR(vr))
4546                 return ERR_CAST(vr);
4547         fib = mlxsw_sp_vr_fib(vr, proto);
4548
4549         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4550         if (fib_node)
4551                 return fib_node;
4552
4553         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4554         if (!fib_node) {
4555                 err = -ENOMEM;
4556                 goto err_fib_node_create;
4557         }
4558
4559         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4560         if (err)
4561                 goto err_fib_node_init;
4562
4563         return fib_node;
4564
4565 err_fib_node_init:
4566         mlxsw_sp_fib_node_destroy(fib_node);
4567 err_fib_node_create:
4568         mlxsw_sp_vr_put(mlxsw_sp, vr);
4569         return ERR_PTR(err);
4570 }
4571
4572 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4573                                   struct mlxsw_sp_fib_node *fib_node)
4574 {
4575         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4576
4577         if (!list_empty(&fib_node->entry_list))
4578                 return;
4579         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4580         mlxsw_sp_fib_node_destroy(fib_node);
4581         mlxsw_sp_vr_put(mlxsw_sp, vr);
4582 }
4583
/* Find the existing entry on @fib_node that the new entry should be placed
 * relative to.
 *
 * Entries with a larger tb_id than the new one are skipped; the walk stops
 * (returning NULL) at the first entry whose tb_id differs otherwise. Within
 * the same tb_id, entries with a larger TOS are skipped, and the first entry
 * with either a priority greater than or equal to the new one's, or a
 * smaller TOS, is returned.
 *
 * NOTE(review): this presumes the list is kept ordered by tb_id, then TOS,
 * then priority - confirm against the insertion helpers.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
                              const struct mlxsw_sp_fib4_entry *new4_entry)
{
        struct mlxsw_sp_fib4_entry *fib4_entry;

        list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
                if (fib4_entry->tb_id > new4_entry->tb_id)
                        continue;
                /* Past the new entry's table: no candidate exists. */
                if (fib4_entry->tb_id != new4_entry->tb_id)
                        break;
                if (fib4_entry->tos > new4_entry->tos)
                        continue;
                if (fib4_entry->prio >= new4_entry->prio ||
                    fib4_entry->tos < new4_entry->tos)
                        return fib4_entry;
        }

        return NULL;
}
4604
/* Insert @new4_entry after the run of entries that share its tb_id, TOS and
 * priority, starting the scan at @fib4_entry.
 *
 * Returns -EINVAL (with a warning) when @fib4_entry is NULL, 0 otherwise.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
                               struct mlxsw_sp_fib4_entry *new4_entry)
{
        struct mlxsw_sp_fib_node *fib_node;

        if (WARN_ON(!fib4_entry))
                return -EINVAL;

        fib_node = fib4_entry->common.fib_node;
        /* Advance the cursor to the first entry that differs in tb_id, TOS
         * or priority (or to the list head when there is none).
         */
        list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
                                 common.list) {
                if (fib4_entry->tb_id != new4_entry->tb_id ||
                    fib4_entry->tos != new4_entry->tos ||
                    fib4_entry->prio != new4_entry->prio)
                        break;
        }

        /* list_add_tail() on the cursor places the new entry right before
         * it.
         */
        list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
        return 0;
}
4626
/* Link @new4_entry into its FIB node's entry list.
 *
 * @replace: the new entry replaces an existing one, which must be found
 * @append: the new entry is appended after equal entries
 *
 * Returns 0 on success, -EINVAL when @replace is set but no entry to
 * replace was found, or the result of the append helper.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
                               bool replace, bool append)
{
        struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
        struct mlxsw_sp_fib4_entry *fib4_entry;

        fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

        if (append)
                return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
        if (replace && WARN_ON(!fib4_entry))
                return -EINVAL;

        /* Insert new entry before replaced one, so that we can later
         * remove the second.
         */
        if (fib4_entry) {
                list_add_tail(&new4_entry->common.list,
                              &fib4_entry->common.list);
        } else {
                struct mlxsw_sp_fib4_entry *last;

                /* No reference entry was found: remember the last entry
                 * seen before one whose tb_id is smaller than the new
                 * entry's, and insert right after it.
                 */
                list_for_each_entry(last, &fib_node->entry_list, common.list) {
                        if (new4_entry->tb_id > last->tb_id)
                                break;
                        fib4_entry = last;
                }

                if (fib4_entry)
                        list_add(&new4_entry->common.list,
                                 &fib4_entry->common.list);
                else
                        /* Nothing precedes the new entry: put it first. */
                        list_add(&new4_entry->common.list,
                                 &fib_node->entry_list);
        }

        return 0;
}
4666
4667 static void
4668 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4669 {
4670         list_del(&fib4_entry->common.list);
4671 }
4672
4673 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4674                                        struct mlxsw_sp_fib_entry *fib_entry)
4675 {
4676         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4677
4678         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4679                 return 0;
4680
4681         /* To prevent packet loss, overwrite the previously offloaded
4682          * entry.
4683          */
4684         if (!list_is_singular(&fib_node->entry_list)) {
4685                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4686                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4687
4688                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4689         }
4690
4691         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4692 }
4693
4694 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4695                                         struct mlxsw_sp_fib_entry *fib_entry)
4696 {
4697         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4698
4699         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4700                 return;
4701
4702         /* Promote the next entry by overwriting the deleted entry */
4703         if (!list_is_singular(&fib_node->entry_list)) {
4704                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4705                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4706
4707                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4708                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4709                 return;
4710         }
4711
4712         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4713 }
4714
4715 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4716                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4717                                          bool replace, bool append)
4718 {
4719         int err;
4720
4721         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4722         if (err)
4723                 return err;
4724
4725         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4726         if (err)
4727                 goto err_fib_node_entry_add;
4728
4729         return 0;
4730
4731 err_fib_node_entry_add:
4732         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4733         return err;
4734 }
4735
4736 static void
4737 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4738                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4739 {
4740         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4741         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4742
4743         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4744                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4745 }
4746
4747 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4748                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4749                                         bool replace)
4750 {
4751         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4752         struct mlxsw_sp_fib4_entry *replaced;
4753
4754         if (!replace)
4755                 return;
4756
4757         /* We inserted the new entry before replaced one */
4758         replaced = list_next_entry(fib4_entry, common.list);
4759
4760         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4761         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4762         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4763 }
4764
4765 static int
4766 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4767                          const struct fib_entry_notifier_info *fen_info,
4768                          bool replace, bool append)
4769 {
4770         struct mlxsw_sp_fib4_entry *fib4_entry;
4771         struct mlxsw_sp_fib_node *fib_node;
4772         int err;
4773
4774         if (mlxsw_sp->router->aborted)
4775                 return 0;
4776
4777         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4778                                          &fen_info->dst, sizeof(fen_info->dst),
4779                                          fen_info->dst_len,
4780                                          MLXSW_SP_L3_PROTO_IPV4);
4781         if (IS_ERR(fib_node)) {
4782                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4783                 return PTR_ERR(fib_node);
4784         }
4785
4786         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4787         if (IS_ERR(fib4_entry)) {
4788                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4789                 err = PTR_ERR(fib4_entry);
4790                 goto err_fib4_entry_create;
4791         }
4792
4793         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4794                                             append);
4795         if (err) {
4796                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4797                 goto err_fib4_node_entry_link;
4798         }
4799
4800         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4801
4802         return 0;
4803
4804 err_fib4_node_entry_link:
4805         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4806 err_fib4_entry_create:
4807         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4808         return err;
4809 }
4810
4811 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4812                                      struct fib_entry_notifier_info *fen_info)
4813 {
4814         struct mlxsw_sp_fib4_entry *fib4_entry;
4815         struct mlxsw_sp_fib_node *fib_node;
4816
4817         if (mlxsw_sp->router->aborted)
4818                 return;
4819
4820         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4821         if (WARN_ON(!fib4_entry))
4822                 return;
4823         fib_node = fib4_entry->common.fib_node;
4824
4825         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4826         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4827         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4828 }
4829
4830 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4831 {
4832         /* Packets with link-local destination IP arriving to the router
4833          * are trapped to the CPU, so no need to program specific routes
4834          * for them.
4835          */
4836         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4837                 return true;
4838
4839         /* Multicast routes aren't supported, so ignore them. Neighbour
4840          * Discovery packets are specifically trapped.
4841          */
4842         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4843                 return true;
4844
4845         /* Cloned routes are irrelevant in the forwarding path. */
4846         if (rt->fib6_flags & RTF_CACHE)
4847                 return true;
4848
4849         return false;
4850 }
4851
4852 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4853 {
4854         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4855
4856         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4857         if (!mlxsw_sp_rt6)
4858                 return ERR_PTR(-ENOMEM);
4859
4860         /* In case of route replace, replaced route is deleted with
4861          * no notification. Take reference to prevent accessing freed
4862          * memory.
4863          */
4864         mlxsw_sp_rt6->rt = rt;
4865         fib6_info_hold(rt);
4866
4867         return mlxsw_sp_rt6;
4868 }
4869
4870 #if IS_ENABLED(CONFIG_IPV6)
4871 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4872 {
4873         fib6_info_release(rt);
4874 }
4875 #else
4876 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4877 {
4878 }
4879 #endif
4880
4881 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4882 {
4883         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4884         kfree(mlxsw_sp_rt6);
4885 }
4886
4887 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4888 {
4889         /* RTF_CACHE routes are ignored */
4890         return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4891 }
4892
4893 static struct fib6_info *
4894 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4895 {
4896         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4897                                 list)->rt;
4898 }
4899
4900 static struct mlxsw_sp_fib6_entry *
4901 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4902                                  const struct fib6_info *nrt, bool replace)
4903 {
4904         struct mlxsw_sp_fib6_entry *fib6_entry;
4905
4906         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4907                 return NULL;
4908
4909         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4910                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4911
4912                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4913                  * virtual router.
4914                  */
4915                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4916                         continue;
4917                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4918                         break;
4919                 if (rt->fib6_metric < nrt->fib6_metric)
4920                         continue;
4921                 if (rt->fib6_metric == nrt->fib6_metric &&
4922                     mlxsw_sp_fib6_rt_can_mp(rt))
4923                         return fib6_entry;
4924                 if (rt->fib6_metric > nrt->fib6_metric)
4925                         break;
4926         }
4927
4928         return NULL;
4929 }
4930
4931 static struct mlxsw_sp_rt6 *
4932 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4933                             const struct fib6_info *rt)
4934 {
4935         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4936
4937         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4938                 if (mlxsw_sp_rt6->rt == rt)
4939                         return mlxsw_sp_rt6;
4940         }
4941
4942         return NULL;
4943 }
4944
4945 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4946                                         const struct fib6_info *rt,
4947                                         enum mlxsw_sp_ipip_type *ret)
4948 {
4949         return rt->fib6_nh.nh_dev &&
4950                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4951 }
4952
4953 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4954                                        struct mlxsw_sp_nexthop_group *nh_grp,
4955                                        struct mlxsw_sp_nexthop *nh,
4956                                        const struct fib6_info *rt)
4957 {
4958         const struct mlxsw_sp_ipip_ops *ipip_ops;
4959         struct mlxsw_sp_ipip_entry *ipip_entry;
4960         struct net_device *dev = rt->fib6_nh.nh_dev;
4961         struct mlxsw_sp_rif *rif;
4962         int err;
4963
4964         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4965         if (ipip_entry) {
4966                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4967                 if (ipip_ops->can_offload(mlxsw_sp, dev,
4968                                           MLXSW_SP_L3_PROTO_IPV6)) {
4969                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4970                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4971                         return 0;
4972                 }
4973         }
4974
4975         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4976         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4977         if (!rif)
4978                 return 0;
4979         mlxsw_sp_nexthop_rif_init(nh, rif);
4980
4981         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4982         if (err)
4983                 goto err_nexthop_neigh_init;
4984
4985         return 0;
4986
4987 err_nexthop_neigh_init:
4988         mlxsw_sp_nexthop_rif_fini(nh);
4989         return err;
4990 }
4991
/* IPv6 counterpart of nexthop type teardown; simply delegates to the
 * protocol-agnostic mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4997
4998 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4999                                   struct mlxsw_sp_nexthop_group *nh_grp,
5000                                   struct mlxsw_sp_nexthop *nh,
5001                                   const struct fib6_info *rt)
5002 {
5003         struct net_device *dev = rt->fib6_nh.nh_dev;
5004
5005         nh->nh_grp = nh_grp;
5006         nh->nh_weight = rt->fib6_nh.nh_weight;
5007         memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
5008         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5009
5010         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5011
5012         if (!dev)
5013                 return 0;
5014         nh->ifindex = dev->ifindex;
5015
5016         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5017 }
5018
5019 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5020                                    struct mlxsw_sp_nexthop *nh)
5021 {
5022         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5023         list_del(&nh->router_list_node);
5024         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5025 }
5026
5027 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5028                                     const struct fib6_info *rt)
5029 {
5030         return rt->fib6_flags & RTF_GATEWAY ||
5031                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5032 }
5033
5034 static struct mlxsw_sp_nexthop_group *
5035 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5036                                struct mlxsw_sp_fib6_entry *fib6_entry)
5037 {
5038         struct mlxsw_sp_nexthop_group *nh_grp;
5039         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5040         struct mlxsw_sp_nexthop *nh;
5041         size_t alloc_size;
5042         int i = 0;
5043         int err;
5044
5045         alloc_size = sizeof(*nh_grp) +
5046                      fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
5047         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
5048         if (!nh_grp)
5049                 return ERR_PTR(-ENOMEM);
5050         INIT_LIST_HEAD(&nh_grp->fib_list);
5051 #if IS_ENABLED(CONFIG_IPV6)
5052         nh_grp->neigh_tbl = &nd_tbl;
5053 #endif
5054         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5055                                         struct mlxsw_sp_rt6, list);
5056         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5057         nh_grp->count = fib6_entry->nrt6;
5058         for (i = 0; i < nh_grp->count; i++) {
5059                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5060
5061                 nh = &nh_grp->nexthops[i];
5062                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5063                 if (err)
5064                         goto err_nexthop6_init;
5065                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5066         }
5067
5068         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5069         if (err)
5070                 goto err_nexthop_group_insert;
5071
5072         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5073         return nh_grp;
5074
5075 err_nexthop_group_insert:
5076 err_nexthop6_init:
5077         for (i--; i >= 0; i--) {
5078                 nh = &nh_grp->nexthops[i];
5079                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5080         }
5081         kfree(nh_grp);
5082         return ERR_PTR(err);
5083 }
5084
5085 static void
5086 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5087                                 struct mlxsw_sp_nexthop_group *nh_grp)
5088 {
5089         struct mlxsw_sp_nexthop *nh;
5090         int i = nh_grp->count;
5091
5092         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5093         for (i--; i >= 0; i--) {
5094                 nh = &nh_grp->nexthops[i];
5095                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5096         }
5097         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5098         WARN_ON(nh_grp->adj_index_valid);
5099         kfree(nh_grp);
5100 }
5101
5102 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5103                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5104 {
5105         struct mlxsw_sp_nexthop_group *nh_grp;
5106
5107         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5108         if (!nh_grp) {
5109                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5110                 if (IS_ERR(nh_grp))
5111                         return PTR_ERR(nh_grp);
5112         }
5113
5114         list_add_tail(&fib6_entry->common.nexthop_group_node,
5115                       &nh_grp->fib_list);
5116         fib6_entry->common.nh_group = nh_grp;
5117
5118         return 0;
5119 }
5120
5121 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5122                                         struct mlxsw_sp_fib_entry *fib_entry)
5123 {
5124         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5125
5126         list_del(&fib_entry->nexthop_group_node);
5127         if (!list_empty(&nh_grp->fib_list))
5128                 return;
5129         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5130 }
5131
5132 static int
5133 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5134                                struct mlxsw_sp_fib6_entry *fib6_entry)
5135 {
5136         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5137         int err;
5138
5139         fib6_entry->common.nh_group = NULL;
5140         list_del(&fib6_entry->common.nexthop_group_node);
5141
5142         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5143         if (err)
5144                 goto err_nexthop6_group_get;
5145
5146         /* In case this entry is offloaded, then the adjacency index
5147          * currently associated with it in the device's table is that
5148          * of the old group. Start using the new one instead.
5149          */
5150         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5151         if (err)
5152                 goto err_fib_node_entry_add;
5153
5154         if (list_empty(&old_nh_grp->fib_list))
5155                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5156
5157         return 0;
5158
5159 err_fib_node_entry_add:
5160         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5161 err_nexthop6_group_get:
5162         list_add_tail(&fib6_entry->common.nexthop_group_node,
5163                       &old_nh_grp->fib_list);
5164         fib6_entry->common.nh_group = old_nh_grp;
5165         return err;
5166 }
5167
5168 static int
5169 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5170                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5171                                 struct fib6_info *rt)
5172 {
5173         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5174         int err;
5175
5176         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5177         if (IS_ERR(mlxsw_sp_rt6))
5178                 return PTR_ERR(mlxsw_sp_rt6);
5179
5180         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5181         fib6_entry->nrt6++;
5182
5183         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5184         if (err)
5185                 goto err_nexthop6_group_update;
5186
5187         return 0;
5188
5189 err_nexthop6_group_update:
5190         fib6_entry->nrt6--;
5191         list_del(&mlxsw_sp_rt6->list);
5192         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5193         return err;
5194 }
5195
5196 static void
5197 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5198                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5199                                 struct fib6_info *rt)
5200 {
5201         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5202
5203         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5204         if (WARN_ON(!mlxsw_sp_rt6))
5205                 return;
5206
5207         fib6_entry->nrt6--;
5208         list_del(&mlxsw_sp_rt6->list);
5209         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5210         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5211 }
5212
5213 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5214                                          struct mlxsw_sp_fib_entry *fib_entry,
5215                                          const struct fib6_info *rt)
5216 {
5217         /* Packets hitting RTF_REJECT routes need to be discarded by the
5218          * stack. We can rely on their destination device not having a
5219          * RIF (it's the loopback device) and can thus use action type
5220          * local, which will cause them to be trapped with a lower
5221          * priority than packets that need to be locally received.
5222          */
5223         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5224                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5225         else if (rt->fib6_flags & RTF_REJECT)
5226                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5227         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5228                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5229         else
5230                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5231 }
5232
5233 static void
5234 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5235 {
5236         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5237
5238         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5239                                  list) {
5240                 fib6_entry->nrt6--;
5241                 list_del(&mlxsw_sp_rt6->list);
5242                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5243         }
5244 }
5245
5246 static struct mlxsw_sp_fib6_entry *
5247 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5248                            struct mlxsw_sp_fib_node *fib_node,
5249                            struct fib6_info *rt)
5250 {
5251         struct mlxsw_sp_fib6_entry *fib6_entry;
5252         struct mlxsw_sp_fib_entry *fib_entry;
5253         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5254         int err;
5255
5256         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5257         if (!fib6_entry)
5258                 return ERR_PTR(-ENOMEM);
5259         fib_entry = &fib6_entry->common;
5260
5261         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5262         if (IS_ERR(mlxsw_sp_rt6)) {
5263                 err = PTR_ERR(mlxsw_sp_rt6);
5264                 goto err_rt6_create;
5265         }
5266
5267         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5268
5269         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5270         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5271         fib6_entry->nrt6 = 1;
5272         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5273         if (err)
5274                 goto err_nexthop6_group_get;
5275
5276         fib_entry->fib_node = fib_node;
5277
5278         return fib6_entry;
5279
5280 err_nexthop6_group_get:
5281         list_del(&mlxsw_sp_rt6->list);
5282         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5283 err_rt6_create:
5284         kfree(fib6_entry);
5285         return ERR_PTR(err);
5286 }
5287
5288 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5289                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5290 {
5291         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5292         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5293         WARN_ON(fib6_entry->nrt6);
5294         kfree(fib6_entry);
5295 }
5296
5297 static struct mlxsw_sp_fib6_entry *
5298 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5299                               const struct fib6_info *nrt, bool replace)
5300 {
5301         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5302
5303         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5304                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5305
5306                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5307                         continue;
5308                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5309                         break;
5310                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5311                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5312                             mlxsw_sp_fib6_rt_can_mp(nrt))
5313                                 return fib6_entry;
5314                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5315                                 fallback = fallback ?: fib6_entry;
5316                 }
5317                 if (rt->fib6_metric > nrt->fib6_metric)
5318                         return fallback ?: fib6_entry;
5319         }
5320
5321         return fallback;
5322 }
5323
5324 static int
5325 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5326                                bool replace)
5327 {
5328         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5329         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5330         struct mlxsw_sp_fib6_entry *fib6_entry;
5331
5332         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5333
5334         if (replace && WARN_ON(!fib6_entry))
5335                 return -EINVAL;
5336
5337         if (fib6_entry) {
5338                 list_add_tail(&new6_entry->common.list,
5339                               &fib6_entry->common.list);
5340         } else {
5341                 struct mlxsw_sp_fib6_entry *last;
5342
5343                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5344                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5345
5346                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5347                                 break;
5348                         fib6_entry = last;
5349                 }
5350
5351                 if (fib6_entry)
5352                         list_add(&new6_entry->common.list,
5353                                  &fib6_entry->common.list);
5354                 else
5355                         list_add(&new6_entry->common.list,
5356                                  &fib_node->entry_list);
5357         }
5358
5359         return 0;
5360 }
5361
5362 static void
5363 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5364 {
5365         list_del(&fib6_entry->common.list);
5366 }
5367
5368 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5369                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5370                                          bool replace)
5371 {
5372         int err;
5373
5374         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5375         if (err)
5376                 return err;
5377
5378         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5379         if (err)
5380                 goto err_fib_node_entry_add;
5381
5382         return 0;
5383
5384 err_fib_node_entry_add:
5385         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5386         return err;
5387 }
5388
5389 static void
5390 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5391                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5392 {
5393         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5394         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5395 }
5396
5397 static struct mlxsw_sp_fib6_entry *
5398 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5399                            const struct fib6_info *rt)
5400 {
5401         struct mlxsw_sp_fib6_entry *fib6_entry;
5402         struct mlxsw_sp_fib_node *fib_node;
5403         struct mlxsw_sp_fib *fib;
5404         struct mlxsw_sp_vr *vr;
5405
5406         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5407         if (!vr)
5408                 return NULL;
5409         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5410
5411         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5412                                             sizeof(rt->fib6_dst.addr),
5413                                             rt->fib6_dst.plen);
5414         if (!fib_node)
5415                 return NULL;
5416
5417         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5418                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5419
5420                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5421                     rt->fib6_metric == iter_rt->fib6_metric &&
5422                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5423                         return fib6_entry;
5424         }
5425
5426         return NULL;
5427 }
5428
5429 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5430                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5431                                         bool replace)
5432 {
5433         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5434         struct mlxsw_sp_fib6_entry *replaced;
5435
5436         if (!replace)
5437                 return;
5438
5439         replaced = list_next_entry(fib6_entry, common.list);
5440
5441         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5442         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5443         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5444 }
5445
5446 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5447                                     struct fib6_info *rt, bool replace)
5448 {
5449         struct mlxsw_sp_fib6_entry *fib6_entry;
5450         struct mlxsw_sp_fib_node *fib_node;
5451         int err;
5452
5453         if (mlxsw_sp->router->aborted)
5454                 return 0;
5455
5456         if (rt->fib6_src.plen)
5457                 return -EINVAL;
5458
5459         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5460                 return 0;
5461
5462         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5463                                          &rt->fib6_dst.addr,
5464                                          sizeof(rt->fib6_dst.addr),
5465                                          rt->fib6_dst.plen,
5466                                          MLXSW_SP_L3_PROTO_IPV6);
5467         if (IS_ERR(fib_node))
5468                 return PTR_ERR(fib_node);
5469
5470         /* Before creating a new entry, try to append route to an existing
5471          * multipath entry.
5472          */
5473         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5474         if (fib6_entry) {
5475                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5476                 if (err)
5477                         goto err_fib6_entry_nexthop_add;
5478                 return 0;
5479         }
5480
5481         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5482         if (IS_ERR(fib6_entry)) {
5483                 err = PTR_ERR(fib6_entry);
5484                 goto err_fib6_entry_create;
5485         }
5486
5487         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5488         if (err)
5489                 goto err_fib6_node_entry_link;
5490
5491         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5492
5493         return 0;
5494
5495 err_fib6_node_entry_link:
5496         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5497 err_fib6_entry_create:
5498 err_fib6_entry_nexthop_add:
5499         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5500         return err;
5501 }
5502
5503 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5504                                      struct fib6_info *rt)
5505 {
5506         struct mlxsw_sp_fib6_entry *fib6_entry;
5507         struct mlxsw_sp_fib_node *fib_node;
5508
5509         if (mlxsw_sp->router->aborted)
5510                 return;
5511
5512         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5513                 return;
5514
5515         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5516         if (WARN_ON(!fib6_entry))
5517                 return;
5518
5519         /* If route is part of a multipath entry, but not the last one
5520          * removed, then only reduce its nexthop group.
5521          */
5522         if (!list_is_singular(&fib6_entry->rt6_list)) {
5523                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5524                 return;
5525         }
5526
5527         fib_node = fib6_entry->common.fib_node;
5528
5529         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5530         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5531         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5532 }
5533
5534 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5535                                             enum mlxsw_reg_ralxx_protocol proto,
5536                                             u8 tree_id)
5537 {
5538         char ralta_pl[MLXSW_REG_RALTA_LEN];
5539         char ralst_pl[MLXSW_REG_RALST_LEN];
5540         int i, err;
5541
5542         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5543         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5544         if (err)
5545                 return err;
5546
5547         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5548         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5549         if (err)
5550                 return err;
5551
5552         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5553                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5554                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5555                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5556
5557                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5558                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5559                                       raltb_pl);
5560                 if (err)
5561                         return err;
5562
5563                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5564                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5565                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5566                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5567                                       ralue_pl);
5568                 if (err)
5569                         return err;
5570         }
5571
5572         return 0;
5573 }
5574
5575 static struct mlxsw_sp_mr_table *
5576 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5577 {
5578         if (family == RTNL_FAMILY_IPMR)
5579                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5580         else
5581                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5582 }
5583
5584 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5585                                      struct mfc_entry_notifier_info *men_info,
5586                                      bool replace)
5587 {
5588         struct mlxsw_sp_mr_table *mrt;
5589         struct mlxsw_sp_vr *vr;
5590
5591         if (mlxsw_sp->router->aborted)
5592                 return 0;
5593
5594         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5595         if (IS_ERR(vr))
5596                 return PTR_ERR(vr);
5597
5598         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5599         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5600 }
5601
5602 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5603                                       struct mfc_entry_notifier_info *men_info)
5604 {
5605         struct mlxsw_sp_mr_table *mrt;
5606         struct mlxsw_sp_vr *vr;
5607
5608         if (mlxsw_sp->router->aborted)
5609                 return;
5610
5611         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5612         if (WARN_ON(!vr))
5613                 return;
5614
5615         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5616         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5617         mlxsw_sp_vr_put(mlxsw_sp, vr);
5618 }
5619
5620 static int
5621 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5622                               struct vif_entry_notifier_info *ven_info)
5623 {
5624         struct mlxsw_sp_mr_table *mrt;
5625         struct mlxsw_sp_rif *rif;
5626         struct mlxsw_sp_vr *vr;
5627
5628         if (mlxsw_sp->router->aborted)
5629                 return 0;
5630
5631         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5632         if (IS_ERR(vr))
5633                 return PTR_ERR(vr);
5634
5635         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5636         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5637         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5638                                    ven_info->vif_index,
5639                                    ven_info->vif_flags, rif);
5640 }
5641
5642 static void
5643 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5644                               struct vif_entry_notifier_info *ven_info)
5645 {
5646         struct mlxsw_sp_mr_table *mrt;
5647         struct mlxsw_sp_vr *vr;
5648
5649         if (mlxsw_sp->router->aborted)
5650                 return;
5651
5652         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5653         if (WARN_ON(!vr))
5654                 return;
5655
5656         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5657         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5658         mlxsw_sp_vr_put(mlxsw_sp, vr);
5659 }
5660
5661 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5662 {
5663         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5664         int err;
5665
5666         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5667                                                MLXSW_SP_LPM_TREE_MIN);
5668         if (err)
5669                 return err;
5670
5671         /* The multicast router code does not need an abort trap as by default,
5672          * packets that don't match any routes are trapped to the CPU.
5673          */
5674
5675         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5676         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5677                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5678 }
5679
5680 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5681                                      struct mlxsw_sp_fib_node *fib_node)
5682 {
5683         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5684
5685         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5686                                  common.list) {
5687                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5688
5689                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5690                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5691                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5692                 /* Break when entry list is empty and node was freed.
5693                  * Otherwise, we'll access freed memory in the next
5694                  * iteration.
5695                  */
5696                 if (do_break)
5697                         break;
5698         }
5699 }
5700
5701 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5702                                      struct mlxsw_sp_fib_node *fib_node)
5703 {
5704         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5705
5706         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5707                                  common.list) {
5708                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5709
5710                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5711                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5712                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5713                 if (do_break)
5714                         break;
5715         }
5716 }
5717
5718 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5719                                     struct mlxsw_sp_fib_node *fib_node)
5720 {
5721         switch (fib_node->fib->proto) {
5722         case MLXSW_SP_L3_PROTO_IPV4:
5723                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5724                 break;
5725         case MLXSW_SP_L3_PROTO_IPV6:
5726                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5727                 break;
5728         }
5729 }
5730
5731 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5732                                   struct mlxsw_sp_vr *vr,
5733                                   enum mlxsw_sp_l3proto proto)
5734 {
5735         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5736         struct mlxsw_sp_fib_node *fib_node, *tmp;
5737
5738         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5739                 bool do_break = &tmp->list == &fib->node_list;
5740
5741                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5742                 if (do_break)
5743                         break;
5744         }
5745 }
5746
5747 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5748 {
5749         int i, j;
5750
5751         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5752                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5753
5754                 if (!mlxsw_sp_vr_is_used(vr))
5755                         continue;
5756
5757                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5758                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5759                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5760
5761                 /* If virtual router was only used for IPv4, then it's no
5762                  * longer used.
5763                  */
5764                 if (!mlxsw_sp_vr_is_used(vr))
5765                         continue;
5766                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5767         }
5768 }
5769
5770 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5771 {
5772         int err;
5773
5774         if (mlxsw_sp->router->aborted)
5775                 return;
5776         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5777         mlxsw_sp_router_fib_flush(mlxsw_sp);
5778         mlxsw_sp->router->aborted = true;
5779         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5780         if (err)
5781                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5782 }
5783
5784 struct mlxsw_sp_fib_event_work {
5785         struct work_struct work;
5786         union {
5787                 struct fib6_entry_notifier_info fen6_info;
5788                 struct fib_entry_notifier_info fen_info;
5789                 struct fib_rule_notifier_info fr_info;
5790                 struct fib_nh_notifier_info fnh_info;
5791                 struct mfc_entry_notifier_info men_info;
5792                 struct vif_entry_notifier_info ven_info;
5793         };
5794         struct mlxsw_sp *mlxsw_sp;
5795         unsigned long event;
5796 };
5797
5798 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5799 {
5800         struct mlxsw_sp_fib_event_work *fib_work =
5801                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5802         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5803         bool replace, append;
5804         int err;
5805
5806         /* Protect internal structures from changes */
5807         rtnl_lock();
5808         mlxsw_sp_span_respin(mlxsw_sp);
5809
5810         switch (fib_work->event) {
5811         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5812         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5813         case FIB_EVENT_ENTRY_ADD:
5814                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5815                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5816                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5817                                                replace, append);
5818                 if (err)
5819                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5820                 fib_info_put(fib_work->fen_info.fi);
5821                 break;
5822         case FIB_EVENT_ENTRY_DEL:
5823                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5824                 fib_info_put(fib_work->fen_info.fi);
5825                 break;
5826         case FIB_EVENT_RULE_ADD:
5827                 /* if we get here, a rule was added that we do not support.
5828                  * just do the fib_abort
5829                  */
5830                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5831                 break;
5832         case FIB_EVENT_NH_ADD: /* fall through */
5833         case FIB_EVENT_NH_DEL:
5834                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5835                                         fib_work->fnh_info.fib_nh);
5836                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5837                 break;
5838         }
5839         rtnl_unlock();
5840         kfree(fib_work);
5841 }
5842
5843 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5844 {
5845         struct mlxsw_sp_fib_event_work *fib_work =
5846                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5847         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5848         bool replace;
5849         int err;
5850
5851         rtnl_lock();
5852         mlxsw_sp_span_respin(mlxsw_sp);
5853
5854         switch (fib_work->event) {
5855         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5856         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5857         case FIB_EVENT_ENTRY_ADD:
5858                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5859                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5860                                                fib_work->fen6_info.rt, replace);
5861                 if (err)
5862                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5863                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5864                 break;
5865         case FIB_EVENT_ENTRY_DEL:
5866                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5867                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5868                 break;
5869         case FIB_EVENT_RULE_ADD:
5870                 /* if we get here, a rule was added that we do not support.
5871                  * just do the fib_abort
5872                  */
5873                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5874                 break;
5875         }
5876         rtnl_unlock();
5877         kfree(fib_work);
5878 }
5879
5880 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5881 {
5882         struct mlxsw_sp_fib_event_work *fib_work =
5883                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5884         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5885         bool replace;
5886         int err;
5887
5888         rtnl_lock();
5889         switch (fib_work->event) {
5890         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5891         case FIB_EVENT_ENTRY_ADD:
5892                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5893
5894                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5895                                                 replace);
5896                 if (err)
5897                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5898                 mr_cache_put(fib_work->men_info.mfc);
5899                 break;
5900         case FIB_EVENT_ENTRY_DEL:
5901                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5902                 mr_cache_put(fib_work->men_info.mfc);
5903                 break;
5904         case FIB_EVENT_VIF_ADD:
5905                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5906                                                     &fib_work->ven_info);
5907                 if (err)
5908                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5909                 dev_put(fib_work->ven_info.dev);
5910                 break;
5911         case FIB_EVENT_VIF_DEL:
5912                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5913                                               &fib_work->ven_info);
5914                 dev_put(fib_work->ven_info.dev);
5915                 break;
5916         case FIB_EVENT_RULE_ADD:
5917                 /* if we get here, a rule was added that we do not support.
5918                  * just do the fib_abort
5919                  */
5920                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5921                 break;
5922         }
5923         rtnl_unlock();
5924         kfree(fib_work);
5925 }
5926
5927 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5928                                        struct fib_notifier_info *info)
5929 {
5930         struct fib_entry_notifier_info *fen_info;
5931         struct fib_nh_notifier_info *fnh_info;
5932
5933         switch (fib_work->event) {
5934         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5935         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5936         case FIB_EVENT_ENTRY_ADD: /* fall through */
5937         case FIB_EVENT_ENTRY_DEL:
5938                 fen_info = container_of(info, struct fib_entry_notifier_info,
5939                                         info);
5940                 fib_work->fen_info = *fen_info;
5941                 /* Take reference on fib_info to prevent it from being
5942                  * freed while work is queued. Release it afterwards.
5943                  */
5944                 fib_info_hold(fib_work->fen_info.fi);
5945                 break;
5946         case FIB_EVENT_NH_ADD: /* fall through */
5947         case FIB_EVENT_NH_DEL:
5948                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5949                                         info);
5950                 fib_work->fnh_info = *fnh_info;
5951                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5952                 break;
5953         }
5954 }
5955
5956 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5957                                        struct fib_notifier_info *info)
5958 {
5959         struct fib6_entry_notifier_info *fen6_info;
5960
5961         switch (fib_work->event) {
5962         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5963         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5964         case FIB_EVENT_ENTRY_ADD: /* fall through */
5965         case FIB_EVENT_ENTRY_DEL:
5966                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5967                                          info);
5968                 fib_work->fen6_info = *fen6_info;
5969                 fib6_info_hold(fib_work->fen6_info.rt);
5970                 break;
5971         }
5972 }
5973
5974 static void
5975 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5976                             struct fib_notifier_info *info)
5977 {
5978         switch (fib_work->event) {
5979         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5980         case FIB_EVENT_ENTRY_ADD: /* fall through */
5981         case FIB_EVENT_ENTRY_DEL:
5982                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5983                 mr_cache_hold(fib_work->men_info.mfc);
5984                 break;
5985         case FIB_EVENT_VIF_ADD: /* fall through */
5986         case FIB_EVENT_VIF_DEL:
5987                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5988                 dev_hold(fib_work->ven_info.dev);
5989                 break;
5990         }
5991 }
5992
5993 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5994                                           struct fib_notifier_info *info,
5995                                           struct mlxsw_sp *mlxsw_sp)
5996 {
5997         struct netlink_ext_ack *extack = info->extack;
5998         struct fib_rule_notifier_info *fr_info;
5999         struct fib_rule *rule;
6000         int err = 0;
6001
6002         /* nothing to do at the moment */
6003         if (event == FIB_EVENT_RULE_DEL)
6004                 return 0;
6005
6006         if (mlxsw_sp->router->aborted)
6007                 return 0;
6008
6009         fr_info = container_of(info, struct fib_rule_notifier_info, info);
6010         rule = fr_info->rule;
6011
6012         switch (info->family) {
6013         case AF_INET:
6014                 if (!fib4_rule_default(rule) && !rule->l3mdev)
6015                         err = -EOPNOTSUPP;
6016                 break;
6017         case AF_INET6:
6018                 if (!fib6_rule_default(rule) && !rule->l3mdev)
6019                         err = -EOPNOTSUPP;
6020                 break;
6021         case RTNL_FAMILY_IPMR:
6022                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
6023                         err = -EOPNOTSUPP;
6024                 break;
6025         case RTNL_FAMILY_IP6MR:
6026                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6027                         err = -EOPNOTSUPP;
6028                 break;
6029         }
6030
6031         if (err < 0)
6032                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6033
6034         return err;
6035 }
6036
6037 /* Called with rcu_read_lock() */
6038 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6039                                      unsigned long event, void *ptr)
6040 {
6041         struct mlxsw_sp_fib_event_work *fib_work;
6042         struct fib_notifier_info *info = ptr;
6043         struct mlxsw_sp_router *router;
6044         int err;
6045
6046         if (!net_eq(info->net, &init_net) ||
6047             (info->family != AF_INET && info->family != AF_INET6 &&
6048              info->family != RTNL_FAMILY_IPMR &&
6049              info->family != RTNL_FAMILY_IP6MR))
6050                 return NOTIFY_DONE;
6051
6052         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6053
6054         switch (event) {
6055         case FIB_EVENT_RULE_ADD: /* fall through */
6056         case FIB_EVENT_RULE_DEL:
6057                 err = mlxsw_sp_router_fib_rule_event(event, info,
6058                                                      router->mlxsw_sp);
6059                 if (!err || info->extack)
6060                         return notifier_from_errno(err);
6061                 break;
6062         case FIB_EVENT_ENTRY_ADD:
6063                 if (router->aborted) {
6064                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6065                         return notifier_from_errno(-EINVAL);
6066                 }
6067                 break;
6068         }
6069
6070         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6071         if (WARN_ON(!fib_work))
6072                 return NOTIFY_BAD;
6073
6074         fib_work->mlxsw_sp = router->mlxsw_sp;
6075         fib_work->event = event;
6076
6077         switch (info->family) {
6078         case AF_INET:
6079                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6080                 mlxsw_sp_router_fib4_event(fib_work, info);
6081                 break;
6082         case AF_INET6:
6083                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6084                 mlxsw_sp_router_fib6_event(fib_work, info);
6085                 break;
6086         case RTNL_FAMILY_IP6MR:
6087         case RTNL_FAMILY_IPMR:
6088                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6089                 mlxsw_sp_router_fibmr_event(fib_work, info);
6090                 break;
6091         }
6092
6093         mlxsw_core_schedule_work(&fib_work->work);
6094
6095         return NOTIFY_DONE;
6096 }
6097
6098 struct mlxsw_sp_rif *
6099 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6100                          const struct net_device *dev)
6101 {
6102         int i;
6103
6104         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6105                 if (mlxsw_sp->router->rifs[i] &&
6106                     mlxsw_sp->router->rifs[i]->dev == dev)
6107                         return mlxsw_sp->router->rifs[i];
6108
6109         return NULL;
6110 }
6111
6112 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6113 {
6114         char ritr_pl[MLXSW_REG_RITR_LEN];
6115         int err;
6116
6117         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6118         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6119         if (WARN_ON_ONCE(err))
6120                 return err;
6121
6122         mlxsw_reg_ritr_enable_set(ritr_pl, false);
6123         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6124 }
6125
6126 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6127                                           struct mlxsw_sp_rif *rif)
6128 {
6129         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6130         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6131         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6132 }
6133
6134 static bool
6135 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6136                            unsigned long event)
6137 {
6138         struct inet6_dev *inet6_dev;
6139         bool addr_list_empty = true;
6140         struct in_device *idev;
6141
6142         switch (event) {
6143         case NETDEV_UP:
6144                 return rif == NULL;
6145         case NETDEV_DOWN:
6146                 idev = __in_dev_get_rtnl(dev);
6147                 if (idev && idev->ifa_list)
6148                         addr_list_empty = false;
6149
6150                 inet6_dev = __in6_dev_get(dev);
6151                 if (addr_list_empty && inet6_dev &&
6152                     !list_empty(&inet6_dev->addr_list))
6153                         addr_list_empty = false;
6154
6155                 /* macvlans do not have a RIF, but rather piggy back on the
6156                  * RIF of their lower device.
6157                  */
6158                 if (netif_is_macvlan(dev) && addr_list_empty)
6159                         return true;
6160
6161                 if (rif && addr_list_empty &&
6162                     !netif_is_l3_slave(rif->dev))
6163                         return true;
6164                 /* It is possible we already removed the RIF ourselves
6165                  * if it was assigned to a netdev that is now a bridge
6166                  * or LAG slave.
6167                  */
6168                 return false;
6169         }
6170
6171         return false;
6172 }
6173
6174 static enum mlxsw_sp_rif_type
6175 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6176                       const struct net_device *dev)
6177 {
6178         enum mlxsw_sp_fid_type type;
6179
6180         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6181                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6182
6183         /* Otherwise RIF type is derived from the type of the underlying FID. */
6184         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6185                 type = MLXSW_SP_FID_TYPE_8021Q;
6186         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6187                 type = MLXSW_SP_FID_TYPE_8021Q;
6188         else if (netif_is_bridge_master(dev))
6189                 type = MLXSW_SP_FID_TYPE_8021D;
6190         else
6191                 type = MLXSW_SP_FID_TYPE_RFID;
6192
6193         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6194 }
6195
6196 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6197 {
6198         int i;
6199
6200         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6201                 if (!mlxsw_sp->router->rifs[i]) {
6202                         *p_rif_index = i;
6203                         return 0;
6204                 }
6205         }
6206
6207         return -ENOBUFS;
6208 }
6209
6210 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6211                                                u16 vr_id,
6212                                                struct net_device *l3_dev)
6213 {
6214         struct mlxsw_sp_rif *rif;
6215
6216         rif = kzalloc(rif_size, GFP_KERNEL);
6217         if (!rif)
6218                 return NULL;
6219
6220         INIT_LIST_HEAD(&rif->nexthop_list);
6221         INIT_LIST_HEAD(&rif->neigh_list);
6222         ether_addr_copy(rif->addr, l3_dev->dev_addr);
6223         rif->mtu = l3_dev->mtu;
6224         rif->vr_id = vr_id;
6225         rif->dev = l3_dev;
6226         rif->rif_index = rif_index;
6227
6228         return rif;
6229 }
6230
6231 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6232                                            u16 rif_index)
6233 {
6234         return mlxsw_sp->router->rifs[rif_index];
6235 }
6236
6237 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6238 {
6239         return rif->rif_index;
6240 }
6241
6242 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6243 {
6244         return lb_rif->common.rif_index;
6245 }
6246
6247 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6248 {
6249         return lb_rif->ul_vr_id;
6250 }
6251
6252 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6253 {
6254         return rif->dev->ifindex;
6255 }
6256
6257 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6258 {
6259         return rif->dev;
6260 }
6261
6262 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6263 {
6264         return rif->fid;
6265 }
6266
6267 static struct mlxsw_sp_rif *
6268 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6269                     const struct mlxsw_sp_rif_params *params,
6270                     struct netlink_ext_ack *extack)
6271 {
6272         u32 tb_id = l3mdev_fib_table(params->dev);
6273         const struct mlxsw_sp_rif_ops *ops;
6274         struct mlxsw_sp_fid *fid = NULL;
6275         enum mlxsw_sp_rif_type type;
6276         struct mlxsw_sp_rif *rif;
6277         struct mlxsw_sp_vr *vr;
6278         u16 rif_index;
6279         int i, err;
6280
6281         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6282         ops = mlxsw_sp->router->rif_ops_arr[type];
6283
6284         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6285         if (IS_ERR(vr))
6286                 return ERR_CAST(vr);
6287         vr->rif_count++;
6288
6289         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6290         if (err) {
6291                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6292                 goto err_rif_index_alloc;
6293         }
6294
6295         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6296         if (!rif) {
6297                 err = -ENOMEM;
6298                 goto err_rif_alloc;
6299         }
6300         rif->mlxsw_sp = mlxsw_sp;
6301         rif->ops = ops;
6302
6303         if (ops->fid_get) {
6304                 fid = ops->fid_get(rif, extack);
6305                 if (IS_ERR(fid)) {
6306                         err = PTR_ERR(fid);
6307                         goto err_fid_get;
6308                 }
6309                 rif->fid = fid;
6310         }
6311
6312         if (ops->setup)
6313                 ops->setup(rif, params);
6314
6315         err = ops->configure(rif);
6316         if (err)
6317                 goto err_configure;
6318
6319         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6320                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6321                 if (err)
6322                         goto err_mr_rif_add;
6323         }
6324
6325         mlxsw_sp_rif_counters_alloc(rif);
6326         mlxsw_sp->router->rifs[rif_index] = rif;
6327
6328         return rif;
6329
6330 err_mr_rif_add:
6331         for (i--; i >= 0; i--)
6332                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6333         ops->deconfigure(rif);
6334 err_configure:
6335         if (fid)
6336                 mlxsw_sp_fid_put(fid);
6337 err_fid_get:
6338         kfree(rif);
6339 err_rif_alloc:
6340 err_rif_index_alloc:
6341         vr->rif_count--;
6342         mlxsw_sp_vr_put(mlxsw_sp, vr);
6343         return ERR_PTR(err);
6344 }
6345
6346 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6347 {
6348         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6349         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6350         struct mlxsw_sp_fid *fid = rif->fid;
6351         struct mlxsw_sp_vr *vr;
6352         int i;
6353
6354         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6355         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6356
6357         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6358         mlxsw_sp_rif_counters_free(rif);
6359         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6360                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6361         ops->deconfigure(rif);
6362         if (fid)
6363                 /* Loopback RIFs are not associated with a FID. */
6364                 mlxsw_sp_fid_put(fid);
6365         kfree(rif);
6366         vr->rif_count--;
6367         mlxsw_sp_vr_put(mlxsw_sp, vr);
6368 }
6369
/* Destroy the RIF bound to @dev, if there is one. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
                                 struct net_device *dev)
{
        struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

        if (rif)
                mlxsw_sp_rif_destroy(rif);
}
6380
6381 static void
6382 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6383                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6384 {
6385         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6386
6387         params->vid = mlxsw_sp_port_vlan->vid;
6388         params->lag = mlxsw_sp_port->lagged;
6389         if (params->lag)
6390                 params->lag_id = mlxsw_sp_port->lag_id;
6391         else
6392                 params->system_port = mlxsw_sp_port->local_port;
6393 }
6394
/* Attach a {port, VID} to the router through the RIF of @l3_dev.
 *
 * The RIF bound to @l3_dev is looked up and, when none exists, created
 * from the Sub-port parameters of @mlxsw_sp_port_vlan. A FID reference is
 * then taken through the RIF ops, the {port, VID} is mapped to that FID,
 * learning is turned off for the VID and its STP state is set to
 * forwarding. On success the FID is stored in @mlxsw_sp_port_vlan->fid.
 *
 * Returns 0 on success, or the error of the step that failed. On failure
 * the steps already performed are undone in reverse order and the FID
 * reference taken here is dropped.
 */
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
                               struct net_device *l3_dev,
                               struct netlink_ext_ack *extack)
{
        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        u16 vid = mlxsw_sp_port_vlan->vid;
        struct mlxsw_sp_rif *rif;
        struct mlxsw_sp_fid *fid;
        int err;

        /* Reuse the RIF of @l3_dev when present, otherwise create one. */
        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
        if (!rif) {
                struct mlxsw_sp_rif_params params = {
                        .dev = l3_dev,
                };

                mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
                rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
                if (IS_ERR(rif))
                        return PTR_ERR(rif);
        }

        /* FID was already created, just take a reference */
        fid = rif->ops->fid_get(rif, extack);
        err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
        if (err)
                goto err_fid_port_vid_map;

        err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
        if (err)
                goto err_port_vid_learning_set;

        err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
                                        BR_STATE_FORWARDING);
        if (err)
                goto err_port_vid_stp_set;

        mlxsw_sp_port_vlan->fid = fid;

        return 0;

        /* Unwind in the reverse order of the setup above. */
err_port_vid_stp_set:
        mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
        mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
        mlxsw_sp_fid_put(fid);
        return err;
}
6446
/* Detach a {port, VID} from the router.
 *
 * Warns and bails out if the FID recorded in @mlxsw_sp_port_vlan is not an
 * rFID. Otherwise clears the recorded FID, sets the VID's STP state to
 * blocking, re-enables learning, unmaps the {port, VID} from the FID and
 * drops the FID reference.
 */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
        struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
        u16 vid = mlxsw_sp_port_vlan->vid;

        /* Only {port, VID}s that joined the router carry an rFID. */
        if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
                return;

        mlxsw_sp_port_vlan->fid = NULL;
        mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
        mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
        mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
        /* If router port holds the last reference on the rFID, then the
         * associated Sub-port RIF will be destroyed.
         */
        mlxsw_sp_fid_put(fid);
}
6466
6467 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6468                                              struct net_device *port_dev,
6469                                              unsigned long event, u16 vid,
6470                                              struct netlink_ext_ack *extack)
6471 {
6472         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6473         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6474
6475         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6476         if (WARN_ON(!mlxsw_sp_port_vlan))
6477                 return -EINVAL;
6478
6479         switch (event) {
6480         case NETDEV_UP:
6481                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6482                                                       l3_dev, extack);
6483         case NETDEV_DOWN:
6484                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6485                 break;
6486         }
6487
6488         return 0;
6489 }
6490
6491 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6492                                         unsigned long event,
6493                                         struct netlink_ext_ack *extack)
6494 {
6495         if (netif_is_bridge_port(port_dev) ||
6496             netif_is_lag_port(port_dev) ||
6497             netif_is_ovs_port(port_dev))
6498                 return 0;
6499
6500         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6501                                                  extack);
6502 }
6503
6504 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6505                                          struct net_device *lag_dev,
6506                                          unsigned long event, u16 vid,
6507                                          struct netlink_ext_ack *extack)
6508 {
6509         struct net_device *port_dev;
6510         struct list_head *iter;
6511         int err;
6512
6513         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6514                 if (mlxsw_sp_port_dev_check(port_dev)) {
6515                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6516                                                                 port_dev,
6517                                                                 event, vid,
6518                                                                 extack);
6519                         if (err)
6520                                 return err;
6521                 }
6522         }
6523
6524         return 0;
6525 }
6526
/* Handle an address event on a LAG netdev. A LAG that is a bridge port is
 * skipped; otherwise the event is applied to the LAG members with VID 1.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
                                       unsigned long event,
                                       struct netlink_ext_ack *extack)
{
        if (!netif_is_bridge_port(lag_dev))
                return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
                                                     1, extack);

        return 0;
}
6537
6538 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6539                                           unsigned long event,
6540                                           struct netlink_ext_ack *extack)
6541 {
6542         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6543         struct mlxsw_sp_rif_params params = {
6544                 .dev = l3_dev,
6545         };
6546         struct mlxsw_sp_rif *rif;
6547
6548         switch (event) {
6549         case NETDEV_UP:
6550                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6551                 if (IS_ERR(rif))
6552                         return PTR_ERR(rif);
6553                 break;
6554         case NETDEV_DOWN:
6555                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6556                 mlxsw_sp_rif_destroy(rif);
6557                 break;
6558         }
6559
6560         return 0;
6561 }
6562
6563 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6564                                         unsigned long event,
6565                                         struct netlink_ext_ack *extack)
6566 {
6567         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6568         u16 vid = vlan_dev_vlan_id(vlan_dev);
6569
6570         if (netif_is_bridge_port(vlan_dev))
6571                 return 0;
6572
6573         if (mlxsw_sp_port_dev_check(real_dev))
6574                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6575                                                          event, vid, extack);
6576         else if (netif_is_lag_master(real_dev))
6577                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6578                                                      vid, extack);
6579         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6580                 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6581
6582         return 0;
6583 }
6584
6585 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6586 {
6587         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6588         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6589
6590         return ether_addr_equal_masked(mac, vrrp4, mask);
6591 }
6592
6593 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6594 {
6595         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6596         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6597
6598         return ether_addr_equal_masked(mac, vrrp6, mask);
6599 }
6600
6601 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6602                                 const u8 *mac, bool adding)
6603 {
6604         char ritr_pl[MLXSW_REG_RITR_LEN];
6605         u8 vrrp_id = adding ? mac[5] : 0;
6606         int err;
6607
6608         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6609             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6610                 return 0;
6611
6612         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6613         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6614         if (err)
6615                 return err;
6616
6617         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6618                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6619         else
6620                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6621
6622         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6623 }
6624
6625 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6626                                     const struct net_device *macvlan_dev,
6627                                     struct netlink_ext_ack *extack)
6628 {
6629         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6630         struct mlxsw_sp_rif *rif;
6631         int err;
6632
6633         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6634         if (!rif) {
6635                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6636                 return -EOPNOTSUPP;
6637         }
6638
6639         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6640                                   mlxsw_sp_fid_index(rif->fid), true);
6641         if (err)
6642                 return err;
6643
6644         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6645                                    macvlan_dev->dev_addr, true);
6646         if (err)
6647                 goto err_rif_vrrp_add;
6648
6649         /* Make sure the bridge driver does not have this MAC pointing at
6650          * some other port.
6651          */
6652         if (rif->ops->fdb_del)
6653                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6654
6655         return 0;
6656
6657 err_rif_vrrp_add:
6658         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6659                             mlxsw_sp_fid_index(rif->fid), false);
6660         return err;
6661 }
6662
6663 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6664                               const struct net_device *macvlan_dev)
6665 {
6666         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6667         struct mlxsw_sp_rif *rif;
6668
6669         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6670         /* If we do not have a RIF, then we already took care of
6671          * removing the macvlan's MAC during RIF deletion.
6672          */
6673         if (!rif)
6674                 return;
6675         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6676                              false);
6677         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6678                             mlxsw_sp_fid_index(rif->fid), false);
6679 }
6680
6681 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6682                                            unsigned long event,
6683                                            struct netlink_ext_ack *extack)
6684 {
6685         struct mlxsw_sp *mlxsw_sp;
6686
6687         mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6688         if (!mlxsw_sp)
6689                 return 0;
6690
6691         switch (event) {
6692         case NETDEV_UP:
6693                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6694         case NETDEV_DOWN:
6695                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6696                 break;
6697         }
6698
6699         return 0;
6700 }
6701
/* Dispatch an address event to the handler matching the type of @dev.
 * The checks are made in a fixed order: mlxsw port, LAG master, bridge
 * master, VLAN device, macvlan. Unrecognised device types yield 0.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
                                     unsigned long event,
                                     struct netlink_ext_ack *extack)
{
        if (mlxsw_sp_port_dev_check(dev))
                return mlxsw_sp_inetaddr_port_event(dev, event, extack);

        if (netif_is_lag_master(dev))
                return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

        if (netif_is_bridge_master(dev))
                return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

        if (is_vlan_dev(dev))
                return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

        if (netif_is_macvlan(dev))
                return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);

        return 0;
}
6719
6720 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6721                             unsigned long event, void *ptr)
6722 {
6723         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6724         struct net_device *dev = ifa->ifa_dev->dev;
6725         struct mlxsw_sp *mlxsw_sp;
6726         struct mlxsw_sp_rif *rif;
6727         int err = 0;
6728
6729         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6730         if (event == NETDEV_UP)
6731                 goto out;
6732
6733         mlxsw_sp = mlxsw_sp_lower_get(dev);
6734         if (!mlxsw_sp)
6735                 goto out;
6736
6737         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6738         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6739                 goto out;
6740
6741         err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6742 out:
6743         return notifier_from_errno(err);
6744 }
6745
6746 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6747                                   unsigned long event, void *ptr)
6748 {
6749         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6750         struct net_device *dev = ivi->ivi_dev->dev;
6751         struct mlxsw_sp *mlxsw_sp;
6752         struct mlxsw_sp_rif *rif;
6753         int err = 0;
6754
6755         mlxsw_sp = mlxsw_sp_lower_get(dev);
6756         if (!mlxsw_sp)
6757                 goto out;
6758
6759         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6760         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6761                 goto out;
6762
6763         err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6764 out:
6765         return notifier_from_errno(err);
6766 }
6767
/* Deferred IPv6 address event, queued by mlxsw_sp_inet6addr_event() and
 * processed by the mlxsw_sp_inet6addr_event_work() work function.
 */
struct mlxsw_sp_inet6addr_event_work {
        struct work_struct work;        /* work item passed to the scheduler */
        struct net_device *dev;         /* netdev of the event; held until processed */
        unsigned long event;            /* notifier event code */
};
6773
6774 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6775 {
6776         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6777                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6778         struct net_device *dev = inet6addr_work->dev;
6779         unsigned long event = inet6addr_work->event;
6780         struct mlxsw_sp *mlxsw_sp;
6781         struct mlxsw_sp_rif *rif;
6782
6783         rtnl_lock();
6784         mlxsw_sp = mlxsw_sp_lower_get(dev);
6785         if (!mlxsw_sp)
6786                 goto out;
6787
6788         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6789         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6790                 goto out;
6791
6792         __mlxsw_sp_inetaddr_event(dev, event, NULL);
6793 out:
6794         rtnl_unlock();
6795         dev_put(dev);
6796         kfree(inet6addr_work);
6797 }
6798
6799 /* Called with rcu_read_lock() */
6800 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6801                              unsigned long event, void *ptr)
6802 {
6803         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6804         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6805         struct net_device *dev = if6->idev->dev;
6806
6807         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6808         if (event == NETDEV_UP)
6809                 return NOTIFY_DONE;
6810
6811         if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6812                 return NOTIFY_DONE;
6813
6814         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6815         if (!inet6addr_work)
6816                 return NOTIFY_BAD;
6817
6818         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6819         inet6addr_work->dev = dev;
6820         inet6addr_work->event = event;
6821         dev_hold(dev);
6822         mlxsw_core_schedule_work(&inet6addr_work->work);
6823
6824         return NOTIFY_DONE;
6825 }
6826
6827 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6828                                    unsigned long event, void *ptr)
6829 {
6830         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6831         struct net_device *dev = i6vi->i6vi_dev->dev;
6832         struct mlxsw_sp *mlxsw_sp;
6833         struct mlxsw_sp_rif *rif;
6834         int err = 0;
6835
6836         mlxsw_sp = mlxsw_sp_lower_get(dev);
6837         if (!mlxsw_sp)
6838                 goto out;
6839
6840         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6841         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6842                 goto out;
6843
6844         err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6845 out:
6846         return notifier_from_errno(err);
6847 }
6848
6849 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6850                              const char *mac, int mtu)
6851 {
6852         char ritr_pl[MLXSW_REG_RITR_LEN];
6853         int err;
6854
6855         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6856         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6857         if (err)
6858                 return err;
6859
6860         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6861         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6862         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6863         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6864 }
6865
/* Synchronise the RIF bound to @dev with the netdev's current MAC address
 * and MTU.
 *
 * Returns 0 when @dev is not on top of an mlxsw device or has no RIF.
 * Otherwise the FDB entry of the RIF's cached MAC is removed, the RIF is
 * edited with the netdev's MAC and MTU, and an FDB entry for the new MAC
 * is added. When the MTU differs from the cached one, the multicast
 * routing tables of the RIF's virtual router are notified. Finally the
 * cached MAC and MTU in the RIF are updated.
 *
 * On failure the previous RIF settings and FDB entry are restored and the
 * error is returned.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif *rif;
        u16 fid_index;
        int err;

        mlxsw_sp = mlxsw_sp_lower_get(dev);
        if (!mlxsw_sp)
                return 0;

        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
        if (!rif)
                return 0;
        fid_index = mlxsw_sp_fid_index(rif->fid);

        /* Remove the FDB entry of the old (cached) MAC first. */
        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
        if (err)
                return err;

        err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
                                dev->mtu);
        if (err)
                goto err_rif_edit;

        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
        if (err)
                goto err_rif_fdb_op;

        if (rif->mtu != dev->mtu) {
                struct mlxsw_sp_vr *vr;
                int i;

                /* The RIF is relevant only to its mr_table instance, as unlike
                 * unicast routing, in multicast routing a RIF cannot be shared
                 * between several multicast routing tables.
                 */
                vr = &mlxsw_sp->router->vrs[rif->vr_id];
                for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
                        mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
                                                   rif, dev->mtu);
        }

        /* Hardware is updated; refresh the cached values. */
        ether_addr_copy(rif->addr, dev->dev_addr);
        rif->mtu = dev->mtu;

        netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

        return 0;

        /* Roll back using the still-cached old MAC and MTU. */
err_rif_fdb_op:
        mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
        mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
        return err;
}
6922
6923 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6924                                   struct net_device *l3_dev,
6925                                   struct netlink_ext_ack *extack)
6926 {
6927         struct mlxsw_sp_rif *rif;
6928
6929         /* If netdev is already associated with a RIF, then we need to
6930          * destroy it and create a new one with the new virtual router ID.
6931          */
6932         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6933         if (rif)
6934                 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6935
6936         return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6937 }
6938
6939 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6940                                     struct net_device *l3_dev)
6941 {
6942         struct mlxsw_sp_rif *rif;
6943
6944         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6945         if (!rif)
6946                 return;
6947         __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6948 }
6949
6950 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6951                                  struct netdev_notifier_changeupper_info *info)
6952 {
6953         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6954         int err = 0;
6955
6956         /* We do not create a RIF for a macvlan, but only use it to
6957          * direct more MAC addresses to the router.
6958          */
6959         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6960                 return 0;
6961
6962         switch (event) {
6963         case NETDEV_PRECHANGEUPPER:
6964                 return 0;
6965         case NETDEV_CHANGEUPPER:
6966                 if (info->linking) {
6967                         struct netlink_ext_ack *extack;
6968
6969                         extack = netdev_notifier_info_to_extack(&info->info);
6970                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6971                 } else {
6972                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6973                 }
6974                 break;
6975         }
6976
6977         return err;
6978 }
6979
6980 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6981 {
6982         struct mlxsw_sp_rif *rif = data;
6983
6984         if (!netif_is_macvlan(dev))
6985                 return 0;
6986
6987         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6988                                    mlxsw_sp_fid_index(rif->fid), false);
6989 }
6990
6991 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6992 {
6993         if (!netif_is_macvlan_port(rif->dev))
6994                 return 0;
6995
6996         netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
6997         return netdev_walk_all_upper_dev_rcu(rif->dev,
6998                                              __mlxsw_sp_rif_macvlan_flush, rif);
6999 }
7000
/* Convert a generic RIF pointer to the Sub-port RIF that embeds it as its
 * 'common' member. The caller must pass a RIF that really is embedded in a
 * struct mlxsw_sp_rif_subport.
 */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
        return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
7006
7007 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7008                                        const struct mlxsw_sp_rif_params *params)
7009 {
7010         struct mlxsw_sp_rif_subport *rif_subport;
7011
7012         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7013         rif_subport->vid = params->vid;
7014         rif_subport->lag = params->lag;
7015         if (params->lag)
7016                 rif_subport->lag_id = params->lag_id;
7017         else
7018                 rif_subport->system_port = params->system_port;
7019 }
7020
7021 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7022 {
7023         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7024         struct mlxsw_sp_rif_subport *rif_subport;
7025         char ritr_pl[MLXSW_REG_RITR_LEN];
7026
7027         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7028         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7029                             rif->rif_index, rif->vr_id, rif->dev->mtu);
7030         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7031         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7032                                   rif_subport->lag ? rif_subport->lag_id :
7033                                                      rif_subport->system_port,
7034                                   rif_subport->vid);
7035
7036         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7037 }
7038
7039 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7040 {
7041         int err;
7042
7043         err = mlxsw_sp_rif_subport_op(rif, true);
7044         if (err)
7045                 return err;
7046
7047         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7048                                   mlxsw_sp_fid_index(rif->fid), true);
7049         if (err)
7050                 goto err_rif_fdb_op;
7051
7052         mlxsw_sp_fid_rif_set(rif->fid, rif);
7053         return 0;
7054
7055 err_rif_fdb_op:
7056         mlxsw_sp_rif_subport_op(rif, false);
7057         return err;
7058 }
7059
/* RIF 'deconfigure' op for Sub-port RIFs: clear the FID's RIF association,
 * remove the FDB entry of the netdev's MAC, flush the MACs of macvlan
 * uppers and disable the RIF in hardware.
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp_fid *fid = rif->fid;

        mlxsw_sp_fid_rif_set(fid, NULL);
        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                            mlxsw_sp_fid_index(fid), false);
        mlxsw_sp_rif_macvlan_flush(rif);
        mlxsw_sp_rif_subport_op(rif, false);
}
7070
/* RIF 'fid_get' op for Sub-port RIFs: get the rFID keyed by the RIF index.
 * @extack is unused.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
                             struct netlink_ext_ack *extack)
{
        return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
7077
/* Operations of Sub-port RIFs. rif_size is that of the extended
 * struct mlxsw_sp_rif_subport; no fdb_del op is provided.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
        .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
        .setup                  = mlxsw_sp_rif_subport_setup,
        .configure              = mlxsw_sp_rif_subport_configure,
        .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
        .fid_get                = mlxsw_sp_rif_subport_fid_get,
};
7086
7087 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7088                                     enum mlxsw_reg_ritr_if_type type,
7089                                     u16 vid_fid, bool enable)
7090 {
7091         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7092         char ritr_pl[MLXSW_REG_RITR_LEN];
7093
7094         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7095                             rif->dev->mtu);
7096         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7097         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7098
7099         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7100 }
7101
/* Return the port number used for the router port: one past the maximum
 * number of ports reported by the core. The result is truncated to u8.
 */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
        return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
7106
/* RIF 'configure' op for VLAN RIFs.
 *
 * Enables the VLAN interface in hardware, enables multicast and broadcast
 * flooding towards the router port in the RIF's FID, adds an FDB entry for
 * the netdev's MAC and associates the FID with the RIF. On failure the
 * completed steps are undone in reverse order and the error is returned.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
        int err;

        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
        if (err)
                return err;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_mc_flood_set;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_bc_flood_set;

        err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                                  mlxsw_sp_fid_index(rif->fid), true);
        if (err)
                goto err_rif_fdb_op;

        mlxsw_sp_fid_rif_set(rif->fid, rif);
        return 0;

        /* Unwind in the reverse order of the setup above. */
err_rif_fdb_op:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
        return err;
}
7145
/* RIF 'deconfigure' op for VLAN RIFs: clear the FID's RIF association,
 * remove the FDB entry of the netdev's MAC, flush the MACs of macvlan
 * uppers, disable broadcast and multicast flooding towards the router port
 * and finally disable the VLAN interface in hardware.
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        struct mlxsw_sp_fid *fid = rif->fid;

        mlxsw_sp_fid_rif_set(fid, NULL);
        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                            mlxsw_sp_fid_index(fid), false);
        mlxsw_sp_rif_macvlan_flush(rif);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
7162
7163 static struct mlxsw_sp_fid *
7164 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7165                           struct netlink_ext_ack *extack)
7166 {
7167         u16 vid;
7168         int err;
7169
7170         if (is_vlan_dev(rif->dev)) {
7171                 vid = vlan_dev_vlan_id(rif->dev);
7172         } else {
7173                 err = br_vlan_get_pvid(rif->dev, &vid);
7174                 if (err < 0 || !vid) {
7175                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7176                         return ERR_PTR(-EINVAL);
7177                 }
7178         }
7179
7180         return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7181 }
7182
7183 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7184 {
7185         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7186         struct switchdev_notifier_fdb_info info;
7187         struct net_device *br_dev;
7188         struct net_device *dev;
7189
7190         br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7191         dev = br_fdb_find_port(br_dev, mac, vid);
7192         if (!dev)
7193                 return;
7194
7195         info.addr = mac;
7196         info.vid = vid;
7197         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7198 }
7199
/* Operations of VLAN RIFs. These use the plain struct mlxsw_sp_rif (no
 * type-specific extension), so no setup op is provided.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_VLAN,
        .rif_size               = sizeof(struct mlxsw_sp_rif),
        .configure              = mlxsw_sp_rif_vlan_configure,
        .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
        .fid_get                = mlxsw_sp_rif_vlan_fid_get,
        .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
};
7208
/* RIF 'configure' op for FID RIFs.
 *
 * Enables the FID interface in hardware, enables multicast and broadcast
 * flooding towards the router port in the RIF's FID, adds an FDB entry for
 * the netdev's MAC and associates the FID with the RIF. On failure the
 * completed steps are undone in reverse order and the error is returned.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
        u16 fid_index = mlxsw_sp_fid_index(rif->fid);
        int err;

        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
                                       true);
        if (err)
                return err;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_mc_flood_set;

        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
                goto err_fid_bc_flood_set;

        err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                                  mlxsw_sp_fid_index(rif->fid), true);
        if (err)
                goto err_rif_fdb_op;

        mlxsw_sp_fid_rif_set(rif->fid, rif);
        return 0;

        /* Unwind in the reverse order of the setup above. */
err_rif_fdb_op:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
        return err;
}
7248
7249 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7250 {
7251         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7252         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7253         struct mlxsw_sp_fid *fid = rif->fid;
7254
7255         mlxsw_sp_fid_rif_set(fid, NULL);
7256         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7257                             mlxsw_sp_fid_index(fid), false);
7258         mlxsw_sp_rif_macvlan_flush(rif);
7259         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7260                                mlxsw_sp_router_port(mlxsw_sp), false);
7261         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7262                                mlxsw_sp_router_port(mlxsw_sp), false);
7263         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7264 }
7265
7266 static struct mlxsw_sp_fid *
7267 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7268                          struct netlink_ext_ack *extack)
7269 {
7270         return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7271 }
7272
7273 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7274 {
7275         struct switchdev_notifier_fdb_info info;
7276         struct net_device *dev;
7277
7278         dev = br_fdb_find_port(rif->dev, mac, 0);
7279         if (!dev)
7280                 return;
7281
7282         info.addr = mac;
7283         info.vid = 0;
7284         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7285 }
7286
7287 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7288         .type                   = MLXSW_SP_RIF_TYPE_FID,
7289         .rif_size               = sizeof(struct mlxsw_sp_rif),
7290         .configure              = mlxsw_sp_rif_fid_configure,
7291         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7292         .fid_get                = mlxsw_sp_rif_fid_fid_get,
7293         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7294 };
7295
7296 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7297         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7298         .rif_size               = sizeof(struct mlxsw_sp_rif),
7299         .configure              = mlxsw_sp_rif_fid_configure,
7300         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7301         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7302         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7303 };
7304
7305 static struct mlxsw_sp_rif_ipip_lb *
7306 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7307 {
7308         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7309 }
7310
7311 static void
7312 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7313                            const struct mlxsw_sp_rif_params *params)
7314 {
7315         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7316         struct mlxsw_sp_rif_ipip_lb *rif_lb;
7317
7318         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7319                                  common);
7320         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7321         rif_lb->lb_config = params_lb->lb_config;
7322 }
7323
7324 static int
7325 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7326 {
7327         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7328         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7329         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7330         struct mlxsw_sp_vr *ul_vr;
7331         int err;
7332
7333         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7334         if (IS_ERR(ul_vr))
7335                 return PTR_ERR(ul_vr);
7336
7337         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7338         if (err)
7339                 goto err_loopback_op;
7340
7341         lb_rif->ul_vr_id = ul_vr->id;
7342         ++ul_vr->rif_count;
7343         return 0;
7344
7345 err_loopback_op:
7346         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7347         return err;
7348 }
7349
7350 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7351 {
7352         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7353         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7354         struct mlxsw_sp_vr *ul_vr;
7355
7356         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7357         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7358
7359         --ul_vr->rif_count;
7360         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7361 }
7362
7363 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7364         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7365         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7366         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7367         .configure              = mlxsw_sp_rif_ipip_lb_configure,
7368         .deconfigure            = mlxsw_sp_rif_ipip_lb_deconfigure,
7369 };
7370
7371 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7372         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7373         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7374         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7375         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp_rif_ipip_lb_ops,
7376 };
7377
7378 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7379 {
7380         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7381
7382         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7383                                          sizeof(struct mlxsw_sp_rif *),
7384                                          GFP_KERNEL);
7385         if (!mlxsw_sp->router->rifs)
7386                 return -ENOMEM;
7387
7388         mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7389
7390         return 0;
7391 }
7392
7393 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7394 {
7395         int i;
7396
7397         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7398                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7399
7400         kfree(mlxsw_sp->router->rifs);
7401 }
7402
7403 static int
7404 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7405 {
7406         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7407
7408         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7409         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7410 }
7411
7412 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7413 {
7414         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7415         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7416         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7417 }
7418
7419 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7420 {
7421         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7422 }
7423
7424 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7425 {
7426         struct mlxsw_sp_router *router;
7427
7428         /* Flush pending FIB notifications and then flush the device's
7429          * table before requesting another dump. The FIB notification
7430          * block is unregistered, so no need to take RTNL.
7431          */
7432         mlxsw_core_flush_owq();
7433         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7434         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7435 }
7436
7437 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7438 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7439 {
7440         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7441 }
7442
7443 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7444 {
7445         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7446 }
7447
7448 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7449 {
7450         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7451
7452         mlxsw_sp_mp_hash_header_set(recr2_pl,
7453                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7454         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7455         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7456         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7457         if (only_l3)
7458                 return;
7459         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7460         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7461         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7462         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7463 }
7464
7465 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7466 {
7467         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7468
7469         mlxsw_sp_mp_hash_header_set(recr2_pl,
7470                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7471         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7472         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7473         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7474         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7475         if (only_l3) {
7476                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7477                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7478         } else {
7479                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7480                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7481                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7482                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7483                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7484                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7485         }
7486 }
7487
7488 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7489 {
7490         char recr2_pl[MLXSW_REG_RECR2_LEN];
7491         u32 seed;
7492
7493         get_random_bytes(&seed, sizeof(seed));
7494         mlxsw_reg_recr2_pack(recr2_pl, seed);
7495         mlxsw_sp_mp4_hash_init(recr2_pl);
7496         mlxsw_sp_mp6_hash_init(recr2_pl);
7497
7498         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7499 }
7500 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to
 * configure, always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
        return 0;
}
7505 #endif
7506
7507 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7508 {
7509         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7510         unsigned int i;
7511
7512         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7513
7514         /* HW is determining switch priority based on DSCP-bits, but the
7515          * kernel is still doing that based on the ToS. Since there's a
7516          * mismatch in bits we need to make sure to translate the right
7517          * value ToS would observe, skipping the 2 least-significant ECN bits.
7518          */
7519         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7520                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7521
7522         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7523 }
7524
7525 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7526 {
7527         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7528         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7529         u64 max_rifs;
7530         int err;
7531
7532         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7533                 return -EIO;
7534         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7535
7536         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7537         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7538         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7539         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7540         if (err)
7541                 return err;
7542         return 0;
7543 }
7544
7545 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7546 {
7547         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7548
7549         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7550         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7551 }
7552
7553 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7554 {
7555         struct mlxsw_sp_router *router;
7556         int err;
7557
7558         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7559         if (!router)
7560                 return -ENOMEM;
7561         mlxsw_sp->router = router;
7562         router->mlxsw_sp = mlxsw_sp;
7563
7564         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7565         err = __mlxsw_sp_router_init(mlxsw_sp);
7566         if (err)
7567                 goto err_router_init;
7568
7569         err = mlxsw_sp_rifs_init(mlxsw_sp);
7570         if (err)
7571                 goto err_rifs_init;
7572
7573         err = mlxsw_sp_ipips_init(mlxsw_sp);
7574         if (err)
7575                 goto err_ipips_init;
7576
7577         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7578                               &mlxsw_sp_nexthop_ht_params);
7579         if (err)
7580                 goto err_nexthop_ht_init;
7581
7582         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7583                               &mlxsw_sp_nexthop_group_ht_params);
7584         if (err)
7585                 goto err_nexthop_group_ht_init;
7586
7587         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7588         err = mlxsw_sp_lpm_init(mlxsw_sp);
7589         if (err)
7590                 goto err_lpm_init;
7591
7592         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7593         if (err)
7594                 goto err_mr_init;
7595
7596         err = mlxsw_sp_vrs_init(mlxsw_sp);
7597         if (err)
7598                 goto err_vrs_init;
7599
7600         err = mlxsw_sp_neigh_init(mlxsw_sp);
7601         if (err)
7602                 goto err_neigh_init;
7603
7604         mlxsw_sp->router->netevent_nb.notifier_call =
7605                 mlxsw_sp_router_netevent_event;
7606         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7607         if (err)
7608                 goto err_register_netevent_notifier;
7609
7610         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7611         if (err)
7612                 goto err_mp_hash_init;
7613
7614         err = mlxsw_sp_dscp_init(mlxsw_sp);
7615         if (err)
7616                 goto err_dscp_init;
7617
7618         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7619         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7620                                     mlxsw_sp_router_fib_dump_flush);
7621         if (err)
7622                 goto err_register_fib_notifier;
7623
7624         return 0;
7625
7626 err_register_fib_notifier:
7627 err_dscp_init:
7628 err_mp_hash_init:
7629         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7630 err_register_netevent_notifier:
7631         mlxsw_sp_neigh_fini(mlxsw_sp);
7632 err_neigh_init:
7633         mlxsw_sp_vrs_fini(mlxsw_sp);
7634 err_vrs_init:
7635         mlxsw_sp_mr_fini(mlxsw_sp);
7636 err_mr_init:
7637         mlxsw_sp_lpm_fini(mlxsw_sp);
7638 err_lpm_init:
7639         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7640 err_nexthop_group_ht_init:
7641         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7642 err_nexthop_ht_init:
7643         mlxsw_sp_ipips_fini(mlxsw_sp);
7644 err_ipips_init:
7645         mlxsw_sp_rifs_fini(mlxsw_sp);
7646 err_rifs_init:
7647         __mlxsw_sp_router_fini(mlxsw_sp);
7648 err_router_init:
7649         kfree(mlxsw_sp->router);
7650         return err;
7651 }
7652
7653 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7654 {
7655         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7656         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7657         mlxsw_sp_neigh_fini(mlxsw_sp);
7658         mlxsw_sp_vrs_fini(mlxsw_sp);
7659         mlxsw_sp_mr_fini(mlxsw_sp);
7660         mlxsw_sp_lpm_fini(mlxsw_sp);
7661         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7662         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7663         mlxsw_sp_ipips_fini(mlxsw_sp);
7664         mlxsw_sp_rifs_fini(mlxsw_sp);
7665         __mlxsw_sp_router_fini(mlxsw_sp);
7666         kfree(mlxsw_sp->router);
7667 }