mlxsw: spectrum_router: Fix an IS_ERR() vs NULL check
[linux-2.6-block.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <linux/genalloc.h>
22 #include <net/netevent.h>
23 #include <net/neighbour.h>
24 #include <net/arp.h>
25 #include <net/inet_dscp.h>
26 #include <net/ip_fib.h>
27 #include <net/ip6_fib.h>
28 #include <net/nexthop.h>
29 #include <net/fib_rules.h>
30 #include <net/ip_tunnels.h>
31 #include <net/l3mdev.h>
32 #include <net/addrconf.h>
33 #include <net/ndisc.h>
34 #include <net/ipv6.h>
35 #include <net/fib_notifier.h>
36 #include <net/switchdev.h>
37
38 #include "spectrum.h"
39 #include "core.h"
40 #include "reg.h"
41 #include "spectrum_cnt.h"
42 #include "spectrum_dpipe.h"
43 #include "spectrum_ipip.h"
44 #include "spectrum_mr.h"
45 #include "spectrum_mr_tcam.h"
46 #include "spectrum_router.h"
47 #include "spectrum_span.h"
48
49 struct mlxsw_sp_fib;
50 struct mlxsw_sp_vr;
51 struct mlxsw_sp_lpm_tree;
52 struct mlxsw_sp_rif_ops;
53
/* Lookup key of a CRIF. mlxsw_sp_crif_ht_params hashes over this whole
 * struct, i.e. over the netdevice pointer value.
 */
struct mlxsw_sp_crif_key {
        struct net_device *dev;
};
57
/* Hashtable entry keyed by netdevice (see mlxsw_sp_crif_ht_params). A RIF
 * points at its CRIF through mlxsw_sp_rif::crif.
 */
struct mlxsw_sp_crif {
        struct mlxsw_sp_crif_key key; /* hashtable key */
        struct rhash_head ht_node; /* hashtable linkage */
        bool can_destroy;
        struct list_head nexthop_list;
        /* NOTE(review): presumably NULL while no RIF exists for the
         * netdevice - confirm against the users of this field.
         */
        struct mlxsw_sp_rif *rif;
};
65
/* rhashtable layout for struct mlxsw_sp_crif: entries are keyed by the
 * embedded mlxsw_sp_crif_key and linked through ht_node.
 */
static const struct rhashtable_params mlxsw_sp_crif_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_crif, key),
        .key_len = sizeof_field(struct mlxsw_sp_crif, key),
        .head_offset = offsetof(struct mlxsw_sp_crif, ht_node),
};
71
/* Router interface (RIF) state shared by all RIF types. Type-specific
 * structures (e.g. mlxsw_sp_rif_subport, mlxsw_sp_rif_ipip_lb) embed this as
 * their first member named "common".
 */
struct mlxsw_sp_rif {
        struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */
        struct list_head neigh_list;
        struct mlxsw_sp_fid *fid;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index; /* index used when packing the RITR register */
        u8 mac_profile_id;
        u8 rif_entries;
        u16 vr_id;
        const struct mlxsw_sp_rif_ops *ops;
        struct mlxsw_sp *mlxsw_sp; /* owning driver instance */

        /* Counter indexes per direction. An index is only meaningful while
         * the matching *_valid flag is set (see mlxsw_sp_rif_counter_alloc()
         * and mlxsw_sp_rif_counter_free()).
         */
        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};
90
91 static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
92 {
93         if (!rif->crif)
94                 return NULL;
95         return rif->crif->key.dev;
96 }
97
/* Parameters handed to a RIF type's ->setup() callback (see
 * struct mlxsw_sp_rif_ops).
 */
struct mlxsw_sp_rif_params {
        struct net_device *dev;
        /* NOTE(review): presumably @lag selects which union member is
         * meaningful - confirm against the setup callbacks.
         */
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
        bool double_entry;
};
108
/* Sub-port RIF: the common RIF state followed by reference counting and the
 * port/LAG + VID fields, which mirror those in struct mlxsw_sp_rif_params.
 */
struct mlxsw_sp_rif_subport {
        struct mlxsw_sp_rif common;
        refcount_t ref_count;
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
};
119
/* IP-in-IP loopback RIF: the common RIF state plus the loopback
 * configuration and the underlay identifier. Which underlay field is used
 * depends on the ASIC generation, as noted per field.
 */
struct mlxsw_sp_rif_ipip_lb {
        struct mlxsw_sp_rif common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
        u16 ul_vr_id;   /* Spectrum-1. */
        u16 ul_rif_id;  /* Spectrum-2+. */
};
126
/* Creation parameters for an IP-in-IP loopback RIF: the generic RIF
 * parameters extended with the loopback configuration.
 */
struct mlxsw_sp_rif_params_ipip_lb {
        struct mlxsw_sp_rif_params common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
131
/* Per-RIF-type operations; every RIF references one of these through
 * mlxsw_sp_rif::ops.
 */
struct mlxsw_sp_rif_ops {
        enum mlxsw_sp_rif_type type;
        /* NOTE(review): presumably the size of the type-specific structure
         * that embeds struct mlxsw_sp_rif - confirm at the allocation site.
         */
        size_t rif_size;

        void (*setup)(struct mlxsw_sp_rif *rif,
                      const struct mlxsw_sp_rif_params *params);
        int (*configure)(struct mlxsw_sp_rif *rif,
                         struct netlink_ext_ack *extack);
        void (*deconfigure)(struct mlxsw_sp_rif *rif);
        struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
                                         struct netlink_ext_ack *extack);
        void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
145
/* Reference-counted RIF MAC profile, identified by @id.
 * NOTE(review): presumably the value stored in mlxsw_sp_rif::mac_profile_id -
 * confirm against the profile users.
 */
struct mlxsw_sp_rif_mac_profile {
        unsigned char mac_prefix[ETH_ALEN];
        refcount_t ref_count;
        u8 id;
};
151
/* Router initialisation hooks. Both callbacks take the driver instance and
 * return an int status.
 * NOTE(review): presumably one instance exists per ASIC generation - confirm.
 */
struct mlxsw_sp_router_ops {
        int (*init)(struct mlxsw_sp *mlxsw_sp);
        int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
};
156
157 static struct mlxsw_sp_rif *
158 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
159                          const struct net_device *dev);
160 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
161 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
162 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
163                                   struct mlxsw_sp_lpm_tree *lpm_tree);
164 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
165                                      const struct mlxsw_sp_fib *fib,
166                                      u8 tree_id);
167 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
168                                        const struct mlxsw_sp_fib *fib);
169
170 static unsigned int *
171 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
172                            enum mlxsw_sp_rif_counter_dir dir)
173 {
174         switch (dir) {
175         case MLXSW_SP_RIF_COUNTER_EGRESS:
176                 return &rif->counter_egress;
177         case MLXSW_SP_RIF_COUNTER_INGRESS:
178                 return &rif->counter_ingress;
179         }
180         return NULL;
181 }
182
183 static bool
184 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
185                                enum mlxsw_sp_rif_counter_dir dir)
186 {
187         switch (dir) {
188         case MLXSW_SP_RIF_COUNTER_EGRESS:
189                 return rif->counter_egress_valid;
190         case MLXSW_SP_RIF_COUNTER_INGRESS:
191                 return rif->counter_ingress_valid;
192         }
193         return false;
194 }
195
196 static void
197 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
198                                enum mlxsw_sp_rif_counter_dir dir,
199                                bool valid)
200 {
201         switch (dir) {
202         case MLXSW_SP_RIF_COUNTER_EGRESS:
203                 rif->counter_egress_valid = valid;
204                 break;
205         case MLXSW_SP_RIF_COUNTER_INGRESS:
206                 rif->counter_ingress_valid = valid;
207                 break;
208         }
209 }
210
211 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
212                                      unsigned int counter_index, bool enable,
213                                      enum mlxsw_sp_rif_counter_dir dir)
214 {
215         char ritr_pl[MLXSW_REG_RITR_LEN];
216         bool is_egress = false;
217         int err;
218
219         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
220                 is_egress = true;
221         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
222         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
223         if (err)
224                 return err;
225
226         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
227                                     is_egress);
228         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
229 }
230
231 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
232                                    struct mlxsw_sp_rif *rif,
233                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
234 {
235         char ricnt_pl[MLXSW_REG_RICNT_LEN];
236         unsigned int *p_counter_index;
237         bool valid;
238         int err;
239
240         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
241         if (!valid)
242                 return -EINVAL;
243
244         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
245         if (!p_counter_index)
246                 return -EINVAL;
247         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
248                              MLXSW_REG_RICNT_OPCODE_NOP);
249         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
250         if (err)
251                 return err;
252         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
253         return 0;
254 }
255
/* Snapshot of a RIF counter set. mlxsw_sp_rif_counter_fetch_clear() fills
 * each field from the RICNT register getter of the same name.
 */
struct mlxsw_sp_rif_counter_set_basic {
        u64 good_unicast_packets;
        u64 good_multicast_packets;
        u64 good_broadcast_packets;
        u64 good_unicast_bytes;
        u64 good_multicast_bytes;
        u64 good_broadcast_bytes;
        u64 error_packets;
        u64 discard_packets;
        u64 error_bytes;
        u64 discard_bytes;
};
268
269 static int
270 mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
271                                  enum mlxsw_sp_rif_counter_dir dir,
272                                  struct mlxsw_sp_rif_counter_set_basic *set)
273 {
274         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
275         char ricnt_pl[MLXSW_REG_RICNT_LEN];
276         unsigned int *p_counter_index;
277         int err;
278
279         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
280                 return -EINVAL;
281
282         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
283         if (!p_counter_index)
284                 return -EINVAL;
285
286         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
287                              MLXSW_REG_RICNT_OPCODE_CLEAR);
288         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
289         if (err)
290                 return err;
291
292         if (!set)
293                 return 0;
294
295 #define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)                              \
296                 (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
297
298         MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
299         MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
300         MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
301         MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
302         MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
303         MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
304         MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
305         MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
306         MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
307         MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
308
309 #undef MLXSW_SP_RIF_COUNTER_EXTRACT
310
311         return 0;
312 }
313
314 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
315                                       unsigned int counter_index)
316 {
317         char ricnt_pl[MLXSW_REG_RICNT_LEN];
318
319         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
320                              MLXSW_REG_RICNT_OPCODE_CLEAR);
321         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
322 }
323
324 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
325                                enum mlxsw_sp_rif_counter_dir dir)
326 {
327         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328         unsigned int *p_counter_index;
329         int err;
330
331         if (mlxsw_sp_rif_counter_valid_get(rif, dir))
332                 return 0;
333
334         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
335         if (!p_counter_index)
336                 return -EINVAL;
337
338         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
339                                      p_counter_index);
340         if (err)
341                 return err;
342
343         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
344         if (err)
345                 goto err_counter_clear;
346
347         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
348                                         *p_counter_index, true, dir);
349         if (err)
350                 goto err_counter_edit;
351         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
352         return 0;
353
354 err_counter_edit:
355 err_counter_clear:
356         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
357                               *p_counter_index);
358         return err;
359 }
360
361 void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
362                                enum mlxsw_sp_rif_counter_dir dir)
363 {
364         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
365         unsigned int *p_counter_index;
366
367         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
368                 return;
369
370         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
371         if (WARN_ON(!p_counter_index))
372                 return;
373         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
374                                   *p_counter_index, false, dir);
375         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
376                               *p_counter_index);
377         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
378 }
379
380 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
381 {
382         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
383         struct devlink *devlink;
384
385         devlink = priv_to_devlink(mlxsw_sp->core);
386         if (!devlink_dpipe_table_counter_enabled(devlink,
387                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
388                 return;
389         mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
390 }
391
/* Release the egress counter of @rif; the callee returns early when no
 * egress counter is marked valid.
 */
static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
        mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
396
/* Number of distinct prefix lengths: one for every value from 0 up to and
 * including the number of bits in an IPv6 address.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per prefix length; bit N set means prefix length N is
 * in use.
 */
struct mlxsw_sp_prefix_usage {
        DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every prefix length whose bit is set in
 * @prefix_usage.
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
405
406 static bool
407 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
408                          struct mlxsw_sp_prefix_usage *prefix_usage2)
409 {
410         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
411 }
412
413 static void
414 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
415                           struct mlxsw_sp_prefix_usage *prefix_usage2)
416 {
417         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
418 }
419
420 static void
421 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
422                           unsigned char prefix_len)
423 {
424         set_bit(prefix_len, prefix_usage->b);
425 }
426
427 static void
428 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
429                             unsigned char prefix_len)
430 {
431         clear_bit(prefix_len, prefix_usage->b);
432 }
433
/* Key of a FIB node: a prefix address and its length. The address buffer is
 * sized for an IPv6 address.
 */
struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};
438
/* Kind of a FIB entry, stored in mlxsw_sp_fib_entry::type. */
enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
        MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
        MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,

        /* This is a special case of local delivery, where a packet should be
         * decapsulated on reception. Note that there is no corresponding ENCAP,
         * because that's a type of next hop, not of FIB entry. (There can be
         * several next hops in a REMOTE entry, and some of them may be
         * encapsulating entries.)
         */
        MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
        MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
455
456 struct mlxsw_sp_nexthop_group_info;
457 struct mlxsw_sp_nexthop_group;
458 struct mlxsw_sp_fib_entry;
459
/* Node of a FIB, identified by @key and pointing at the FIB entry it holds.
 * NOTE(review): presumably @list links into mlxsw_sp_fib::node_list and
 * @ht_node into mlxsw_sp_fib::ht - confirm at the insertion sites.
 */
struct mlxsw_sp_fib_node {
        struct mlxsw_sp_fib_entry *fib_entry;
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};
467
/* Decapsulation state embedded in struct mlxsw_sp_fib_entry; only meaningful
 * for decap entry types.
 */
struct mlxsw_sp_fib_entry_decap {
        struct mlxsw_sp_ipip_entry *ipip_entry;
        u32 tunnel_index;
};
472
/* Protocol-independent part of a FIB entry. The IPv4 and IPv6 entry
 * structures embed it as their first member named "common".
 */
struct mlxsw_sp_fib_entry {
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};
480
/* IPv4 FIB entry: the common entry followed by IPv4-specific fields.
 * NOTE(review): @fi, @tb_id, @dscp and @type presumably mirror the kernel
 * route this entry was created from - confirm at the creation site.
 */
struct mlxsw_sp_fib4_entry {
        struct mlxsw_sp_fib_entry common;
        struct fib_info *fi;
        u32 tb_id;
        dscp_t dscp;
        u8 type;
};
488
/* IPv6 FIB entry: the common entry followed by a list of routes.
 * NOTE(review): presumably @rt6_list holds struct mlxsw_sp_rt6 items and
 * @nrt6 counts them - confirm at the list users.
 */
struct mlxsw_sp_fib6_entry {
        struct mlxsw_sp_fib_entry common;
        struct list_head rt6_list;
        unsigned int nrt6;
};
494
/* List wrapper around one kernel IPv6 route (struct fib6_info). */
struct mlxsw_sp_rt6 {
        struct list_head list;
        struct fib6_info *rt;
};
499
/* One slot of the router's LPM tree array. A slot whose @ref_count is zero
 * is treated as unused and may be picked by mlxsw_sp_lpm_tree_create().
 */
struct mlxsw_sp_lpm_tree {
        u8 id; /* tree ID */
        unsigned int ref_count; /* 0 means the slot is free */
        enum mlxsw_sp_l3proto proto;
        /* Zeroed when the tree is created. */
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        /* Prefix lengths the tree structure was built for. */
        struct mlxsw_sp_prefix_usage prefix_usage;
};
507
/* Per-protocol FIB of a virtual router. Created by mlxsw_sp_fib_create(),
 * which takes a reference on @lpm_tree and binds the virtual router to it.
 */
struct mlxsw_sp_fib {
        struct rhashtable ht; /* initialised with mlxsw_sp_fib_ht_params */
        struct list_head node_list; /* expected to be empty on destroy */
        struct mlxsw_sp_vr *vr; /* owning virtual router */
        struct mlxsw_sp_lpm_tree *lpm_tree;
        enum mlxsw_sp_l3proto proto;
};
515
/* Virtual router slot. A slot counts as in use when any of @fib4, @fib6 or
 * the @mr_table entries is non-NULL (see mlxsw_sp_vr_is_used()).
 */
struct mlxsw_sp_vr {
        u16 id; /* virtual router ID */
        u32 tb_id; /* kernel fib table id */
        unsigned int rif_count;
        struct mlxsw_sp_fib *fib4;
        struct mlxsw_sp_fib *fib6;
        struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
        struct mlxsw_sp_rif *ul_rif;
        refcount_t ul_rif_refcnt;
};
526
527 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
528
529 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
530                                                 struct mlxsw_sp_vr *vr,
531                                                 enum mlxsw_sp_l3proto proto)
532 {
533         struct mlxsw_sp_lpm_tree *lpm_tree;
534         struct mlxsw_sp_fib *fib;
535         int err;
536
537         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
538         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
539         if (!fib)
540                 return ERR_PTR(-ENOMEM);
541         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
542         if (err)
543                 goto err_rhashtable_init;
544         INIT_LIST_HEAD(&fib->node_list);
545         fib->proto = proto;
546         fib->vr = vr;
547         fib->lpm_tree = lpm_tree;
548         mlxsw_sp_lpm_tree_hold(lpm_tree);
549         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
550         if (err)
551                 goto err_lpm_tree_bind;
552         return fib;
553
554 err_lpm_tree_bind:
555         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
556 err_rhashtable_init:
557         kfree(fib);
558         return ERR_PTR(err);
559 }
560
/* Tear down a FIB created by mlxsw_sp_fib_create(): rebind the virtual
 * router to the default tree, drop the LPM tree reference, then free the
 * hashtable and the FIB itself. Warns if nodes are still linked to the FIB.
 * The result of the unbind register write is not checked.
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_fib *fib)
{
        mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
        mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
        WARN_ON(!list_empty(&fib->node_list));
        rhashtable_destroy(&fib->ht);
        kfree(fib);
}
570
571 static struct mlxsw_sp_lpm_tree *
572 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
573 {
574         static struct mlxsw_sp_lpm_tree *lpm_tree;
575         int i;
576
577         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
578                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
579                 if (lpm_tree->ref_count == 0)
580                         return lpm_tree;
581         }
582         return NULL;
583 }
584
585 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
586                                    struct mlxsw_sp_lpm_tree *lpm_tree)
587 {
588         char ralta_pl[MLXSW_REG_RALTA_LEN];
589
590         mlxsw_reg_ralta_pack(ralta_pl, true,
591                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
592                              lpm_tree->id);
593         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
594 }
595
596 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
597                                    struct mlxsw_sp_lpm_tree *lpm_tree)
598 {
599         char ralta_pl[MLXSW_REG_RALTA_LEN];
600
601         mlxsw_reg_ralta_pack(ralta_pl, false,
602                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
603                              lpm_tree->id);
604         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
605 }
606
/* Program the structure of @lpm_tree (RALST register) from the set of
 * prefix lengths in @prefix_usage.
 *
 * The used prefix lengths are chained one after another: every non-zero used
 * prefix length gets a bin whose child is the previously visited (shorter)
 * used prefix length.
 *
 * Returns the result of the register write.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_prefix_usage *prefix_usage,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralst_pl[MLXSW_REG_RALST_LEN];
        u8 root_bin = 0;
        u8 prefix;
        u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

        /* The last set bit visited wins, so the root bin ends up being the
         * longest used prefix length (or 0 when none is set).
         */
        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
                root_bin = prefix;

        mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
                /* Prefix length 0 gets no bin of its own. */
                if (prefix == 0)
                        continue;
                /* NOTE(review): presumably the third argument is the left
                 * child and the fourth the right child - confirm in reg.h.
                 */
                mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
                                         MLXSW_REG_RALST_BIN_NO_CHILD);
                last_prefix = prefix;
        }
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
630
631 static struct mlxsw_sp_lpm_tree *
632 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
633                          struct mlxsw_sp_prefix_usage *prefix_usage,
634                          enum mlxsw_sp_l3proto proto)
635 {
636         struct mlxsw_sp_lpm_tree *lpm_tree;
637         int err;
638
639         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
640         if (!lpm_tree)
641                 return ERR_PTR(-EBUSY);
642         lpm_tree->proto = proto;
643         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
644         if (err)
645                 return ERR_PTR(err);
646
647         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
648                                                 lpm_tree);
649         if (err)
650                 goto err_left_struct_set;
651         memcpy(&lpm_tree->prefix_usage, prefix_usage,
652                sizeof(lpm_tree->prefix_usage));
653         memset(&lpm_tree->prefix_ref_count, 0,
654                sizeof(lpm_tree->prefix_ref_count));
655         lpm_tree->ref_count = 1;
656         return lpm_tree;
657
658 err_left_struct_set:
659         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
660         return ERR_PTR(err);
661 }
662
/* Counterpart of mlxsw_sp_lpm_tree_create(): free the tree in the device.
 * The slot itself lives in the router's tree array and is not released here.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                      struct mlxsw_sp_lpm_tree *lpm_tree)
{
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
668
669 static struct mlxsw_sp_lpm_tree *
670 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
671                       struct mlxsw_sp_prefix_usage *prefix_usage,
672                       enum mlxsw_sp_l3proto proto)
673 {
674         struct mlxsw_sp_lpm_tree *lpm_tree;
675         int i;
676
677         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
678                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
679                 if (lpm_tree->ref_count != 0 &&
680                     lpm_tree->proto == proto &&
681                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
682                                              prefix_usage)) {
683                         mlxsw_sp_lpm_tree_hold(lpm_tree);
684                         return lpm_tree;
685                 }
686         }
687         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
688 }
689
/* Take an additional reference on @lpm_tree. The count is a plain integer
 * increment, not an atomic or refcount_t operation.
 */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
        lpm_tree->ref_count++;
}
694
695 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
696                                   struct mlxsw_sp_lpm_tree *lpm_tree)
697 {
698         if (--lpm_tree->ref_count == 0)
699                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
700 }
701
702 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
703
704 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
705 {
706         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
707         struct mlxsw_sp_lpm_tree *lpm_tree;
708         u64 max_trees;
709         int err, i;
710
711         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
712                 return -EIO;
713
714         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
715         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
716         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
717                                              sizeof(struct mlxsw_sp_lpm_tree),
718                                              GFP_KERNEL);
719         if (!mlxsw_sp->router->lpm.trees)
720                 return -ENOMEM;
721
722         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
723                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
724                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
725         }
726
727         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
728                                          MLXSW_SP_L3_PROTO_IPV4);
729         if (IS_ERR(lpm_tree)) {
730                 err = PTR_ERR(lpm_tree);
731                 goto err_ipv4_tree_get;
732         }
733         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
734
735         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
736                                          MLXSW_SP_L3_PROTO_IPV6);
737         if (IS_ERR(lpm_tree)) {
738                 err = PTR_ERR(lpm_tree);
739                 goto err_ipv6_tree_get;
740         }
741         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
742
743         return 0;
744
745 err_ipv6_tree_get:
746         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
747         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
748 err_ipv4_tree_get:
749         kfree(mlxsw_sp->router->lpm.trees);
750         return err;
751 }
752
753 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
754 {
755         struct mlxsw_sp_lpm_tree *lpm_tree;
756
757         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
758         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
759
760         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
761         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
762
763         kfree(mlxsw_sp->router->lpm.trees);
764 }
765
766 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
767 {
768         return !!vr->fib4 || !!vr->fib6 ||
769                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
770                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
771 }
772
773 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
774 {
775         int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
776         struct mlxsw_sp_vr *vr;
777         int i;
778
779         for (i = 0; i < max_vrs; i++) {
780                 vr = &mlxsw_sp->router->vrs[i];
781                 if (!mlxsw_sp_vr_is_used(vr))
782                         return vr;
783         }
784         return NULL;
785 }
786
787 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
788                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
789 {
790         char raltb_pl[MLXSW_REG_RALTB_LEN];
791
792         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
793                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
794                              tree_id);
795         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
796 }
797
798 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
799                                        const struct mlxsw_sp_fib *fib)
800 {
801         char raltb_pl[MLXSW_REG_RALTB_LEN];
802
803         /* Bind to tree 0 which is default */
804         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
805                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
806         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
807 }
808
809 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
810 {
811         /* For our purpose, squash main, default and local tables into one */
812         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
813                 tb_id = RT_TABLE_MAIN;
814         return tb_id;
815 }
816
817 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
818                                             u32 tb_id)
819 {
820         int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
821         struct mlxsw_sp_vr *vr;
822         int i;
823
824         tb_id = mlxsw_sp_fix_tb_id(tb_id);
825
826         for (i = 0; i < max_vrs; i++) {
827                 vr = &mlxsw_sp->router->vrs[i];
828                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
829                         return vr;
830         }
831         return NULL;
832 }
833
834 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
835                                 u16 *vr_id)
836 {
837         struct mlxsw_sp_vr *vr;
838         int err = 0;
839
840         mutex_lock(&mlxsw_sp->router->lock);
841         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
842         if (!vr) {
843                 err = -ESRCH;
844                 goto out;
845         }
846         *vr_id = vr->id;
847 out:
848         mutex_unlock(&mlxsw_sp->router->lock);
849         return err;
850 }
851
852 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
853                                             enum mlxsw_sp_l3proto proto)
854 {
855         switch (proto) {
856         case MLXSW_SP_L3_PROTO_IPV4:
857                 return vr->fib4;
858         case MLXSW_SP_L3_PROTO_IPV6:
859                 return vr->fib6;
860         }
861         return NULL;
862 }
863
864 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
865                                               u32 tb_id,
866                                               struct netlink_ext_ack *extack)
867 {
868         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
869         struct mlxsw_sp_fib *fib4;
870         struct mlxsw_sp_fib *fib6;
871         struct mlxsw_sp_vr *vr;
872         int err;
873
874         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
875         if (!vr) {
876                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
877                 return ERR_PTR(-EBUSY);
878         }
879         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
880         if (IS_ERR(fib4))
881                 return ERR_CAST(fib4);
882         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
883         if (IS_ERR(fib6)) {
884                 err = PTR_ERR(fib6);
885                 goto err_fib6_create;
886         }
887         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
888                                              MLXSW_SP_L3_PROTO_IPV4);
889         if (IS_ERR(mr4_table)) {
890                 err = PTR_ERR(mr4_table);
891                 goto err_mr4_table_create;
892         }
893         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
894                                              MLXSW_SP_L3_PROTO_IPV6);
895         if (IS_ERR(mr6_table)) {
896                 err = PTR_ERR(mr6_table);
897                 goto err_mr6_table_create;
898         }
899
900         vr->fib4 = fib4;
901         vr->fib6 = fib6;
902         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
903         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
904         vr->tb_id = tb_id;
905         return vr;
906
907 err_mr6_table_create:
908         mlxsw_sp_mr_table_destroy(mr4_table);
909 err_mr4_table_create:
910         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
911 err_fib6_create:
912         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
913         return ERR_PTR(err);
914 }
915
916 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
917                                 struct mlxsw_sp_vr *vr)
918 {
919         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
920         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
921         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
922         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
923         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
924         vr->fib6 = NULL;
925         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
926         vr->fib4 = NULL;
927 }
928
929 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
930                                            struct netlink_ext_ack *extack)
931 {
932         struct mlxsw_sp_vr *vr;
933
934         tb_id = mlxsw_sp_fix_tb_id(tb_id);
935         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
936         if (!vr)
937                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
938         return vr;
939 }
940
941 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
942 {
943         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
944             list_empty(&vr->fib6->node_list) &&
945             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
946             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
947                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
948 }
949
950 static bool
951 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
952                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
953 {
954         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
955
956         if (!mlxsw_sp_vr_is_used(vr))
957                 return false;
958         if (fib->lpm_tree->id == tree_id)
959                 return true;
960         return false;
961 }
962
963 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
964                                         struct mlxsw_sp_fib *fib,
965                                         struct mlxsw_sp_lpm_tree *new_tree)
966 {
967         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
968         int err;
969
970         fib->lpm_tree = new_tree;
971         mlxsw_sp_lpm_tree_hold(new_tree);
972         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
973         if (err)
974                 goto err_tree_bind;
975         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
976         return 0;
977
978 err_tree_bind:
979         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
980         fib->lpm_tree = old_tree;
981         return err;
982 }
983
984 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
985                                          struct mlxsw_sp_fib *fib,
986                                          struct mlxsw_sp_lpm_tree *new_tree)
987 {
988         int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
989         enum mlxsw_sp_l3proto proto = fib->proto;
990         struct mlxsw_sp_lpm_tree *old_tree;
991         u8 old_id, new_id = new_tree->id;
992         struct mlxsw_sp_vr *vr;
993         int i, err;
994
995         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
996         old_id = old_tree->id;
997
998         for (i = 0; i < max_vrs; i++) {
999                 vr = &mlxsw_sp->router->vrs[i];
1000                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
1001                         continue;
1002                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1003                                                    mlxsw_sp_vr_fib(vr, proto),
1004                                                    new_tree);
1005                 if (err)
1006                         goto err_tree_replace;
1007         }
1008
1009         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
1010                sizeof(new_tree->prefix_ref_count));
1011         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1012         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
1013
1014         return 0;
1015
1016 err_tree_replace:
1017         for (i--; i >= 0; i--) {
1018                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1019                         continue;
1020                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1021                                              mlxsw_sp_vr_fib(vr, proto),
1022                                              old_tree);
1023         }
1024         return err;
1025 }
1026
1027 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1028 {
1029         struct mlxsw_sp_vr *vr;
1030         u64 max_vrs;
1031         int i;
1032
1033         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1034                 return -EIO;
1035
1036         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1037         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1038                                         GFP_KERNEL);
1039         if (!mlxsw_sp->router->vrs)
1040                 return -ENOMEM;
1041
1042         for (i = 0; i < max_vrs; i++) {
1043                 vr = &mlxsw_sp->router->vrs[i];
1044                 vr->id = i;
1045         }
1046
1047         return 0;
1048 }
1049
1050 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1051
1052 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1053 {
1054         /* At this stage we're guaranteed not to have new incoming
1055          * FIB notifications and the work queue is free from FIBs
1056          * sitting on top of mlxsw netdevs. However, we can still
1057          * have other FIBs queued. Flush the queue before flushing
1058          * the device's tables. No need for locks, as we're the only
1059          * writer.
1060          */
1061         mlxsw_core_flush_owq();
1062         mlxsw_sp_router_fib_flush(mlxsw_sp);
1063         kfree(mlxsw_sp->router->vrs);
1064 }
1065
1066 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1067 {
1068         struct net_device *d;
1069         u32 tb_id;
1070
1071         rcu_read_lock();
1072         d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1073         if (d)
1074                 tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1075         else
1076                 tb_id = RT_TABLE_MAIN;
1077         rcu_read_unlock();
1078
1079         return tb_id;
1080 }
1081
1082 static void
1083 mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev)
1084 {
1085         crif->key.dev = dev;
1086         INIT_LIST_HEAD(&crif->nexthop_list);
1087 }
1088
1089 static struct mlxsw_sp_crif *
1090 mlxsw_sp_crif_alloc(struct net_device *dev)
1091 {
1092         struct mlxsw_sp_crif *crif;
1093
1094         crif = kzalloc(sizeof(*crif), GFP_KERNEL);
1095         if (!crif)
1096                 return NULL;
1097
1098         mlxsw_sp_crif_init(crif, dev);
1099         return crif;
1100 }
1101
1102 static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif)
1103 {
1104         if (WARN_ON(crif->rif))
1105                 return;
1106
1107         WARN_ON(!list_empty(&crif->nexthop_list));
1108         kfree(crif);
1109 }
1110
1111 static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router,
1112                                 struct mlxsw_sp_crif *crif)
1113 {
1114         return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node,
1115                                       mlxsw_sp_crif_ht_params);
1116 }
1117
1118 static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router,
1119                                  struct mlxsw_sp_crif *crif)
1120 {
1121         rhashtable_remove_fast(&router->crif_ht, &crif->ht_node,
1122                                mlxsw_sp_crif_ht_params);
1123 }
1124
1125 static struct mlxsw_sp_crif *
1126 mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router,
1127                      const struct net_device *dev)
1128 {
1129         struct mlxsw_sp_crif_key key = {
1130                 .dev = (struct net_device *)dev,
1131         };
1132
1133         return rhashtable_lookup_fast(&router->crif_ht, &key,
1134                                       mlxsw_sp_crif_ht_params);
1135 }
1136
1137 static struct mlxsw_sp_rif *
1138 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1139                     const struct mlxsw_sp_rif_params *params,
1140                     struct netlink_ext_ack *extack);
1141
1142 static struct mlxsw_sp_rif_ipip_lb *
1143 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1144                                 enum mlxsw_sp_ipip_type ipipt,
1145                                 struct net_device *ol_dev,
1146                                 struct netlink_ext_ack *extack)
1147 {
1148         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1149         const struct mlxsw_sp_ipip_ops *ipip_ops;
1150         struct mlxsw_sp_rif *rif;
1151
1152         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1153         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1154                 .common.dev = ol_dev,
1155                 .common.lag = false,
1156                 .common.double_entry = ipip_ops->double_rif_entry,
1157                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1158         };
1159
1160         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1161         if (IS_ERR(rif))
1162                 return ERR_CAST(rif);
1163         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1164 }
1165
1166 static struct mlxsw_sp_ipip_entry *
1167 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1168                           enum mlxsw_sp_ipip_type ipipt,
1169                           struct net_device *ol_dev)
1170 {
1171         const struct mlxsw_sp_ipip_ops *ipip_ops;
1172         struct mlxsw_sp_ipip_entry *ipip_entry;
1173         struct mlxsw_sp_ipip_entry *ret = NULL;
1174         int err;
1175
1176         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1177         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1178         if (!ipip_entry)
1179                 return ERR_PTR(-ENOMEM);
1180
1181         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1182                                                             ol_dev, NULL);
1183         if (IS_ERR(ipip_entry->ol_lb)) {
1184                 ret = ERR_CAST(ipip_entry->ol_lb);
1185                 goto err_ol_ipip_lb_create;
1186         }
1187
1188         ipip_entry->ipipt = ipipt;
1189         ipip_entry->ol_dev = ol_dev;
1190         ipip_entry->parms = ipip_ops->parms_init(ol_dev);
1191
1192         err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
1193         if (err) {
1194                 ret = ERR_PTR(err);
1195                 goto err_rem_ip_addr_set;
1196         }
1197
1198         return ipip_entry;
1199
1200 err_rem_ip_addr_set:
1201         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1202 err_ol_ipip_lb_create:
1203         kfree(ipip_entry);
1204         return ret;
1205 }
1206
1207 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1208                                         struct mlxsw_sp_ipip_entry *ipip_entry)
1209 {
1210         const struct mlxsw_sp_ipip_ops *ipip_ops =
1211                 mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1212
1213         ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1214         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1215         kfree(ipip_entry);
1216 }
1217
1218 static bool
1219 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1220                                   const enum mlxsw_sp_l3proto ul_proto,
1221                                   union mlxsw_sp_l3addr saddr,
1222                                   u32 ul_tb_id,
1223                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1224 {
1225         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1226         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1227         union mlxsw_sp_l3addr tun_saddr;
1228
1229         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1230                 return false;
1231
1232         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1233         return tun_ul_tb_id == ul_tb_id &&
1234                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1235 }
1236
1237 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1238                                                  enum mlxsw_sp_ipip_type ipipt)
1239 {
1240         const struct mlxsw_sp_ipip_ops *ipip_ops;
1241
1242         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1243
1244         /* Not all tunnels require to increase the default pasing depth
1245          * (96 bytes).
1246          */
1247         if (ipip_ops->inc_parsing_depth)
1248                 return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1249
1250         return 0;
1251 }
1252
1253 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1254                                                   enum mlxsw_sp_ipip_type ipipt)
1255 {
1256         const struct mlxsw_sp_ipip_ops *ipip_ops =
1257                 mlxsw_sp->router->ipip_ops_arr[ipipt];
1258
1259         if (ipip_ops->inc_parsing_depth)
1260                 mlxsw_sp_parsing_depth_dec(mlxsw_sp);
1261 }
1262
1263 static int
1264 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1265                               struct mlxsw_sp_fib_entry *fib_entry,
1266                               struct mlxsw_sp_ipip_entry *ipip_entry)
1267 {
1268         u32 tunnel_index;
1269         int err;
1270
1271         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1272                                   1, &tunnel_index);
1273         if (err)
1274                 return err;
1275
1276         err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1277                                                     ipip_entry->ipipt);
1278         if (err)
1279                 goto err_parsing_depth_inc;
1280
1281         ipip_entry->decap_fib_entry = fib_entry;
1282         fib_entry->decap.ipip_entry = ipip_entry;
1283         fib_entry->decap.tunnel_index = tunnel_index;
1284
1285         return 0;
1286
1287 err_parsing_depth_inc:
1288         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1289                            fib_entry->decap.tunnel_index);
1290         return err;
1291 }
1292
1293 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1294                                           struct mlxsw_sp_fib_entry *fib_entry)
1295 {
1296         enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1297
1298         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1299         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1300         fib_entry->decap.ipip_entry = NULL;
1301         mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1302         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1303                            1, fib_entry->decap.tunnel_index);
1304 }
1305
1306 static struct mlxsw_sp_fib_node *
1307 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1308                          size_t addr_len, unsigned char prefix_len);
1309 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1310                                      struct mlxsw_sp_fib_entry *fib_entry);
1311
1312 static void
1313 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1314                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1315 {
1316         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1317
1318         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1319         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1320
1321         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1322 }
1323
1324 static void
1325 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1326                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1327                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1328 {
1329         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1330                                           ipip_entry))
1331                 return;
1332         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1333
1334         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1335                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1336 }
1337
1338 static struct mlxsw_sp_fib_entry *
1339 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1340                                      enum mlxsw_sp_l3proto proto,
1341                                      const union mlxsw_sp_l3addr *addr,
1342                                      enum mlxsw_sp_fib_entry_type type)
1343 {
1344         struct mlxsw_sp_fib_node *fib_node;
1345         unsigned char addr_prefix_len;
1346         struct mlxsw_sp_fib *fib;
1347         struct mlxsw_sp_vr *vr;
1348         const void *addrp;
1349         size_t addr_len;
1350         u32 addr4;
1351
1352         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1353         if (!vr)
1354                 return NULL;
1355         fib = mlxsw_sp_vr_fib(vr, proto);
1356
1357         switch (proto) {
1358         case MLXSW_SP_L3_PROTO_IPV4:
1359                 addr4 = be32_to_cpu(addr->addr4);
1360                 addrp = &addr4;
1361                 addr_len = 4;
1362                 addr_prefix_len = 32;
1363                 break;
1364         case MLXSW_SP_L3_PROTO_IPV6:
1365                 addrp = &addr->addr6;
1366                 addr_len = 16;
1367                 addr_prefix_len = 128;
1368                 break;
1369         default:
1370                 WARN_ON(1);
1371                 return NULL;
1372         }
1373
1374         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1375                                             addr_prefix_len);
1376         if (!fib_node || fib_node->fib_entry->type != type)
1377                 return NULL;
1378
1379         return fib_node->fib_entry;
1380 }
1381
1382 /* Given an IPIP entry, find the corresponding decap route. */
1383 static struct mlxsw_sp_fib_entry *
1384 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1385                                struct mlxsw_sp_ipip_entry *ipip_entry)
1386 {
1387         static struct mlxsw_sp_fib_node *fib_node;
1388         const struct mlxsw_sp_ipip_ops *ipip_ops;
1389         unsigned char saddr_prefix_len;
1390         union mlxsw_sp_l3addr saddr;
1391         struct mlxsw_sp_fib *ul_fib;
1392         struct mlxsw_sp_vr *ul_vr;
1393         const void *saddrp;
1394         size_t saddr_len;
1395         u32 ul_tb_id;
1396         u32 saddr4;
1397
1398         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1399
1400         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1401         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1402         if (!ul_vr)
1403                 return NULL;
1404
1405         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1406         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1407                                            ipip_entry->ol_dev);
1408
1409         switch (ipip_ops->ul_proto) {
1410         case MLXSW_SP_L3_PROTO_IPV4:
1411                 saddr4 = be32_to_cpu(saddr.addr4);
1412                 saddrp = &saddr4;
1413                 saddr_len = 4;
1414                 saddr_prefix_len = 32;
1415                 break;
1416         case MLXSW_SP_L3_PROTO_IPV6:
1417                 saddrp = &saddr.addr6;
1418                 saddr_len = 16;
1419                 saddr_prefix_len = 128;
1420                 break;
1421         default:
1422                 WARN_ON(1);
1423                 return NULL;
1424         }
1425
1426         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1427                                             saddr_prefix_len);
1428         if (!fib_node ||
1429             fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1430                 return NULL;
1431
1432         return fib_node->fib_entry;
1433 }
1434
1435 static struct mlxsw_sp_ipip_entry *
1436 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1437                            enum mlxsw_sp_ipip_type ipipt,
1438                            struct net_device *ol_dev)
1439 {
1440         struct mlxsw_sp_ipip_entry *ipip_entry;
1441
1442         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1443         if (IS_ERR(ipip_entry))
1444                 return ipip_entry;
1445
1446         list_add_tail(&ipip_entry->ipip_list_node,
1447                       &mlxsw_sp->router->ipip_list);
1448
1449         return ipip_entry;
1450 }
1451
1452 static void
1453 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1454                             struct mlxsw_sp_ipip_entry *ipip_entry)
1455 {
1456         list_del(&ipip_entry->ipip_list_node);
1457         mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1458 }
1459
1460 static bool
1461 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1462                                   const struct net_device *ul_dev,
1463                                   enum mlxsw_sp_l3proto ul_proto,
1464                                   union mlxsw_sp_l3addr ul_dip,
1465                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1466 {
1467         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1468         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1469
1470         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1471                 return false;
1472
1473         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1474                                                  ul_tb_id, ipip_entry);
1475 }
1476
1477 /* Given decap parameters, find the corresponding IPIP entry. */
1478 static struct mlxsw_sp_ipip_entry *
1479 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1480                                   enum mlxsw_sp_l3proto ul_proto,
1481                                   union mlxsw_sp_l3addr ul_dip)
1482 {
1483         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1484         struct net_device *ul_dev;
1485
1486         rcu_read_lock();
1487
1488         ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1489         if (!ul_dev)
1490                 goto out_unlock;
1491
1492         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1493                             ipip_list_node)
1494                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1495                                                       ul_proto, ul_dip,
1496                                                       ipip_entry))
1497                         goto out_unlock;
1498
1499         rcu_read_unlock();
1500
1501         return NULL;
1502
1503 out_unlock:
1504         rcu_read_unlock();
1505         return ipip_entry;
1506 }
1507
1508 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1509                                       const struct net_device *dev,
1510                                       enum mlxsw_sp_ipip_type *p_type)
1511 {
1512         struct mlxsw_sp_router *router = mlxsw_sp->router;
1513         const struct mlxsw_sp_ipip_ops *ipip_ops;
1514         enum mlxsw_sp_ipip_type ipipt;
1515
1516         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1517                 ipip_ops = router->ipip_ops_arr[ipipt];
1518                 if (dev->type == ipip_ops->dev_type) {
1519                         if (p_type)
1520                                 *p_type = ipipt;
1521                         return true;
1522                 }
1523         }
1524         return false;
1525 }
1526
1527 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1528                                        const struct net_device *dev)
1529 {
1530         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1531 }
1532
1533 static struct mlxsw_sp_ipip_entry *
1534 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1535                                    const struct net_device *ol_dev)
1536 {
1537         struct mlxsw_sp_ipip_entry *ipip_entry;
1538
1539         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1540                             ipip_list_node)
1541                 if (ipip_entry->ol_dev == ol_dev)
1542                         return ipip_entry;
1543
1544         return NULL;
1545 }
1546
1547 static struct mlxsw_sp_ipip_entry *
1548 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1549                                    const struct net_device *ul_dev,
1550                                    struct mlxsw_sp_ipip_entry *start)
1551 {
1552         struct mlxsw_sp_ipip_entry *ipip_entry;
1553
1554         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1555                                         ipip_list_node);
1556         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1557                                      ipip_list_node) {
1558                 struct net_device *ol_dev = ipip_entry->ol_dev;
1559                 struct net_device *ipip_ul_dev;
1560
1561                 rcu_read_lock();
1562                 ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1563                 rcu_read_unlock();
1564
1565                 if (ipip_ul_dev == ul_dev)
1566                         return ipip_entry;
1567         }
1568
1569         return NULL;
1570 }
1571
1572 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1573                                        const struct net_device *dev)
1574 {
1575         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1576 }
1577
1578 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1579                                                 const struct net_device *ol_dev,
1580                                                 enum mlxsw_sp_ipip_type ipipt)
1581 {
1582         const struct mlxsw_sp_ipip_ops *ops
1583                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1584
1585         return ops->can_offload(mlxsw_sp, ol_dev);
1586 }
1587
1588 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1589                                                 struct net_device *ol_dev)
1590 {
1591         enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1592         struct mlxsw_sp_ipip_entry *ipip_entry;
1593         enum mlxsw_sp_l3proto ul_proto;
1594         union mlxsw_sp_l3addr saddr;
1595         u32 ul_tb_id;
1596
1597         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1598         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1599                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1600                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1601                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1602                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1603                                                           saddr, ul_tb_id,
1604                                                           NULL)) {
1605                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1606                                                                 ol_dev);
1607                         if (IS_ERR(ipip_entry))
1608                                 return PTR_ERR(ipip_entry);
1609                 }
1610         }
1611
1612         return 0;
1613 }
1614
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1624
/* The overlay device of @ipip_entry came up: if a matching decap route
 * exists, promote it to an IPIP decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_route;

	decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_route)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1636
1637 static int
1638 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1639                         u16 ul_rif_id, bool enable)
1640 {
1641         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1642         struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
1643         enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1644         struct mlxsw_sp_rif *rif = &lb_rif->common;
1645         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1646         char ritr_pl[MLXSW_REG_RITR_LEN];
1647         struct in6_addr *saddr6;
1648         u32 saddr4;
1649
1650         ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1651         switch (lb_cf.ul_protocol) {
1652         case MLXSW_SP_L3_PROTO_IPV4:
1653                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1654                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1655                                     rif->rif_index, rif->vr_id, dev->mtu);
1656                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1657                                                    ipip_options, ul_vr_id,
1658                                                    ul_rif_id, saddr4,
1659                                                    lb_cf.okey);
1660                 break;
1661
1662         case MLXSW_SP_L3_PROTO_IPV6:
1663                 saddr6 = &lb_cf.saddr.addr6;
1664                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1665                                     rif->rif_index, rif->vr_id, dev->mtu);
1666                 mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1667                                                    ipip_options, ul_vr_id,
1668                                                    ul_rif_id, saddr6,
1669                                                    lb_cf.okey);
1670                 break;
1671         }
1672
1673         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1674 }
1675
1676 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1677                                                  struct net_device *ol_dev)
1678 {
1679         struct mlxsw_sp_ipip_entry *ipip_entry;
1680         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1681         int err = 0;
1682
1683         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1684         if (ipip_entry) {
1685                 lb_rif = ipip_entry->ol_lb;
1686                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1687                                               lb_rif->ul_rif_id, true);
1688                 if (err)
1689                         goto out;
1690                 lb_rif->common.mtu = ol_dev->mtu;
1691         }
1692
1693 out:
1694         return err;
1695 }
1696
/* Forward an overlay device "up" notification to its IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1706
1707 static void
1708 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1709                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1710 {
1711         if (ipip_entry->decap_fib_entry)
1712                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1713 }
1714
/* Forward an overlay device "down" notification to its IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1724
1725 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1726                                         struct mlxsw_sp_rif *rif);
1727
1728 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp,
1729                                          struct mlxsw_sp_rif *old_rif,
1730                                          struct mlxsw_sp_rif *new_rif,
1731                                          bool migrate_nhs)
1732 {
1733         struct mlxsw_sp_crif *crif = old_rif->crif;
1734         struct mlxsw_sp_crif mock_crif = {};
1735
1736         if (migrate_nhs)
1737                 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
1738
1739         /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload
1740          * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link.
1741          */
1742         mlxsw_sp_crif_init(&mock_crif, crif->key.dev);
1743         old_rif->crif = &mock_crif;
1744         mock_crif.rif = old_rif;
1745         mlxsw_sp_rif_destroy(old_rif);
1746 }
1747
1748 static int
1749 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1750                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1751                                  bool keep_encap,
1752                                  struct netlink_ext_ack *extack)
1753 {
1754         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1755         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1756
1757         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1758                                                      ipip_entry->ipipt,
1759                                                      ipip_entry->ol_dev,
1760                                                      extack);
1761         if (IS_ERR(new_lb_rif))
1762                 return PTR_ERR(new_lb_rif);
1763         ipip_entry->ol_lb = new_lb_rif;
1764
1765         mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common,
1766                                      &new_lb_rif->common, keep_encap);
1767         return 0;
1768 }
1769
1770 /**
1771  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1772  * @mlxsw_sp: mlxsw_sp.
1773  * @ipip_entry: IPIP entry.
1774  * @recreate_loopback: Recreates the associated loopback RIF.
1775  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1776  *              relevant when recreate_loopback is true.
1777  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1778  *                   is only relevant when recreate_loopback is false.
1779  * @extack: extack.
1780  *
1781  * Return: Non-zero value on failure.
1782  */
1783 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1784                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1785                                         bool recreate_loopback,
1786                                         bool keep_encap,
1787                                         bool update_nexthops,
1788                                         struct netlink_ext_ack *extack)
1789 {
1790         int err;
1791
1792         /* RIFs can't be edited, so to update loopback, we need to destroy and
1793          * recreate it. That creates a window of opportunity where RALUE and
1794          * RATR registers end up referencing a RIF that's already gone. RATRs
1795          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1796          * of RALUE, demote the decap route back.
1797          */
1798         if (ipip_entry->decap_fib_entry)
1799                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1800
1801         if (recreate_loopback) {
1802                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1803                                                        keep_encap, extack);
1804                 if (err)
1805                         return err;
1806         } else if (update_nexthops) {
1807                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1808                                             &ipip_entry->ol_lb->common);
1809         }
1810
1811         if (ipip_entry->ol_dev->flags & IFF_UP)
1812                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1813
1814         return 0;
1815 }
1816
1817 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1818                                                 struct net_device *ol_dev,
1819                                                 struct netlink_ext_ack *extack)
1820 {
1821         struct mlxsw_sp_ipip_entry *ipip_entry =
1822                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1823
1824         if (!ipip_entry)
1825                 return 0;
1826
1827         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1828                                                    true, false, false, extack);
1829 }
1830
1831 static int
1832 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1833                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1834                                      struct net_device *ul_dev,
1835                                      bool *demote_this,
1836                                      struct netlink_ext_ack *extack)
1837 {
1838         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1839         enum mlxsw_sp_l3proto ul_proto;
1840         union mlxsw_sp_l3addr saddr;
1841
1842         /* Moving underlay to a different VRF might cause local address
1843          * conflict, and the conflicting tunnels need to be demoted.
1844          */
1845         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1846         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1847         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1848                                                  saddr, ul_tb_id,
1849                                                  ipip_entry)) {
1850                 *demote_this = true;
1851                 return 0;
1852         }
1853
1854         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1855                                                    true, true, false, extack);
1856 }
1857
1858 static int
1859 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1860                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1861                                     struct net_device *ul_dev)
1862 {
1863         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1864                                                    false, false, true, NULL);
1865 }
1866
1867 static int
1868 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1869                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1870                                       struct net_device *ul_dev)
1871 {
1872         /* A down underlay device causes encapsulated packets to not be
1873          * forwarded, but decap still works. So refresh next hops without
1874          * touching anything else.
1875          */
1876         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1877                                                    false, false, true, NULL);
1878 }
1879
1880 static int
1881 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1882                                         struct net_device *ol_dev,
1883                                         struct netlink_ext_ack *extack)
1884 {
1885         const struct mlxsw_sp_ipip_ops *ipip_ops;
1886         struct mlxsw_sp_ipip_entry *ipip_entry;
1887         int err;
1888
1889         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1890         if (!ipip_entry)
1891                 /* A change might make a tunnel eligible for offloading, but
1892                  * that is currently not implemented. What falls to slow path
1893                  * stays there.
1894                  */
1895                 return 0;
1896
1897         /* A change might make a tunnel not eligible for offloading. */
1898         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1899                                                  ipip_entry->ipipt)) {
1900                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1901                 return 0;
1902         }
1903
1904         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1905         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1906         return err;
1907 }
1908
1909 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1910                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1911 {
1912         struct net_device *ol_dev = ipip_entry->ol_dev;
1913
1914         if (ol_dev->flags & IFF_UP)
1915                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1916         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1917 }
1918
1919 /* The configuration where several tunnels have the same local address in the
1920  * same underlay table needs special treatment in the HW. That is currently not
1921  * implemented in the driver. This function finds and demotes the first tunnel
1922  * with a given source address, except the one passed in the argument
1923  * `except'.
1924  */
1925 bool
1926 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1927                                      enum mlxsw_sp_l3proto ul_proto,
1928                                      union mlxsw_sp_l3addr saddr,
1929                                      u32 ul_tb_id,
1930                                      const struct mlxsw_sp_ipip_entry *except)
1931 {
1932         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1933
1934         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1935                                  ipip_list_node) {
1936                 if (ipip_entry != except &&
1937                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1938                                                       ul_tb_id, ipip_entry)) {
1939                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1940                         return true;
1941                 }
1942         }
1943
1944         return false;
1945 }
1946
1947 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1948                                                      struct net_device *ul_dev)
1949 {
1950         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1951
1952         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1953                                  ipip_list_node) {
1954                 struct net_device *ol_dev = ipip_entry->ol_dev;
1955                 struct net_device *ipip_ul_dev;
1956
1957                 rcu_read_lock();
1958                 ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1959                 rcu_read_unlock();
1960                 if (ipip_ul_dev == ul_dev)
1961                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1962         }
1963 }
1964
1965 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1966                                             struct net_device *ol_dev,
1967                                             unsigned long event,
1968                                             struct netdev_notifier_info *info)
1969 {
1970         struct netdev_notifier_changeupper_info *chup;
1971         struct netlink_ext_ack *extack;
1972         int err = 0;
1973
1974         switch (event) {
1975         case NETDEV_REGISTER:
1976                 err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1977                 break;
1978         case NETDEV_UNREGISTER:
1979                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1980                 break;
1981         case NETDEV_UP:
1982                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1983                 break;
1984         case NETDEV_DOWN:
1985                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1986                 break;
1987         case NETDEV_CHANGEUPPER:
1988                 chup = container_of(info, typeof(*chup), info);
1989                 extack = info->extack;
1990                 if (netif_is_l3_master(chup->upper_dev))
1991                         err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1992                                                                    ol_dev,
1993                                                                    extack);
1994                 break;
1995         case NETDEV_CHANGE:
1996                 extack = info->extack;
1997                 err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1998                                                               ol_dev, extack);
1999                 break;
2000         case NETDEV_CHANGEMTU:
2001                 err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
2002                 break;
2003         }
2004         return err;
2005 }
2006
2007 static int
2008 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2009                                    struct mlxsw_sp_ipip_entry *ipip_entry,
2010                                    struct net_device *ul_dev,
2011                                    bool *demote_this,
2012                                    unsigned long event,
2013                                    struct netdev_notifier_info *info)
2014 {
2015         struct netdev_notifier_changeupper_info *chup;
2016         struct netlink_ext_ack *extack;
2017
2018         switch (event) {
2019         case NETDEV_CHANGEUPPER:
2020                 chup = container_of(info, typeof(*chup), info);
2021                 extack = info->extack;
2022                 if (netif_is_l3_master(chup->upper_dev))
2023                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
2024                                                                     ipip_entry,
2025                                                                     ul_dev,
2026                                                                     demote_this,
2027                                                                     extack);
2028                 break;
2029
2030         case NETDEV_UP:
2031                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
2032                                                            ul_dev);
2033         case NETDEV_DOWN:
2034                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
2035                                                              ipip_entry,
2036                                                              ul_dev);
2037         }
2038         return 0;
2039 }
2040
2041 static int
2042 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2043                                  struct net_device *ul_dev,
2044                                  unsigned long event,
2045                                  struct netdev_notifier_info *info)
2046 {
2047         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
2048         int err;
2049
2050         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
2051                                                                 ul_dev,
2052                                                                 ipip_entry))) {
2053                 struct mlxsw_sp_ipip_entry *prev;
2054                 bool demote_this = false;
2055
2056                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
2057                                                          ul_dev, &demote_this,
2058                                                          event, info);
2059                 if (err) {
2060                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
2061                                                                  ul_dev);
2062                         return err;
2063                 }
2064
2065                 if (demote_this) {
2066                         if (list_is_first(&ipip_entry->ipip_list_node,
2067                                           &mlxsw_sp->router->ipip_list))
2068                                 prev = NULL;
2069                         else
2070                                 /* This can't be cached from previous iteration,
2071                                  * because that entry could be gone now.
2072                                  */
2073                                 prev = list_prev_entry(ipip_entry,
2074                                                        ipip_list_node);
2075                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
2076                         ipip_entry = prev;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2084                                       enum mlxsw_sp_l3proto ul_proto,
2085                                       const union mlxsw_sp_l3addr *ul_sip,
2086                                       u32 tunnel_index)
2087 {
2088         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2089         struct mlxsw_sp_router *router = mlxsw_sp->router;
2090         struct mlxsw_sp_fib_entry *fib_entry;
2091         int err = 0;
2092
2093         mutex_lock(&mlxsw_sp->router->lock);
2094
2095         if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2096                 err = -EINVAL;
2097                 goto out;
2098         }
2099
2100         router->nve_decap_config.ul_tb_id = ul_tb_id;
2101         router->nve_decap_config.tunnel_index = tunnel_index;
2102         router->nve_decap_config.ul_proto = ul_proto;
2103         router->nve_decap_config.ul_sip = *ul_sip;
2104         router->nve_decap_config.valid = true;
2105
2106         /* It is valid to create a tunnel with a local IP and only later
2107          * assign this IP address to a local interface
2108          */
2109         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2110                                                          ul_proto, ul_sip,
2111                                                          type);
2112         if (!fib_entry)
2113                 goto out;
2114
2115         fib_entry->decap.tunnel_index = tunnel_index;
2116         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2117
2118         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2119         if (err)
2120                 goto err_fib_entry_update;
2121
2122         goto out;
2123
2124 err_fib_entry_update:
2125         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2126         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2127 out:
2128         mutex_unlock(&mlxsw_sp->router->lock);
2129         return err;
2130 }
2131
2132 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2133                                       enum mlxsw_sp_l3proto ul_proto,
2134                                       const union mlxsw_sp_l3addr *ul_sip)
2135 {
2136         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2137         struct mlxsw_sp_router *router = mlxsw_sp->router;
2138         struct mlxsw_sp_fib_entry *fib_entry;
2139
2140         mutex_lock(&mlxsw_sp->router->lock);
2141
2142         if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2143                 goto out;
2144
2145         router->nve_decap_config.valid = false;
2146
2147         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2148                                                          ul_proto, ul_sip,
2149                                                          type);
2150         if (!fib_entry)
2151                 goto out;
2152
2153         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2154         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2155 out:
2156         mutex_unlock(&mlxsw_sp->router->lock);
2157 }
2158
2159 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2160                                          u32 ul_tb_id,
2161                                          enum mlxsw_sp_l3proto ul_proto,
2162                                          const union mlxsw_sp_l3addr *ul_sip)
2163 {
2164         struct mlxsw_sp_router *router = mlxsw_sp->router;
2165
2166         return router->nve_decap_config.valid &&
2167                router->nve_decap_config.ul_tb_id == ul_tb_id &&
2168                router->nve_decap_config.ul_proto == ul_proto &&
2169                !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2170                        sizeof(*ul_sip));
2171 }
2172
/* Hash table key of a neighbour entry: the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
2176
2177 struct mlxsw_sp_neigh_entry {
2178         struct list_head rif_list_node;
2179         struct rhash_head ht_node;
2180         struct mlxsw_sp_neigh_key key;
2181         u16 rif;
2182         bool connected;
2183         unsigned char ha[ETH_ALEN];
2184         struct list_head nexthop_list; /* list of nexthops using
2185                                         * this neigh entry
2186                                         */
2187         struct list_head nexthop_neighs_list_node;
2188         unsigned int counter_index;
2189         bool counter_valid;
2190 };
2191
2192 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2193         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2194         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2195         .key_len = sizeof(struct mlxsw_sp_neigh_key),
2196 };
2197
2198 struct mlxsw_sp_neigh_entry *
2199 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2200                         struct mlxsw_sp_neigh_entry *neigh_entry)
2201 {
2202         if (!neigh_entry) {
2203                 if (list_empty(&rif->neigh_list))
2204                         return NULL;
2205                 else
2206                         return list_first_entry(&rif->neigh_list,
2207                                                 typeof(*neigh_entry),
2208                                                 rif_list_node);
2209         }
2210         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2211                 return NULL;
2212         return list_next_entry(neigh_entry, rif_list_node);
2213 }
2214
2215 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2216 {
2217         return neigh_entry->key.n->tbl->family;
2218 }
2219
2220 unsigned char *
2221 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2222 {
2223         return neigh_entry->ha;
2224 }
2225
2226 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2227 {
2228         struct neighbour *n;
2229
2230         n = neigh_entry->key.n;
2231         return ntohl(*((__be32 *) n->primary_key));
2232 }
2233
2234 struct in6_addr *
2235 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2236 {
2237         struct neighbour *n;
2238
2239         n = neigh_entry->key.n;
2240         return (struct in6_addr *) &n->primary_key;
2241 }
2242
2243 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2244                                struct mlxsw_sp_neigh_entry *neigh_entry,
2245                                u64 *p_counter)
2246 {
2247         if (!neigh_entry->counter_valid)
2248                 return -EINVAL;
2249
2250         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2251                                          p_counter, NULL);
2252 }
2253
2254 static struct mlxsw_sp_neigh_entry *
2255 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2256                            u16 rif)
2257 {
2258         struct mlxsw_sp_neigh_entry *neigh_entry;
2259
2260         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2261         if (!neigh_entry)
2262                 return NULL;
2263
2264         neigh_entry->key.n = n;
2265         neigh_entry->rif = rif;
2266         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2267
2268         return neigh_entry;
2269 }
2270
/* Release the memory of a neighbour entry. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
2275
2276 static int
2277 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2278                             struct mlxsw_sp_neigh_entry *neigh_entry)
2279 {
2280         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2281                                       &neigh_entry->ht_node,
2282                                       mlxsw_sp_neigh_ht_params);
2283 }
2284
2285 static void
2286 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2287                             struct mlxsw_sp_neigh_entry *neigh_entry)
2288 {
2289         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2290                                &neigh_entry->ht_node,
2291                                mlxsw_sp_neigh_ht_params);
2292 }
2293
2294 static bool
2295 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2296                                     struct mlxsw_sp_neigh_entry *neigh_entry)
2297 {
2298         struct devlink *devlink;
2299         const char *table_name;
2300
2301         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2302         case AF_INET:
2303                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2304                 break;
2305         case AF_INET6:
2306                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2307                 break;
2308         default:
2309                 WARN_ON(1);
2310                 return false;
2311         }
2312
2313         devlink = priv_to_devlink(mlxsw_sp->core);
2314         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2315 }
2316
2317 static void
2318 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2319                              struct mlxsw_sp_neigh_entry *neigh_entry)
2320 {
2321         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2322                 return;
2323
2324         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2325                 return;
2326
2327         neigh_entry->counter_valid = true;
2328 }
2329
2330 static void
2331 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2332                             struct mlxsw_sp_neigh_entry *neigh_entry)
2333 {
2334         if (!neigh_entry->counter_valid)
2335                 return;
2336         mlxsw_sp_flow_counter_free(mlxsw_sp,
2337                                    neigh_entry->counter_index);
2338         neigh_entry->counter_valid = false;
2339 }
2340
2341 static struct mlxsw_sp_neigh_entry *
2342 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2343 {
2344         struct mlxsw_sp_neigh_entry *neigh_entry;
2345         struct mlxsw_sp_rif *rif;
2346         int err;
2347
2348         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2349         if (!rif)
2350                 return ERR_PTR(-EINVAL);
2351
2352         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2353         if (!neigh_entry)
2354                 return ERR_PTR(-ENOMEM);
2355
2356         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2357         if (err)
2358                 goto err_neigh_entry_insert;
2359
2360         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2361         atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2362         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2363
2364         return neigh_entry;
2365
2366 err_neigh_entry_insert:
2367         mlxsw_sp_neigh_entry_free(neigh_entry);
2368         return ERR_PTR(err);
2369 }
2370
2371 static void
2372 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2373                              struct mlxsw_sp_neigh_entry *neigh_entry)
2374 {
2375         list_del(&neigh_entry->rif_list_node);
2376         atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2377         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2378         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2379         mlxsw_sp_neigh_entry_free(neigh_entry);
2380 }
2381
2382 static struct mlxsw_sp_neigh_entry *
2383 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2384 {
2385         struct mlxsw_sp_neigh_key key;
2386
2387         key.n = n;
2388         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2389                                       &key, mlxsw_sp_neigh_ht_params);
2390 }
2391
2392 static void
2393 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2394 {
2395         unsigned long interval;
2396
2397 #if IS_ENABLED(CONFIG_IPV6)
2398         interval = min_t(unsigned long,
2399                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2400                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2401 #else
2402         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2403 #endif
2404         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2405 }
2406
2407 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2408                                                    char *rauhtd_pl,
2409                                                    int ent_index)
2410 {
2411         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2412         struct net_device *dev;
2413         struct neighbour *n;
2414         __be32 dipn;
2415         u32 dip;
2416         u16 rif;
2417
2418         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2419
2420         if (WARN_ON_ONCE(rif >= max_rifs))
2421                 return;
2422         if (!mlxsw_sp->router->rifs[rif]) {
2423                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2424                 return;
2425         }
2426
2427         dipn = htonl(dip);
2428         dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2429         n = neigh_lookup(&arp_tbl, &dipn, dev);
2430         if (!n)
2431                 return;
2432
2433         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2434         neigh_event_send(n, NULL);
2435         neigh_release(n);
2436 }
2437
2438 #if IS_ENABLED(CONFIG_IPV6)
2439 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2440                                                    char *rauhtd_pl,
2441                                                    int rec_index)
2442 {
2443         struct net_device *dev;
2444         struct neighbour *n;
2445         struct in6_addr dip;
2446         u16 rif;
2447
2448         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2449                                          (char *) &dip);
2450
2451         if (!mlxsw_sp->router->rifs[rif]) {
2452                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2453                 return;
2454         }
2455
2456         dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2457         n = neigh_lookup(&nd_tbl, &dip, dev);
2458         if (!n)
2459                 return;
2460
2461         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2462         neigh_event_send(n, NULL);
2463         neigh_release(n);
2464 }
2465 #else
2466 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2467                                                    char *rauhtd_pl,
2468                                                    int rec_index)
2469 {
2470 }
2471 #endif
2472
2473 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2474                                                    char *rauhtd_pl,
2475                                                    int rec_index)
2476 {
2477         u8 num_entries;
2478         int i;
2479
2480         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2481                                                                 rec_index);
2482         /* Hardware starts counting at 0, so add 1. */
2483         num_entries++;
2484
2485         /* Each record consists of several neighbour entries. */
2486         for (i = 0; i < num_entries; i++) {
2487                 int ent_index;
2488
2489                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2490                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2491                                                        ent_index);
2492         }
2493
2494 }
2495
/* Process one IPv6 record of a RAUHTD dump. An IPv6 record carries a single
 * entry, so the record index doubles as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2504
2505 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2506                                               char *rauhtd_pl, int rec_index)
2507 {
2508         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2509         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2510                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2511                                                        rec_index);
2512                 break;
2513         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2514                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2515                                                        rec_index);
2516                 break;
2517         }
2518 }
2519
2520 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2521 {
2522         u8 num_rec, last_rec_index, num_entries;
2523
2524         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2525         last_rec_index = num_rec - 1;
2526
2527         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2528                 return false;
2529         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2530             MLXSW_REG_RAUHTD_TYPE_IPV6)
2531                 return true;
2532
2533         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2534                                                                 last_rec_index);
2535         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2536                 return true;
2537         return false;
2538 }
2539
2540 static int
2541 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2542                                        char *rauhtd_pl,
2543                                        enum mlxsw_reg_rauhtd_type type)
2544 {
2545         int i, num_rec;
2546         int err;
2547
2548         /* Ensure the RIF we read from the device does not change mid-dump. */
2549         mutex_lock(&mlxsw_sp->router->lock);
2550         do {
2551                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2552                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2553                                       rauhtd_pl);
2554                 if (err) {
2555                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2556                         break;
2557                 }
2558                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2559                 for (i = 0; i < num_rec; i++)
2560                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2561                                                           i);
2562         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2563         mutex_unlock(&mlxsw_sp->router->lock);
2564
2565         return err;
2566 }
2567
2568 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2569 {
2570         enum mlxsw_reg_rauhtd_type type;
2571         char *rauhtd_pl;
2572         int err;
2573
2574         if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2575                 return 0;
2576
2577         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2578         if (!rauhtd_pl)
2579                 return -ENOMEM;
2580
2581         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2582         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2583         if (err)
2584                 goto out;
2585
2586         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2587         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2588 out:
2589         kfree(rauhtd_pl);
2590         return err;
2591 }
2592
2593 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2594 {
2595         struct mlxsw_sp_neigh_entry *neigh_entry;
2596
2597         mutex_lock(&mlxsw_sp->router->lock);
2598         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2599                             nexthop_neighs_list_node)
2600                 /* If this neigh have nexthops, make the kernel think this neigh
2601                  * is active regardless of the traffic.
2602                  */
2603                 neigh_event_send(neigh_entry->key.n, NULL);
2604         mutex_unlock(&mlxsw_sp->router->lock);
2605 }
2606
2607 static void
2608 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2609 {
2610         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2611
2612         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2613                                msecs_to_jiffies(interval));
2614 }
2615
2616 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2617 {
2618         struct mlxsw_sp_router *router;
2619         int err;
2620
2621         router = container_of(work, struct mlxsw_sp_router,
2622                               neighs_update.dw.work);
2623         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2624         if (err)
2625                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2626
2627         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2628
2629         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2630 }
2631
2632 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2633 {
2634         struct mlxsw_sp_neigh_entry *neigh_entry;
2635         struct mlxsw_sp_router *router;
2636
2637         router = container_of(work, struct mlxsw_sp_router,
2638                               nexthop_probe_dw.work);
2639         /* Iterate over nexthop neighbours, find those who are unresolved and
2640          * send arp on them. This solves the chicken-egg problem when
2641          * the nexthop wouldn't get offloaded until the neighbor is resolved
2642          * but it wouldn't get resolved ever in case traffic is flowing in HW
2643          * using different nexthop.
2644          */
2645         mutex_lock(&router->lock);
2646         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2647                             nexthop_neighs_list_node)
2648                 if (!neigh_entry->connected)
2649                         neigh_event_send(neigh_entry->key.n, NULL);
2650         mutex_unlock(&router->lock);
2651
2652         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2653                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2654 }
2655
2656 static void
2657 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2658                               struct mlxsw_sp_neigh_entry *neigh_entry,
2659                               bool removing, bool dead);
2660
2661 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2662 {
2663         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2664                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2665 }
2666
2667 static int
2668 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2669                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2670                                 enum mlxsw_reg_rauht_op op)
2671 {
2672         struct neighbour *n = neigh_entry->key.n;
2673         u32 dip = ntohl(*((__be32 *) n->primary_key));
2674         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2675
2676         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2677                               dip);
2678         if (neigh_entry->counter_valid)
2679                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2680                                              neigh_entry->counter_index);
2681         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2682 }
2683
2684 static int
2685 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2686                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2687                                 enum mlxsw_reg_rauht_op op)
2688 {
2689         struct neighbour *n = neigh_entry->key.n;
2690         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2691         const char *dip = n->primary_key;
2692
2693         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2694                               dip);
2695         if (neigh_entry->counter_valid)
2696                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2697                                              neigh_entry->counter_index);
2698         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2699 }
2700
2701 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2702 {
2703         struct neighbour *n = neigh_entry->key.n;
2704
2705         /* Packets with a link-local destination address are trapped
2706          * after LPM lookup and never reach the neighbour table, so
2707          * there is no need to program such neighbours to the device.
2708          */
2709         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2710             IPV6_ADDR_LINKLOCAL)
2711                 return true;
2712         return false;
2713 }
2714
2715 static void
2716 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2717                             struct mlxsw_sp_neigh_entry *neigh_entry,
2718                             bool adding)
2719 {
2720         enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2721         int err;
2722
2723         if (!adding && !neigh_entry->connected)
2724                 return;
2725         neigh_entry->connected = adding;
2726         if (neigh_entry->key.n->tbl->family == AF_INET) {
2727                 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2728                                                       op);
2729                 if (err)
2730                         return;
2731         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2732                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2733                         return;
2734                 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2735                                                       op);
2736                 if (err)
2737                         return;
2738         } else {
2739                 WARN_ON_ONCE(1);
2740                 return;
2741         }
2742
2743         if (adding)
2744                 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2745         else
2746                 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2747 }
2748
2749 void
2750 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2751                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2752                                     bool adding)
2753 {
2754         if (adding)
2755                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2756         else
2757                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2758         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2759 }
2760
/* Context of a netevent whose handling is deferred to a work item. It is
 * allocated when the work is scheduled and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        /* Neighbour the event is about; NULL for events scheduled without
         * one. The neighbour event handler releases it with neigh_release().
         */
        struct neighbour *n;
};
2766
2767 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2768 {
2769         struct mlxsw_sp_netevent_work *net_work =
2770                 container_of(work, struct mlxsw_sp_netevent_work, work);
2771         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2772         struct mlxsw_sp_neigh_entry *neigh_entry;
2773         struct neighbour *n = net_work->n;
2774         unsigned char ha[ETH_ALEN];
2775         bool entry_connected;
2776         u8 nud_state, dead;
2777
2778         /* If these parameters are changed after we release the lock,
2779          * then we are guaranteed to receive another event letting us
2780          * know about it.
2781          */
2782         read_lock_bh(&n->lock);
2783         memcpy(ha, n->ha, ETH_ALEN);
2784         nud_state = n->nud_state;
2785         dead = n->dead;
2786         read_unlock_bh(&n->lock);
2787
2788         mutex_lock(&mlxsw_sp->router->lock);
2789         mlxsw_sp_span_respin(mlxsw_sp);
2790
2791         entry_connected = nud_state & NUD_VALID && !dead;
2792         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2793         if (!entry_connected && !neigh_entry)
2794                 goto out;
2795         if (!neigh_entry) {
2796                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2797                 if (IS_ERR(neigh_entry))
2798                         goto out;
2799         }
2800
2801         if (neigh_entry->connected && entry_connected &&
2802             !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2803                 goto out;
2804
2805         memcpy(neigh_entry->ha, ha, ETH_ALEN);
2806         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2807         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2808                                       dead);
2809
2810         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2811                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2812
2813 out:
2814         mutex_unlock(&mlxsw_sp->router->lock);
2815         neigh_release(n);
2816         kfree(net_work);
2817 }
2818
2819 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2820
2821 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2822 {
2823         struct mlxsw_sp_netevent_work *net_work =
2824                 container_of(work, struct mlxsw_sp_netevent_work, work);
2825         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2826
2827         mlxsw_sp_mp_hash_init(mlxsw_sp);
2828         kfree(net_work);
2829 }
2830
2831 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2832
2833 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2834 {
2835         struct mlxsw_sp_netevent_work *net_work =
2836                 container_of(work, struct mlxsw_sp_netevent_work, work);
2837         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2838
2839         __mlxsw_sp_router_init(mlxsw_sp);
2840         kfree(net_work);
2841 }
2842
2843 static int mlxsw_sp_router_schedule_work(struct net *net,
2844                                          struct mlxsw_sp_router *router,
2845                                          struct neighbour *n,
2846                                          void (*cb)(struct work_struct *))
2847 {
2848         struct mlxsw_sp_netevent_work *net_work;
2849
2850         if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2851                 return NOTIFY_DONE;
2852
2853         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2854         if (!net_work)
2855                 return NOTIFY_BAD;
2856
2857         INIT_WORK(&net_work->work, cb);
2858         net_work->mlxsw_sp = router->mlxsw_sp;
2859         net_work->n = n;
2860         mlxsw_core_schedule_work(&net_work->work);
2861         return NOTIFY_DONE;
2862 }
2863
2864 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2865 {
2866         struct mlxsw_sp_port *mlxsw_sp_port;
2867
2868         rcu_read_lock();
2869         mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2870         rcu_read_unlock();
2871         return !!mlxsw_sp_port;
2872 }
2873
2874 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2875                                           unsigned long event, void *ptr)
2876 {
2877         struct mlxsw_sp_router *router;
2878         unsigned long interval;
2879         struct neigh_parms *p;
2880         struct neighbour *n;
2881         struct net *net;
2882
2883         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2884
2885         switch (event) {
2886         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2887                 p = ptr;
2888
2889                 /* We don't care about changes in the default table. */
2890                 if (!p->dev || (p->tbl->family != AF_INET &&
2891                                 p->tbl->family != AF_INET6))
2892                         return NOTIFY_DONE;
2893
2894                 /* We are in atomic context and can't take RTNL mutex,
2895                  * so use RCU variant to walk the device chain.
2896                  */
2897                 if (!mlxsw_sp_dev_lower_is_port(p->dev))
2898                         return NOTIFY_DONE;
2899
2900                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2901                 router->neighs_update.interval = interval;
2902                 break;
2903         case NETEVENT_NEIGH_UPDATE:
2904                 n = ptr;
2905                 net = neigh_parms_net(n->parms);
2906
2907                 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2908                         return NOTIFY_DONE;
2909
2910                 if (!mlxsw_sp_dev_lower_is_port(n->dev))
2911                         return NOTIFY_DONE;
2912
2913                 /* Take a reference to ensure the neighbour won't be
2914                  * destructed until we drop the reference in delayed
2915                  * work.
2916                  */
2917                 neigh_clone(n);
2918                 return mlxsw_sp_router_schedule_work(net, router, n,
2919                                 mlxsw_sp_router_neigh_event_work);
2920
2921         case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2922         case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2923                 return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2924                                 mlxsw_sp_router_mp_hash_event_work);
2925
2926         case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2927                 return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2928                                 mlxsw_sp_router_update_priority_work);
2929         }
2930
2931         return NOTIFY_DONE;
2932 }
2933
2934 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2935 {
2936         int err;
2937
2938         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2939                               &mlxsw_sp_neigh_ht_params);
2940         if (err)
2941                 return err;
2942
2943         /* Initialize the polling interval according to the default
2944          * table.
2945          */
2946         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2947
2948         /* Create the delayed works for the activity_update */
2949         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2950                           mlxsw_sp_router_neighs_update_work);
2951         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2952                           mlxsw_sp_router_probe_unresolved_nexthops);
2953         atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2954         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2955         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2956         return 0;
2957 }
2958
2959 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2960 {
2961         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2962         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2963         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2964 }
2965
2966 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2967                                          struct mlxsw_sp_rif *rif)
2968 {
2969         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2970
2971         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2972                                  rif_list_node) {
2973                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2974                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2975         }
2976 }
2977
/* Kind of a nexthop, stored in struct mlxsw_sp_nexthop::type.
 * mlxsw_sp_nexthop_ha() only dereferences the nexthop's neigh_entry for the
 * ETH kind.
 */
enum mlxsw_sp_nexthop_type {
        MLXSW_SP_NEXTHOP_TYPE_ETH,
        MLXSW_SP_NEXTHOP_TYPE_IPIP,
};
2982
/* What a nexthop does with packets, stored in
 * struct mlxsw_sp_nexthop::action.
 */
enum mlxsw_sp_nexthop_action {
        /* Nexthop forwards packets to an egress RIF */
        MLXSW_SP_NEXTHOP_ACTION_FORWARD,
        /* Nexthop discards packets */
        MLXSW_SP_NEXTHOP_ACTION_DISCARD,
        /* Nexthop traps packets */
        MLXSW_SP_NEXTHOP_ACTION_TRAP,
};
2991
/* Key embedded in struct mlxsw_sp_nexthop: a pointer to a kernel fib_nh.
 * NOTE(review): presumably the lookup key for the nexthop's ht_node hash
 * linkage - confirm against the hash table parameters.
 */
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};
2995
/* A single nexthop. Nexthops are stored inline in the nexthops[] array of
 * their group info.
 */
struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head crif_list_node;
        struct list_head router_list_node; /* member of router->nexthop_list */
        struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
                                                   * this nexthop belongs to
                                                   */
        struct rhash_head ht_node;
        struct neigh_table *neigh_tbl;
        struct mlxsw_sp_nexthop_key key;
        unsigned char gw_addr[sizeof(struct in6_addr)];
        int ifindex;
        int nh_weight;
        int norm_nh_weight;
        int num_adj_entries;
        /* May be NULL; mlxsw_sp_nexthop_dev() reports no device then. */
        struct mlxsw_sp_crif *crif;
        u8 should_offload:1, /* set indicates this nexthop should be written
                              * to the adjacency table.
                              */
           offloaded:1, /* set indicates this nexthop was written to the
                         * adjacency table.
                         */
           update:1; /* set indicates this nexthop should be updated in the
                      * adjacency table (f.e., its MAC changed).
                      */
        enum mlxsw_sp_nexthop_action action;
        enum mlxsw_sp_nexthop_type type;
        /* neigh_entry is the member read for MLXSW_SP_NEXTHOP_TYPE_ETH
         * nexthops. NOTE(review): ipip_entry is presumably the one used for
         * MLXSW_SP_NEXTHOP_TYPE_IPIP - confirm.
         */
        union {
                struct mlxsw_sp_neigh_entry *neigh_entry;
                struct mlxsw_sp_ipip_entry *ipip_entry;
        };
        /* counter_index is only meaningful while counter_valid is set. */
        unsigned int counter_index;
        bool counter_valid;
};
3030
3031 static struct net_device *
3032 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh)
3033 {
3034         if (!nh->crif)
3035                 return NULL;
3036         return nh->crif->key.dev;
3037 }
3038
/* Kind of a nexthop group, stored in struct mlxsw_sp_nexthop_group::type.
 * That struct's union has a member for the IPV4 kind (ipv4.fi) and for the
 * OBJ kind (obj.id).
 */
enum mlxsw_sp_nexthop_group_type {
        MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
        MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
        MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
};
3044
/* The nexthops of a nexthop group together with the group's adjacency
 * information.
 */
struct mlxsw_sp_nexthop_group_info {
        struct mlxsw_sp_nexthop_group *nh_grp;
        /* Reported by mlxsw_sp_nexthop_indexes() as the adjacency index and
         * size; only while adj_index_valid is set.
         */
        u32 adj_index;
        u16 ecmp_size;
        u16 count; /* number of elements in nexthops[] */
        int sum_norm_weight;
        u8 adj_index_valid:1,
           gateway:1, /* routes using the group use a gateway */
           is_resilient:1;
        struct list_head list; /* member in nh_res_grp_list */
        struct mlxsw_sp_nexthop nexthops[];
};
3057
3058 static struct mlxsw_sp_rif *
3059 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi)
3060 {
3061         struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif;
3062
3063         if (!crif)
3064                 return NULL;
3065         return crif->rif;
3066 }
3067
/* Hash key of a nexthop group VR entry, filled from a FIB's vr->id and
 * proto. The whole structure is hashed (key_len is its sizeof), which is
 * why the lookup code memset()s it before filling it in.
 */
struct mlxsw_sp_nexthop_group_vr_key {
        u16 vr_id;
        enum mlxsw_sp_l3proto proto;
};
3072
/* Per nexthop group record of a (VR ID, protocol) pair. It is linked both
 * on the group's vr_list and in its vr_ht hash table.
 */
struct mlxsw_sp_nexthop_group_vr_entry {
        struct list_head list; /* member in vr_list */
        struct rhash_head ht_node; /* member in vr_ht */
        refcount_t ref_count; /* set to 1 when the entry is created */
        struct mlxsw_sp_nexthop_group_vr_key key;
};
3079
/* A nexthop group: the FIB entries that use it, its nexthops (through nhgi)
 * and the VR entries recorded for it.
 */
struct mlxsw_sp_nexthop_group {
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
        /* NOTE(review): which member is valid presumably follows 'type'
         * (ipv4 for the IPV4 kind, obj for the OBJ kind) - confirm.
         */
        union {
                struct {
                        struct fib_info *fi;
                } ipv4;
                struct {
                        u32 id;
                } obj;
        };
        struct mlxsw_sp_nexthop_group_info *nhgi;
        struct list_head vr_list; /* list of the group's VR entries */
        struct rhashtable vr_ht; /* the same VR entries, hashed by their key */
        enum mlxsw_sp_nexthop_group_type type;
        bool can_destroy;
};
3097
3098 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
3099                                     struct mlxsw_sp_nexthop *nh)
3100 {
3101         struct devlink *devlink;
3102
3103         devlink = priv_to_devlink(mlxsw_sp->core);
3104         if (!devlink_dpipe_table_counter_enabled(devlink,
3105                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
3106                 return;
3107
3108         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
3109                 return;
3110
3111         nh->counter_valid = true;
3112 }
3113
3114 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3115                                    struct mlxsw_sp_nexthop *nh)
3116 {
3117         if (!nh->counter_valid)
3118                 return;
3119         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3120         nh->counter_valid = false;
3121 }
3122
3123 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3124                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3125 {
3126         if (!nh->counter_valid)
3127                 return -EINVAL;
3128
3129         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3130                                          p_counter, NULL);
3131 }
3132
3133 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3134                                                struct mlxsw_sp_nexthop *nh)
3135 {
3136         if (!nh) {
3137                 if (list_empty(&router->nexthop_list))
3138                         return NULL;
3139                 else
3140                         return list_first_entry(&router->nexthop_list,
3141                                                 typeof(*nh), router_list_node);
3142         }
3143         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3144                 return NULL;
3145         return list_next_entry(nh, router_list_node);
3146 }
3147
3148 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3149 {
3150         return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3151 }
3152
3153 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3154 {
3155         if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3156             !mlxsw_sp_nexthop_is_forward(nh))
3157                 return NULL;
3158         return nh->neigh_entry->ha;
3159 }
3160
3161 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3162                              u32 *p_adj_size, u32 *p_adj_hash_index)
3163 {
3164         struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3165         u32 adj_hash_index = 0;
3166         int i;
3167
3168         if (!nh->offloaded || !nhgi->adj_index_valid)
3169                 return -EINVAL;
3170
3171         *p_adj_index = nhgi->adj_index;
3172         *p_adj_size = nhgi->ecmp_size;
3173
3174         for (i = 0; i < nhgi->count; i++) {
3175                 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3176
3177                 if (nh_iter == nh)
3178                         break;
3179                 if (nh_iter->offloaded)
3180                         adj_hash_index += nh_iter->num_adj_entries;
3181         }
3182
3183         *p_adj_hash_index = adj_hash_index;
3184         return 0;
3185 }
3186
3187 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3188 {
3189         if (WARN_ON(!nh->crif))
3190                 return NULL;
3191         return nh->crif->rif;
3192 }
3193
3194 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3195 {
3196         struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3197         int i;
3198
3199         for (i = 0; i < nhgi->count; i++) {
3200                 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3201
3202                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3203                         return true;
3204         }
3205         return false;
3206 }
3207
/* Parameters of a nexthop group's vr_ht hash table: entries are
 * struct mlxsw_sp_nexthop_group_vr_entry, hashed over the whole of their
 * 'key' member and linked through 'ht_node'.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
        .automatic_shrinking = true,
};
3214
3215 static struct mlxsw_sp_nexthop_group_vr_entry *
3216 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3217                                        const struct mlxsw_sp_fib *fib)
3218 {
3219         struct mlxsw_sp_nexthop_group_vr_key key;
3220
3221         memset(&key, 0, sizeof(key));
3222         key.vr_id = fib->vr->id;
3223         key.proto = fib->proto;
3224         return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3225                                       mlxsw_sp_nexthop_group_vr_ht_params);
3226 }
3227
3228 static int
3229 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3230                                        const struct mlxsw_sp_fib *fib)
3231 {
3232         struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3233         int err;
3234
3235         vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3236         if (!vr_entry)
3237                 return -ENOMEM;
3238
3239         vr_entry->key.vr_id = fib->vr->id;
3240         vr_entry->key.proto = fib->proto;
3241         refcount_set(&vr_entry->ref_count, 1);
3242
3243         err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3244                                      mlxsw_sp_nexthop_group_vr_ht_params);
3245         if (err)
3246                 goto err_hashtable_insert;
3247
3248         list_add(&vr_entry->list, &nh_grp->vr_list);
3249
3250         return 0;
3251
3252 err_hashtable_insert:
3253         kfree(vr_entry);
3254         return err;
3255 }
3256
3257 static void
3258 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3259                                         struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3260 {
3261         list_del(&vr_entry->list);
3262         rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3263                                mlxsw_sp_nexthop_group_vr_ht_params);
3264         kfree(vr_entry);
3265 }
3266
3267 static int
3268 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3269                                const struct mlxsw_sp_fib *fib)
3270 {
3271         struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3272
3273         vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3274         if (vr_entry) {
3275                 refcount_inc(&vr_entry->ref_count);
3276                 return 0;
3277         }
3278
3279         return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3280 }
3281
3282 static void
3283 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3284                                  const struct mlxsw_sp_fib *fib)
3285 {
3286         struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3287
3288         vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3289         if (WARN_ON_ONCE(!vr_entry))
3290                 return;
3291
3292         if (!refcount_dec_and_test(&vr_entry->ref_count))
3293                 return;
3294
3295         mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3296 }
3297
/* Lookup key passed to the nexthop group hash table helpers
 * (mlxsw_sp_nexthop_group_hash() / mlxsw_sp_nexthop_group_cmp()).
 * @type selects which member of the anonymous union is read.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
        enum mlxsw_sp_nexthop_group_type type;
        union {
                /* Used with MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4 */
                struct fib_info *fi;
                /* Used with MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 */
                struct mlxsw_sp_fib6_entry *fib6_entry;
                /* Used with MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ */
                u32 id;
        };
};
3306
3307 static bool
3308 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3309                                     const struct in6_addr *gw, int ifindex,
3310                                     int weight)
3311 {
3312         int i;
3313
3314         for (i = 0; i < nh_grp->nhgi->count; i++) {
3315                 const struct mlxsw_sp_nexthop *nh;
3316
3317                 nh = &nh_grp->nhgi->nexthops[i];
3318                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3319                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3320                         return true;
3321         }
3322
3323         return false;
3324 }
3325
3326 static bool
3327 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3328                             const struct mlxsw_sp_fib6_entry *fib6_entry)
3329 {
3330         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3331
3332         if (nh_grp->nhgi->count != fib6_entry->nrt6)
3333                 return false;
3334
3335         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3336                 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3337                 struct in6_addr *gw;
3338                 int ifindex, weight;
3339
3340                 ifindex = fib6_nh->fib_nh_dev->ifindex;
3341                 weight = fib6_nh->fib_nh_weight;
3342                 gw = &fib6_nh->fib_nh_gw6;
3343                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3344                                                          weight))
3345                         return false;
3346         }
3347
3348         return true;
3349 }
3350
3351 static int
3352 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3353 {
3354         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3355         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3356
3357         if (nh_grp->type != cmp_arg->type)
3358                 return 1;
3359
3360         switch (cmp_arg->type) {
3361         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3362                 return cmp_arg->fi != nh_grp->ipv4.fi;
3363         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3364                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3365                                                     cmp_arg->fib6_entry);
3366         case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3367                 return cmp_arg->id != nh_grp->obj.id;
3368         default:
3369                 WARN_ON(1);
3370                 return 1;
3371         }
3372 }
3373
3374 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3375 {
3376         const struct mlxsw_sp_nexthop_group *nh_grp = data;
3377         const struct mlxsw_sp_nexthop *nh;
3378         struct fib_info *fi;
3379         unsigned int val;
3380         int i;
3381
3382         switch (nh_grp->type) {
3383         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3384                 fi = nh_grp->ipv4.fi;
3385                 return jhash(&fi, sizeof(fi), seed);
3386         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3387                 val = nh_grp->nhgi->count;
3388                 for (i = 0; i < nh_grp->nhgi->count; i++) {
3389                         nh = &nh_grp->nhgi->nexthops[i];
3390                         val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3391                         val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3392                 }
3393                 return jhash(&val, sizeof(val), seed);
3394         case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3395                 return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3396         default:
3397                 WARN_ON(1);
3398                 return 0;
3399         }
3400 }
3401
3402 static u32
3403 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3404 {
3405         unsigned int val = fib6_entry->nrt6;
3406         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3407
3408         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3409                 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3410                 struct net_device *dev = fib6_nh->fib_nh_dev;
3411                 struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3412
3413                 val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3414                 val ^= jhash(gw, sizeof(*gw), seed);
3415         }
3416
3417         return jhash(&val, sizeof(val), seed);
3418 }
3419
3420 static u32
3421 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3422 {
3423         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3424
3425         switch (cmp_arg->type) {
3426         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3427                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3428         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3429                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3430         case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3431                 return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3432         default:
3433                 WARN_ON(1);
3434                 return 0;
3435         }
3436 }
3437
3438 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3439         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3440         .hashfn      = mlxsw_sp_nexthop_group_hash,
3441         .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3442         .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3443 };
3444
3445 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3446                                          struct mlxsw_sp_nexthop_group *nh_grp)
3447 {
3448         if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3449             !nh_grp->nhgi->gateway)
3450                 return 0;
3451
3452         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3453                                       &nh_grp->ht_node,
3454                                       mlxsw_sp_nexthop_group_ht_params);
3455 }
3456
3457 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3458                                           struct mlxsw_sp_nexthop_group *nh_grp)
3459 {
3460         if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3461             !nh_grp->nhgi->gateway)
3462                 return;
3463
3464         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3465                                &nh_grp->ht_node,
3466                                mlxsw_sp_nexthop_group_ht_params);
3467 }
3468
3469 static struct mlxsw_sp_nexthop_group *
3470 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3471                                struct fib_info *fi)
3472 {
3473         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3474
3475         cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3476         cmp_arg.fi = fi;
3477         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3478                                       &cmp_arg,
3479                                       mlxsw_sp_nexthop_group_ht_params);
3480 }
3481
3482 static struct mlxsw_sp_nexthop_group *
3483 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3484                                struct mlxsw_sp_fib6_entry *fib6_entry)
3485 {
3486         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3487
3488         cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3489         cmp_arg.fib6_entry = fib6_entry;
3490         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3491                                       &cmp_arg,
3492                                       mlxsw_sp_nexthop_group_ht_params);
3493 }
3494
3495 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3496         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3497         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3498         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3499 };
3500
3501 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3502                                    struct mlxsw_sp_nexthop *nh)
3503 {
3504         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3505                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3506 }
3507
3508 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3509                                     struct mlxsw_sp_nexthop *nh)
3510 {
3511         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3512                                mlxsw_sp_nexthop_ht_params);
3513 }
3514
3515 static struct mlxsw_sp_nexthop *
3516 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3517                         struct mlxsw_sp_nexthop_key key)
3518 {
3519         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3520                                       mlxsw_sp_nexthop_ht_params);
3521 }
3522
3523 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3524                                              enum mlxsw_sp_l3proto proto,
3525                                              u16 vr_id,
3526                                              u32 adj_index, u16 ecmp_size,
3527                                              u32 new_adj_index,
3528                                              u16 new_ecmp_size)
3529 {
3530         char raleu_pl[MLXSW_REG_RALEU_LEN];
3531
3532         mlxsw_reg_raleu_pack(raleu_pl,
3533                              (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3534                              adj_index, ecmp_size, new_adj_index,
3535                              new_ecmp_size);
3536         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3537 }
3538
3539 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3540                                           struct mlxsw_sp_nexthop_group *nh_grp,
3541                                           u32 old_adj_index, u16 old_ecmp_size)
3542 {
3543         struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3544         struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3545         int err;
3546
3547         list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3548                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3549                                                         vr_entry->key.proto,
3550                                                         vr_entry->key.vr_id,
3551                                                         old_adj_index,
3552                                                         old_ecmp_size,
3553                                                         nhgi->adj_index,
3554                                                         nhgi->ecmp_size);
3555                 if (err)
3556                         goto err_mass_update_vr;
3557         }
3558         return 0;
3559
3560 err_mass_update_vr:
3561         list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3562                 mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3563                                                   vr_entry->key.vr_id,
3564                                                   nhgi->adj_index,
3565                                                   nhgi->ecmp_size,
3566                                                   old_adj_index, old_ecmp_size);
3567         return err;
3568 }
3569
3570 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3571                                          u32 adj_index,
3572                                          struct mlxsw_sp_nexthop *nh,
3573                                          bool force, char *ratr_pl)
3574 {
3575         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3576         struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
3577         enum mlxsw_reg_ratr_op op;
3578         u16 rif_index;
3579
3580         rif_index = rif ? rif->rif_index :
3581                           mlxsw_sp->router->lb_crif->rif->rif_index;
3582         op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3583                      MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3584         mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3585                             adj_index, rif_index);
3586         switch (nh->action) {
3587         case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3588                 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3589                 break;
3590         case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3591                 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3592                                                MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3593                 break;
3594         case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3595                 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3596                                                MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3597                 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3598                 break;
3599         default:
3600                 WARN_ON_ONCE(1);
3601                 return -EINVAL;
3602         }
3603         if (nh->counter_valid)
3604                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3605         else
3606                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3607
3608         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3609 }
3610
3611 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3612                                 struct mlxsw_sp_nexthop *nh, bool force,
3613                                 char *ratr_pl)
3614 {
3615         int i;
3616
3617         for (i = 0; i < nh->num_adj_entries; i++) {
3618                 int err;
3619
3620                 err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3621                                                     nh, force, ratr_pl);
3622                 if (err)
3623                         return err;
3624         }
3625
3626         return 0;
3627 }
3628
3629 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3630                                           u32 adj_index,
3631                                           struct mlxsw_sp_nexthop *nh,
3632                                           bool force, char *ratr_pl)
3633 {
3634         const struct mlxsw_sp_ipip_ops *ipip_ops;
3635
3636         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3637         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3638                                         force, ratr_pl);
3639 }
3640
3641 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3642                                         u32 adj_index,
3643                                         struct mlxsw_sp_nexthop *nh, bool force,
3644                                         char *ratr_pl)
3645 {
3646         int i;
3647
3648         for (i = 0; i < nh->num_adj_entries; i++) {
3649                 int err;
3650
3651                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3652                                                      nh, force, ratr_pl);
3653                 if (err)
3654                         return err;
3655         }
3656
3657         return 0;
3658 }
3659
3660 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3661                                    struct mlxsw_sp_nexthop *nh, bool force,
3662                                    char *ratr_pl)
3663 {
3664         /* When action is discard or trap, the nexthop must be
3665          * programmed as an Ethernet nexthop.
3666          */
3667         if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3668             nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3669             nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3670                 return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3671                                                    force, ratr_pl);
3672         else
3673                 return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3674                                                     force, ratr_pl);
3675 }
3676
3677 static int
3678 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3679                               struct mlxsw_sp_nexthop_group_info *nhgi,
3680                               bool reallocate)
3681 {
3682         char ratr_pl[MLXSW_REG_RATR_LEN];
3683         u32 adj_index = nhgi->adj_index; /* base */
3684         struct mlxsw_sp_nexthop *nh;
3685         int i;
3686
3687         for (i = 0; i < nhgi->count; i++) {
3688                 nh = &nhgi->nexthops[i];
3689
3690                 if (!nh->should_offload) {
3691                         nh->offloaded = 0;
3692                         continue;
3693                 }
3694
3695                 if (nh->update || reallocate) {
3696                         int err = 0;
3697
3698                         err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3699                                                       true, ratr_pl);
3700                         if (err)
3701                                 return err;
3702                         nh->update = 0;
3703                         nh->offloaded = 1;
3704                 }
3705                 adj_index += nh->num_adj_entries;
3706         }
3707         return 0;
3708 }
3709
3710 static int
3711 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3712                                     struct mlxsw_sp_nexthop_group *nh_grp)
3713 {
3714         struct mlxsw_sp_fib_entry *fib_entry;
3715         int err;
3716
3717         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3718                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3719                 if (err)
3720                         return err;
3721         }
3722         return 0;
3723 }
3724
/* A contiguous range of adjacency group sizes. Tables of these ranges are
 * walked by mlxsw_sp_adj_grp_size_round_up() and
 * mlxsw_sp_adj_grp_size_round_down() to adjust a requested group size.
 */
struct mlxsw_sp_adj_grp_size_range {
        u16 start; /* Inclusive */
        u16 end; /* Inclusive */
};
3729
/* Adjacency group size ranges, ordered by range start value. Any size from
 * 1 to 64 is listed, followed by the individual sizes 512, 1024, 2048 and
 * 4096.
 * NOTE(review): the "sp1" prefix presumably stands for Spectrum-1, and this
 * table is presumably installed as router->adj_grp_size_ranges on that ASIC;
 * the assignment is outside this section - confirm.
 */
static const struct mlxsw_sp_adj_grp_size_range
mlxsw_sp1_adj_grp_size_ranges[] = {
        { .start = 1, .end = 64 },
        { .start = 512, .end = 512 },
        { .start = 1024, .end = 1024 },
        { .start = 2048, .end = 2048 },
        { .start = 4096, .end = 4096 },
};
3739
/* Adjacency group size ranges, ordered by range start value. Any size from
 * 1 to 128 is listed, followed by the individual sizes 256, 512, 1024, 2048
 * and 4096.
 * NOTE(review): the "sp2" prefix presumably stands for Spectrum-2, and this
 * table is presumably installed as router->adj_grp_size_ranges on that ASIC;
 * the assignment is outside this section - confirm.
 */
static const struct mlxsw_sp_adj_grp_size_range
mlxsw_sp2_adj_grp_size_ranges[] = {
        { .start = 1, .end = 128 },
        { .start = 256, .end = 256 },
        { .start = 512, .end = 512 },
        { .start = 1024, .end = 1024 },
        { .start = 2048, .end = 2048 },
        { .start = 4096, .end = 4096 },
};
3750
3751 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3752                                            u16 *p_adj_grp_size)
3753 {
3754         int i;
3755
3756         for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3757                 const struct mlxsw_sp_adj_grp_size_range *size_range;
3758
3759                 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3760
3761                 if (*p_adj_grp_size >= size_range->start &&
3762                     *p_adj_grp_size <= size_range->end)
3763                         return;
3764
3765                 if (*p_adj_grp_size <= size_range->end) {
3766                         *p_adj_grp_size = size_range->end;
3767                         return;
3768                 }
3769         }
3770 }
3771
3772 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3773                                              u16 *p_adj_grp_size,
3774                                              unsigned int alloc_size)
3775 {
3776         int i;
3777
3778         for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3779                 const struct mlxsw_sp_adj_grp_size_range *size_range;
3780
3781                 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3782
3783                 if (alloc_size >= size_range->end) {
3784                         *p_adj_grp_size = size_range->end;
3785                         return;
3786                 }
3787         }
3788 }
3789
3790 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3791                                      u16 *p_adj_grp_size)
3792 {
3793         unsigned int alloc_size;
3794         int err;
3795
3796         /* Round up the requested group size to the next size supported
3797          * by the device and make sure the request can be satisfied.
3798          */
3799         mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3800         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3801                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3802                                               *p_adj_grp_size, &alloc_size);
3803         if (err)
3804                 return err;
3805         /* It is possible the allocation results in more allocated
3806          * entries than requested. Try to use as much of them as
3807          * possible.
3808          */
3809         mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3810
3811         return 0;
3812 }
3813
3814 static void
3815 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3816 {
3817         int i, g = 0, sum_norm_weight = 0;
3818         struct mlxsw_sp_nexthop *nh;
3819
3820         for (i = 0; i < nhgi->count; i++) {
3821                 nh = &nhgi->nexthops[i];
3822
3823                 if (!nh->should_offload)
3824                         continue;
3825                 if (g > 0)
3826                         g = gcd(nh->nh_weight, g);
3827                 else
3828                         g = nh->nh_weight;
3829         }
3830
3831         for (i = 0; i < nhgi->count; i++) {
3832                 nh = &nhgi->nexthops[i];
3833
3834                 if (!nh->should_offload)
3835                         continue;
3836                 nh->norm_nh_weight = nh->nh_weight / g;
3837                 sum_norm_weight += nh->norm_nh_weight;
3838         }
3839
3840         nhgi->sum_norm_weight = sum_norm_weight;
3841 }
3842
3843 static void
3844 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3845 {
3846         int i, weight = 0, lower_bound = 0;
3847         int total = nhgi->sum_norm_weight;
3848         u16 ecmp_size = nhgi->ecmp_size;
3849
3850         for (i = 0; i < nhgi->count; i++) {
3851                 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3852                 int upper_bound;
3853
3854                 if (!nh->should_offload)
3855                         continue;
3856                 weight += nh->norm_nh_weight;
3857                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3858                 nh->num_adj_entries = upper_bound - lower_bound;
3859                 lower_bound = upper_bound;
3860         }
3861 }
3862
/* Forward declaration: called by __mlxsw_sp_nexthop6_group_offload_refresh()
 * below; the definition is not part of this section of the file.
 */
static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
                     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3866
3867 static void
3868 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3869                                         struct mlxsw_sp_nexthop_group *nh_grp)
3870 {
3871         int i;
3872
3873         for (i = 0; i < nh_grp->nhgi->count; i++) {
3874                 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3875
3876                 if (nh->offloaded)
3877                         nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3878                 else
3879                         nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3880         }
3881 }
3882
3883 static void
3884 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3885                                           struct mlxsw_sp_fib6_entry *fib6_entry)
3886 {
3887         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3888
3889         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3890                 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3891                 struct mlxsw_sp_nexthop *nh;
3892
3893                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3894                 if (nh && nh->offloaded)
3895                         fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3896                 else
3897                         fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3898         }
3899 }
3900
3901 static void
3902 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3903                                         struct mlxsw_sp_nexthop_group *nh_grp)
3904 {
3905         struct mlxsw_sp_fib6_entry *fib6_entry;
3906
3907         /* Unfortunately, in IPv6 the route and the nexthop are described by
3908          * the same struct, so we need to iterate over all the routes using the
3909          * nexthop group and set / clear the offload indication for them.
3910          */
3911         list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3912                             common.nexthop_group_node)
3913                 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3914 }
3915
3916 static void
3917 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3918                                         const struct mlxsw_sp_nexthop *nh,
3919                                         u16 bucket_index)
3920 {
3921         struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3922         bool offload = false, trap = false;
3923
3924         if (nh->offloaded) {
3925                 if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3926                         trap = true;
3927                 else
3928                         offload = true;
3929         }
3930         nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3931                                     bucket_index, offload, trap);
3932 }
3933
3934 static void
3935 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3936                                            struct mlxsw_sp_nexthop_group *nh_grp)
3937 {
3938         int i;
3939
3940         /* Do not update the flags if the nexthop group is being destroyed
3941          * since:
3942          * 1. The nexthop objects is being deleted, in which case the flags are
3943          * irrelevant.
3944          * 2. The nexthop group was replaced by a newer group, in which case
3945          * the flags of the nexthop object were already updated based on the
3946          * new group.
3947          */
3948         if (nh_grp->can_destroy)
3949                 return;
3950
3951         nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3952                              nh_grp->nhgi->adj_index_valid, false);
3953
3954         /* Update flags of individual nexthop buckets in case of a resilient
3955          * nexthop group.
3956          */
3957         if (!nh_grp->nhgi->is_resilient)
3958                 return;
3959
3960         for (i = 0; i < nh_grp->nhgi->count; i++) {
3961                 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3962
3963                 mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
3964         }
3965 }
3966
3967 static void
3968 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3969                                        struct mlxsw_sp_nexthop_group *nh_grp)
3970 {
3971         switch (nh_grp->type) {
3972         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3973                 mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3974                 break;
3975         case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3976                 mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3977                 break;
3978         case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3979                 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
3980                 break;
3981         }
3982 }
3983
3984 static int
3985 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3986                                struct mlxsw_sp_nexthop_group *nh_grp)
3987 {
3988         struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3989         u16 ecmp_size, old_ecmp_size;
3990         struct mlxsw_sp_nexthop *nh;
3991         bool offload_change = false;
3992         u32 adj_index;
3993         bool old_adj_index_valid;
3994         u32 old_adj_index;
3995         int i, err2, err;
3996
3997         if (!nhgi->gateway)
3998                 return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3999
4000         for (i = 0; i < nhgi->count; i++) {
4001                 nh = &nhgi->nexthops[i];
4002
4003                 if (nh->should_offload != nh->offloaded) {
4004                         offload_change = true;
4005                         if (nh->should_offload)
4006                                 nh->update = 1;
4007                 }
4008         }
4009         if (!offload_change) {
4010                 /* Nothing was added or removed, so no need to reallocate. Just
4011                  * update MAC on existing adjacency indexes.
4012                  */
4013                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
4014                 if (err) {
4015                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4016                         goto set_trap;
4017                 }
4018                 /* Flags of individual nexthop buckets might need to be
4019                  * updated.
4020                  */
4021                 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4022                 return 0;
4023         }
4024         mlxsw_sp_nexthop_group_normalize(nhgi);
4025         if (!nhgi->sum_norm_weight) {
4026                 /* No neigh of this group is connected so we just set
4027                  * the trap and let everthing flow through kernel.
4028                  */
4029                 err = 0;
4030                 goto set_trap;
4031         }
4032
4033         ecmp_size = nhgi->sum_norm_weight;
4034         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
4035         if (err)
4036                 /* No valid allocation size available. */
4037                 goto set_trap;
4038
4039         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4040                                   ecmp_size, &adj_index);
4041         if (err) {
4042                 /* We ran out of KVD linear space, just set the
4043                  * trap and let everything flow through kernel.
4044                  */
4045                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
4046                 goto set_trap;
4047         }
4048         old_adj_index_valid = nhgi->adj_index_valid;
4049         old_adj_index = nhgi->adj_index;
4050         old_ecmp_size = nhgi->ecmp_size;
4051         nhgi->adj_index_valid = 1;
4052         nhgi->adj_index = adj_index;
4053         nhgi->ecmp_size = ecmp_size;
4054         mlxsw_sp_nexthop_group_rebalance(nhgi);
4055         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
4056         if (err) {
4057                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4058                 goto set_trap;
4059         }
4060
4061         mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4062
4063         if (!old_adj_index_valid) {
4064                 /* The trap was set for fib entries, so we have to call
4065                  * fib entry update to unset it and use adjacency index.
4066                  */
4067                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4068                 if (err) {
4069                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
4070                         goto set_trap;
4071                 }
4072                 return 0;
4073         }
4074
4075         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
4076                                              old_adj_index, old_ecmp_size);
4077         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4078                            old_ecmp_size, old_adj_index);
4079         if (err) {
4080                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
4081                 goto set_trap;
4082         }
4083
4084         return 0;
4085
4086 set_trap:
4087         old_adj_index_valid = nhgi->adj_index_valid;
4088         nhgi->adj_index_valid = 0;
4089         for (i = 0; i < nhgi->count; i++) {
4090                 nh = &nhgi->nexthops[i];
4091                 nh->offloaded = 0;
4092         }
4093         err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4094         if (err2)
4095                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
4096         mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4097         if (old_adj_index_valid)
4098                 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4099                                    nhgi->ecmp_size, nhgi->adj_index);
4100         return err;
4101 }
4102
4103 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
4104                                             bool removing)
4105 {
4106         if (!removing) {
4107                 nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
4108                 nh->should_offload = 1;
4109         } else if (nh->nhgi->is_resilient) {
4110                 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4111                 nh->should_offload = 1;
4112         } else {
4113                 nh->should_offload = 0;
4114         }
4115         nh->update = 1;
4116 }
4117
4118 static int
4119 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
4120                                     struct mlxsw_sp_neigh_entry *neigh_entry)
4121 {
4122         struct neighbour *n, *old_n = neigh_entry->key.n;
4123         struct mlxsw_sp_nexthop *nh;
4124         struct net_device *dev;
4125         bool entry_connected;
4126         u8 nud_state, dead;
4127         int err;
4128
4129         nh = list_first_entry(&neigh_entry->nexthop_list,
4130                               struct mlxsw_sp_nexthop, neigh_list_node);
4131         dev = mlxsw_sp_nexthop_dev(nh);
4132
4133         n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4134         if (!n) {
4135                 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4136                 if (IS_ERR(n))
4137                         return PTR_ERR(n);
4138                 neigh_event_send(n, NULL);
4139         }
4140
4141         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
4142         neigh_entry->key.n = n;
4143         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4144         if (err)
4145                 goto err_neigh_entry_insert;
4146
4147         read_lock_bh(&n->lock);
4148         nud_state = n->nud_state;
4149         dead = n->dead;
4150         read_unlock_bh(&n->lock);
4151         entry_connected = nud_state & NUD_VALID && !dead;
4152
4153         list_for_each_entry(nh, &neigh_entry->nexthop_list,
4154                             neigh_list_node) {
4155                 neigh_release(old_n);
4156                 neigh_clone(n);
4157                 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4158                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4159         }
4160
4161         neigh_release(n);
4162
4163         return 0;
4164
4165 err_neigh_entry_insert:
4166         neigh_entry->key.n = old_n;
4167         mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4168         neigh_release(n);
4169         return err;
4170 }
4171
4172 static void
4173 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4174                               struct mlxsw_sp_neigh_entry *neigh_entry,
4175                               bool removing, bool dead)
4176 {
4177         struct mlxsw_sp_nexthop *nh;
4178
4179         if (list_empty(&neigh_entry->nexthop_list))
4180                 return;
4181
4182         if (dead) {
4183                 int err;
4184
4185                 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4186                                                           neigh_entry);
4187                 if (err)
4188                         dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4189                 return;
4190         }
4191
4192         list_for_each_entry(nh, &neigh_entry->nexthop_list,
4193                             neigh_list_node) {
4194                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4195                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4196         }
4197 }
4198
4199 static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh,
4200                                        struct mlxsw_sp_crif *crif)
4201 {
4202         if (nh->crif)
4203                 return;
4204
4205         nh->crif = crif;
4206         list_add(&nh->crif_list_node, &crif->nexthop_list);
4207 }
4208
4209 static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh)
4210 {
4211         if (!nh->crif)
4212                 return;
4213
4214         list_del(&nh->crif_list_node);
4215         nh->crif = NULL;
4216 }
4217
4218 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4219                                        struct mlxsw_sp_nexthop *nh)
4220 {
4221         struct mlxsw_sp_neigh_entry *neigh_entry;
4222         struct net_device *dev;
4223         struct neighbour *n;
4224         u8 nud_state, dead;
4225         int err;
4226
4227         if (WARN_ON(!nh->crif->rif))
4228                 return 0;
4229
4230         if (!nh->nhgi->gateway || nh->neigh_entry)
4231                 return 0;
4232         dev = mlxsw_sp_nexthop_dev(nh);
4233
4234         /* Take a reference of neigh here ensuring that neigh would
4235          * not be destructed before the nexthop entry is finished.
4236          * The reference is taken either in neigh_lookup() or
4237          * in neigh_create() in case n is not found.
4238          */
4239         n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4240         if (!n) {
4241                 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4242                 if (IS_ERR(n))
4243                         return PTR_ERR(n);
4244                 neigh_event_send(n, NULL);
4245         }
4246         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4247         if (!neigh_entry) {
4248                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4249                 if (IS_ERR(neigh_entry)) {
4250                         err = -EINVAL;
4251                         goto err_neigh_entry_create;
4252                 }
4253         }
4254
4255         /* If that is the first nexthop connected to that neigh, add to
4256          * nexthop_neighs_list
4257          */
4258         if (list_empty(&neigh_entry->nexthop_list))
4259                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4260                               &mlxsw_sp->router->nexthop_neighs_list);
4261
4262         nh->neigh_entry = neigh_entry;
4263         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4264         read_lock_bh(&n->lock);
4265         nud_state = n->nud_state;
4266         dead = n->dead;
4267         read_unlock_bh(&n->lock);
4268         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4269
4270         return 0;
4271
4272 err_neigh_entry_create:
4273         neigh_release(n);
4274         return err;
4275 }
4276
4277 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4278                                         struct mlxsw_sp_nexthop *nh)
4279 {
4280         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4281         struct neighbour *n;
4282
4283         if (!neigh_entry)
4284                 return;
4285         n = neigh_entry->key.n;
4286
4287         __mlxsw_sp_nexthop_neigh_update(nh, true);
4288         list_del(&nh->neigh_list_node);
4289         nh->neigh_entry = NULL;
4290
4291         /* If that is the last nexthop connected to that neigh, remove from
4292          * nexthop_neighs_list
4293          */
4294         if (list_empty(&neigh_entry->nexthop_list))
4295                 list_del(&neigh_entry->nexthop_neighs_list_node);
4296
4297         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4298                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4299
4300         neigh_release(n);
4301 }
4302
4303 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4304 {
4305         struct net_device *ul_dev;
4306         bool is_up;
4307
4308         rcu_read_lock();
4309         ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4310         is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4311         rcu_read_unlock();
4312
4313         return is_up;
4314 }
4315
4316 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4317                                        struct mlxsw_sp_nexthop *nh,
4318                                        struct mlxsw_sp_ipip_entry *ipip_entry)
4319 {
4320         struct mlxsw_sp_crif *crif;
4321         bool removing;
4322
4323         if (!nh->nhgi->gateway || nh->ipip_entry)
4324                 return;
4325
4326         crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev);
4327         if (WARN_ON(!crif))
4328                 return;
4329
4330         nh->ipip_entry = ipip_entry;
4331         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4332         __mlxsw_sp_nexthop_neigh_update(nh, removing);
4333         mlxsw_sp_nexthop_crif_init(nh, crif);
4334 }
4335
4336 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4337                                        struct mlxsw_sp_nexthop *nh)
4338 {
4339         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4340
4341         if (!ipip_entry)
4342                 return;
4343
4344         __mlxsw_sp_nexthop_neigh_update(nh, true);
4345         nh->ipip_entry = NULL;
4346 }
4347
4348 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4349                                         const struct fib_nh *fib_nh,
4350                                         enum mlxsw_sp_ipip_type *p_ipipt)
4351 {
4352         struct net_device *dev = fib_nh->fib_nh_dev;
4353
4354         return dev &&
4355                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4356                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4357 }
4358
4359 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4360                                       struct mlxsw_sp_nexthop *nh,
4361                                       const struct net_device *dev)
4362 {
4363         const struct mlxsw_sp_ipip_ops *ipip_ops;
4364         struct mlxsw_sp_ipip_entry *ipip_entry;
4365         struct mlxsw_sp_crif *crif;
4366         int err;
4367
4368         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4369         if (ipip_entry) {
4370                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4371                 if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4372                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4373                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4374                         return 0;
4375                 }
4376         }
4377
4378         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4379         crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev);
4380         if (!crif)
4381                 return 0;
4382
4383         mlxsw_sp_nexthop_crif_init(nh, crif);
4384
4385         if (!crif->rif)
4386                 return 0;
4387
4388         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4389         if (err)
4390                 goto err_neigh_init;
4391
4392         return 0;
4393
4394 err_neigh_init:
4395         mlxsw_sp_nexthop_crif_fini(nh);
4396         return err;
4397 }
4398
4399 static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp,
4400                                            struct mlxsw_sp_nexthop *nh)
4401 {
4402         switch (nh->type) {
4403         case MLXSW_SP_NEXTHOP_TYPE_ETH:
4404                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4405                 break;
4406         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4407                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4408                 break;
4409         }
4410 }
4411
/* Undo mlxsw_sp_nexthop_type_init(): first drop the type-specific state of
 * the nexthop, then detach it from its CRIF.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);

	mlxsw_sp_nexthop_crif_fini(nh);
}
4418
4419 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4420                                   struct mlxsw_sp_nexthop_group *nh_grp,
4421                                   struct mlxsw_sp_nexthop *nh,
4422                                   struct fib_nh *fib_nh)
4423 {
4424         struct net_device *dev = fib_nh->fib_nh_dev;
4425         struct in_device *in_dev;
4426         int err;
4427
4428         nh->nhgi = nh_grp->nhgi;
4429         nh->key.fib_nh = fib_nh;
4430 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4431         nh->nh_weight = fib_nh->fib_nh_weight;
4432 #else
4433         nh->nh_weight = 1;
4434 #endif
4435         memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4436         nh->neigh_tbl = &arp_tbl;
4437         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4438         if (err)
4439                 return err;
4440
4441         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4442         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4443
4444         if (!dev)
4445                 return 0;
4446         nh->ifindex = dev->ifindex;
4447
4448         rcu_read_lock();
4449         in_dev = __in_dev_get_rcu(dev);
4450         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4451             fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4452                 rcu_read_unlock();
4453                 return 0;
4454         }
4455         rcu_read_unlock();
4456
4457         err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4458         if (err)
4459                 goto err_nexthop_neigh_init;
4460
4461         return 0;
4462
4463 err_nexthop_neigh_init:
4464         list_del(&nh->router_list_node);
4465         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4466         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4467         return err;
4468 }
4469
4470 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4471                                    struct mlxsw_sp_nexthop *nh)
4472 {
4473         mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4474         list_del(&nh->router_list_node);
4475         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4476         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4477 }
4478
4479 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4480                                     unsigned long event, struct fib_nh *fib_nh)
4481 {
4482         struct mlxsw_sp_nexthop_key key;
4483         struct mlxsw_sp_nexthop *nh;
4484
4485         key.fib_nh = fib_nh;
4486         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4487         if (!nh)
4488                 return;
4489
4490         switch (event) {
4491         case FIB_EVENT_NH_ADD:
4492                 mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4493                 break;
4494         case FIB_EVENT_NH_DEL:
4495                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4496                 break;
4497         }
4498
4499         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4500 }
4501
4502 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4503                                         struct mlxsw_sp_rif *rif)
4504 {
4505         struct net_device *dev = mlxsw_sp_rif_dev(rif);
4506         struct mlxsw_sp_nexthop *nh;
4507         bool removing;
4508
4509         list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) {
4510                 switch (nh->type) {
4511                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4512                         removing = false;
4513                         break;
4514                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4515                         removing = !mlxsw_sp_ipip_netdev_ul_up(dev);
4516                         break;
4517                 default:
4518                         WARN_ON(1);
4519                         continue;
4520                 }
4521
4522                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4523                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4524         }
4525 }
4526
4527 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4528                                            struct mlxsw_sp_rif *rif)
4529 {
4530         struct mlxsw_sp_nexthop *nh, *tmp;
4531
4532         list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4533                                  crif_list_node) {
4534                 mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4535                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4536         }
4537 }
4538
4539 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4540 {
4541         enum mlxsw_reg_ratr_trap_action trap_action;
4542         char ratr_pl[MLXSW_REG_RATR_LEN];
4543         int err;
4544
4545         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4546                                   &mlxsw_sp->router->adj_trap_index);
4547         if (err)
4548                 return err;
4549
4550         trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4551         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4552                             MLXSW_REG_RATR_TYPE_ETHERNET,
4553                             mlxsw_sp->router->adj_trap_index,
4554                             mlxsw_sp->router->lb_crif->rif->rif_index);
4555         mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4556         mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4557         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4558         if (err)
4559                 goto err_ratr_write;
4560
4561         return 0;
4562
4563 err_ratr_write:
4564         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4565                            mlxsw_sp->router->adj_trap_index);
4566         return err;
4567 }
4568
4569 static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
4570 {
4571         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4572                            mlxsw_sp->router->adj_trap_index);
4573 }
4574
4575 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4576 {
4577         int err;
4578
4579         if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4580                 return 0;
4581
4582         err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4583         if (err)
4584                 return err;
4585
4586         refcount_set(&mlxsw_sp->router->num_groups, 1);
4587
4588         return 0;
4589 }
4590
4591 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4592 {
4593         if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4594                 return;
4595
4596         mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4597 }
4598
4599 static void
4600 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4601                              const struct mlxsw_sp_nexthop_group *nh_grp,
4602                              unsigned long *activity)
4603 {
4604         char *ratrad_pl;
4605         int i, err;
4606
4607         ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4608         if (!ratrad_pl)
4609                 return;
4610
4611         mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4612                               nh_grp->nhgi->count);
4613         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4614         if (err)
4615                 goto out;
4616
4617         for (i = 0; i < nh_grp->nhgi->count; i++) {
4618                 if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4619                         continue;
4620                 bitmap_set(activity, i, 1);
4621         }
4622
4623 out:
4624         kfree(ratrad_pl);
4625 }
4626
4627 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4628
4629 static void
4630 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4631                                 const struct mlxsw_sp_nexthop_group *nh_grp)
4632 {
4633         unsigned long *activity;
4634
4635         activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4636         if (!activity)
4637                 return;
4638
4639         mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4640         nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4641                                         nh_grp->nhgi->count, activity);
4642
4643         bitmap_free(activity);
4644 }
4645
4646 static void
4647 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4648 {
4649         unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4650
4651         mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4652                                msecs_to_jiffies(interval));
4653 }
4654
4655 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4656 {
4657         struct mlxsw_sp_nexthop_group_info *nhgi;
4658         struct mlxsw_sp_router *router;
4659         bool reschedule = false;
4660
4661         router = container_of(work, struct mlxsw_sp_router,
4662                               nh_grp_activity_dw.work);
4663
4664         mutex_lock(&router->lock);
4665
4666         list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4667                 mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4668                 reschedule = true;
4669         }
4670
4671         mutex_unlock(&router->lock);
4672
4673         if (!reschedule)
4674                 return;
4675         mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4676 }
4677
4678 static int
4679 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4680                                      const struct nh_notifier_single_info *nh,
4681                                      struct netlink_ext_ack *extack)
4682 {
4683         int err = -EINVAL;
4684
4685         if (nh->is_fdb)
4686                 NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4687         else if (nh->has_encap)
4688                 NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4689         else
4690                 err = 0;
4691
4692         return err;
4693 }
4694
4695 static int
4696 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4697                                           const struct nh_notifier_single_info *nh,
4698                                           struct netlink_ext_ack *extack)
4699 {
4700         int err;
4701
4702         err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4703         if (err)
4704                 return err;
4705
4706         /* Device only nexthops with an IPIP device are programmed as
4707          * encapsulating adjacency entries.
4708          */
4709         if (!nh->gw_family && !nh->is_reject &&
4710             !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4711                 NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4712                 return -EINVAL;
4713         }
4714
4715         return 0;
4716 }
4717
4718 static int
4719 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4720                                     const struct nh_notifier_grp_info *nh_grp,
4721                                     struct netlink_ext_ack *extack)
4722 {
4723         int i;
4724
4725         if (nh_grp->is_fdb) {
4726                 NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4727                 return -EINVAL;
4728         }
4729
4730         for (i = 0; i < nh_grp->num_nh; i++) {
4731                 const struct nh_notifier_single_info *nh;
4732                 int err;
4733
4734                 nh = &nh_grp->nh_entries[i].nh;
4735                 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4736                                                                 extack);
4737                 if (err)
4738                         return err;
4739         }
4740
4741         return 0;
4742 }
4743
4744 static int
4745 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4746                                              const struct nh_notifier_res_table_info *nh_res_table,
4747                                              struct netlink_ext_ack *extack)
4748 {
4749         unsigned int alloc_size;
4750         bool valid_size = false;
4751         int err, i;
4752
4753         if (nh_res_table->num_nh_buckets < 32) {
4754                 NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4755                 return -EINVAL;
4756         }
4757
4758         for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4759                 const struct mlxsw_sp_adj_grp_size_range *size_range;
4760
4761                 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4762
4763                 if (nh_res_table->num_nh_buckets >= size_range->start &&
4764                     nh_res_table->num_nh_buckets <= size_range->end) {
4765                         valid_size = true;
4766                         break;
4767                 }
4768         }
4769
4770         if (!valid_size) {
4771                 NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4772                 return -EINVAL;
4773         }
4774
4775         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4776                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4777                                               nh_res_table->num_nh_buckets,
4778                                               &alloc_size);
4779         if (err || nh_res_table->num_nh_buckets != alloc_size) {
4780                 NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
4781                 return -EINVAL;
4782         }
4783
4784         return 0;
4785 }
4786
4787 static int
4788 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4789                                         const struct nh_notifier_res_table_info *nh_res_table,
4790                                         struct netlink_ext_ack *extack)
4791 {
4792         int err;
4793         u16 i;
4794
4795         err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4796                                                            nh_res_table,
4797                                                            extack);
4798         if (err)
4799                 return err;
4800
4801         for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4802                 const struct nh_notifier_single_info *nh;
4803                 int err;
4804
4805                 nh = &nh_res_table->nhs[i];
4806                 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4807                                                                 extack);
4808                 if (err)
4809                         return err;
4810         }
4811
4812         return 0;
4813 }
4814
4815 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4816                                          unsigned long event,
4817                                          struct nh_notifier_info *info)
4818 {
4819         struct nh_notifier_single_info *nh;
4820
4821         if (event != NEXTHOP_EVENT_REPLACE &&
4822             event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4823             event != NEXTHOP_EVENT_BUCKET_REPLACE)
4824                 return 0;
4825
4826         switch (info->type) {
4827         case NH_NOTIFIER_INFO_TYPE_SINGLE:
4828                 return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4829                                                             info->extack);
4830         case NH_NOTIFIER_INFO_TYPE_GRP:
4831                 return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4832                                                            info->nh_grp,
4833                                                            info->extack);
4834         case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4835                 return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4836                                                                info->nh_res_table,
4837                                                                info->extack);
4838         case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4839                 nh = &info->nh_res_bucket->new_nh;
4840                 return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4841                                                                  info->extack);
4842         default:
4843                 NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4844                 return -EOPNOTSUPP;
4845         }
4846 }
4847
4848 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4849                                             const struct nh_notifier_info *info)
4850 {
4851         const struct net_device *dev;
4852
4853         switch (info->type) {
4854         case NH_NOTIFIER_INFO_TYPE_SINGLE:
4855                 dev = info->nh->dev;
4856                 return info->nh->gw_family || info->nh->is_reject ||
4857                        mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4858         case NH_NOTIFIER_INFO_TYPE_GRP:
4859         case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4860                 /* Already validated earlier. */
4861                 return true;
4862         default:
4863                 return false;
4864         }
4865 }
4866
4867 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4868                                                 struct mlxsw_sp_nexthop *nh)
4869 {
4870         nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4871         nh->should_offload = 1;
4872         /* While nexthops that discard packets do not forward packets
4873          * via an egress RIF, they still need to be programmed using a
4874          * valid RIF, so use the loopback RIF created during init.
4875          */
4876         nh->crif = mlxsw_sp->router->lb_crif;
4877 }
4878
4879 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4880                                                 struct mlxsw_sp_nexthop *nh)
4881 {
4882         nh->crif = NULL;
4883         nh->should_offload = 0;
4884 }
4885
4886 static int
4887 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4888                           struct mlxsw_sp_nexthop_group *nh_grp,
4889                           struct mlxsw_sp_nexthop *nh,
4890                           struct nh_notifier_single_info *nh_obj, int weight)
4891 {
4892         struct net_device *dev = nh_obj->dev;
4893         int err;
4894
4895         nh->nhgi = nh_grp->nhgi;
4896         nh->nh_weight = weight;
4897
4898         switch (nh_obj->gw_family) {
4899         case AF_INET:
4900                 memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4901                 nh->neigh_tbl = &arp_tbl;
4902                 break;
4903         case AF_INET6:
4904                 memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4905 #if IS_ENABLED(CONFIG_IPV6)
4906                 nh->neigh_tbl = &nd_tbl;
4907 #endif
4908                 break;
4909         }
4910
4911         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4912         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4913         nh->ifindex = dev->ifindex;
4914
4915         err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4916         if (err)
4917                 goto err_type_init;
4918
4919         if (nh_obj->is_reject)
4920                 mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4921
4922         /* In a resilient nexthop group, all the nexthops must be written to
4923          * the adjacency table. Even if they do not have a valid neighbour or
4924          * RIF.
4925          */
4926         if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
4927                 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4928                 nh->should_offload = 1;
4929         }
4930
4931         return 0;
4932
4933 err_type_init:
4934         list_del(&nh->router_list_node);
4935         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4936         return err;
4937 }
4938
4939 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4940                                       struct mlxsw_sp_nexthop *nh)
4941 {
4942         if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
4943                 mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4944         mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4945         list_del(&nh->router_list_node);
4946         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4947         nh->should_offload = 0;
4948 }
4949
4950 static int
4951 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4952                                      struct mlxsw_sp_nexthop_group *nh_grp,
4953                                      struct nh_notifier_info *info)
4954 {
4955         struct mlxsw_sp_nexthop_group_info *nhgi;
4956         struct mlxsw_sp_nexthop *nh;
4957         bool is_resilient = false;
4958         unsigned int nhs;
4959         int err, i;
4960
4961         switch (info->type) {
4962         case NH_NOTIFIER_INFO_TYPE_SINGLE:
4963                 nhs = 1;
4964                 break;
4965         case NH_NOTIFIER_INFO_TYPE_GRP:
4966                 nhs = info->nh_grp->num_nh;
4967                 break;
4968         case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4969                 nhs = info->nh_res_table->num_nh_buckets;
4970                 is_resilient = true;
4971                 break;
4972         default:
4973                 return -EINVAL;
4974         }
4975
4976         nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4977         if (!nhgi)
4978                 return -ENOMEM;
4979         nh_grp->nhgi = nhgi;
4980         nhgi->nh_grp = nh_grp;
4981         nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4982         nhgi->is_resilient = is_resilient;
4983         nhgi->count = nhs;
4984         for (i = 0; i < nhgi->count; i++) {
4985                 struct nh_notifier_single_info *nh_obj;
4986                 int weight;
4987
4988                 nh = &nhgi->nexthops[i];
4989                 switch (info->type) {
4990                 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4991                         nh_obj = info->nh;
4992                         weight = 1;
4993                         break;
4994                 case NH_NOTIFIER_INFO_TYPE_GRP:
4995                         nh_obj = &info->nh_grp->nh_entries[i].nh;
4996                         weight = info->nh_grp->nh_entries[i].weight;
4997                         break;
4998                 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4999                         nh_obj = &info->nh_res_table->nhs[i];
5000                         weight = 1;
5001                         break;
5002                 default:
5003                         err = -EINVAL;
5004                         goto err_nexthop_obj_init;
5005                 }
5006                 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
5007                                                 weight);
5008                 if (err)
5009                         goto err_nexthop_obj_init;
5010         }
5011         err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5012         if (err)
5013                 goto err_group_inc;
5014         err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5015         if (err) {
5016                 NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
5017                 goto err_group_refresh;
5018         }
5019
5020         /* Add resilient nexthop groups to a list so that the activity of their
5021          * nexthop buckets will be periodically queried and cleared.
5022          */
5023         if (nhgi->is_resilient) {
5024                 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5025                         mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
5026                 list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
5027         }
5028
5029         return 0;
5030
5031 err_group_refresh:
5032         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5033 err_group_inc:
5034         i = nhgi->count;
5035 err_nexthop_obj_init:
5036         for (i--; i >= 0; i--) {
5037                 nh = &nhgi->nexthops[i];
5038                 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5039         }
5040         kfree(nhgi);
5041         return err;
5042 }
5043
5044 static void
5045 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5046                                      struct mlxsw_sp_nexthop_group *nh_grp)
5047 {
5048         struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5049         struct mlxsw_sp_router *router = mlxsw_sp->router;
5050         int i;
5051
5052         if (nhgi->is_resilient) {
5053                 list_del(&nhgi->list);
5054                 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5055                         cancel_delayed_work(&router->nh_grp_activity_dw);
5056         }
5057
5058         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5059         for (i = nhgi->count - 1; i >= 0; i--) {
5060                 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5061
5062                 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5063         }
5064         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5065         WARN_ON_ONCE(nhgi->adj_index_valid);
5066         kfree(nhgi);
5067 }
5068
5069 static struct mlxsw_sp_nexthop_group *
5070 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
5071                                   struct nh_notifier_info *info)
5072 {
5073         struct mlxsw_sp_nexthop_group *nh_grp;
5074         int err;
5075
5076         nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5077         if (!nh_grp)
5078                 return ERR_PTR(-ENOMEM);
5079         INIT_LIST_HEAD(&nh_grp->vr_list);
5080         err = rhashtable_init(&nh_grp->vr_ht,
5081                               &mlxsw_sp_nexthop_group_vr_ht_params);
5082         if (err)
5083                 goto err_nexthop_group_vr_ht_init;
5084         INIT_LIST_HEAD(&nh_grp->fib_list);
5085         nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5086         nh_grp->obj.id = info->id;
5087
5088         err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
5089         if (err)
5090                 goto err_nexthop_group_info_init;
5091
5092         nh_grp->can_destroy = false;
5093
5094         return nh_grp;
5095
5096 err_nexthop_group_info_init:
5097         rhashtable_destroy(&nh_grp->vr_ht);
5098 err_nexthop_group_vr_ht_init:
5099         kfree(nh_grp);
5100         return ERR_PTR(err);
5101 }
5102
5103 static void
5104 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
5105                                    struct mlxsw_sp_nexthop_group *nh_grp)
5106 {
5107         if (!nh_grp->can_destroy)
5108                 return;
5109         mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
5110         WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
5111         WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5112         rhashtable_destroy(&nh_grp->vr_ht);
5113         kfree(nh_grp);
5114 }
5115
5116 static struct mlxsw_sp_nexthop_group *
5117 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
5118 {
5119         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
5120
5121         cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5122         cmp_arg.id = id;
5123         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5124                                       &cmp_arg,
5125                                       mlxsw_sp_nexthop_group_ht_params);
5126 }
5127
/* Add a new nexthop object group; a thin wrapper that forwards to
 * mlxsw_sp_nexthop_group_insert() and returns its result.
 */
static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp)
{
        int err;

        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);

        return err;
}
5133
5134 static int
5135 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5136                                    struct mlxsw_sp_nexthop_group *nh_grp,
5137                                    struct mlxsw_sp_nexthop_group *old_nh_grp,
5138                                    struct netlink_ext_ack *extack)
5139 {
5140         struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5141         struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5142         int err;
5143
5144         old_nh_grp->nhgi = new_nhgi;
5145         new_nhgi->nh_grp = old_nh_grp;
5146         nh_grp->nhgi = old_nhgi;
5147         old_nhgi->nh_grp = nh_grp;
5148
5149         if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5150                 /* Both the old adjacency index and the new one are valid.
5151                  * Routes are currently using the old one. Tell the device to
5152                  * replace the old adjacency index with the new one.
5153                  */
5154                 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5155                                                      old_nhgi->adj_index,
5156                                                      old_nhgi->ecmp_size);
5157                 if (err) {
5158                         NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5159                         goto err_out;
5160                 }
5161         } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5162                 /* The old adjacency index is valid, while the new one is not.
5163                  * Iterate over all the routes using the group and change them
5164                  * to trap packets to the CPU.
5165                  */
5166                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5167                 if (err) {
5168                         NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5169                         goto err_out;
5170                 }
5171         } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5172                 /* The old adjacency index is invalid, while the new one is.
5173                  * Iterate over all the routes using the group and change them
5174                  * to forward packets using the new valid index.
5175                  */
5176                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5177                 if (err) {
5178                         NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5179                         goto err_out;
5180                 }
5181         }
5182
5183         /* Make sure the flags are set / cleared based on the new nexthop group
5184          * information.
5185          */
5186         mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5187
5188         /* At this point 'nh_grp' is just a shell that is not used by anyone
5189          * and its nexthop group info is the old info that was just replaced
5190          * with the new one. Remove it.
5191          */
5192         nh_grp->can_destroy = true;
5193         mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5194
5195         return 0;
5196
5197 err_out:
5198         old_nhgi->nh_grp = old_nh_grp;
5199         nh_grp->nhgi = new_nhgi;
5200         new_nhgi->nh_grp = nh_grp;
5201         old_nh_grp->nhgi = old_nhgi;
5202         return err;
5203 }
5204
5205 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5206                                     struct nh_notifier_info *info)
5207 {
5208         struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5209         struct netlink_ext_ack *extack = info->extack;
5210         int err;
5211
5212         nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5213         if (IS_ERR(nh_grp))
5214                 return PTR_ERR(nh_grp);
5215
5216         old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5217         if (!old_nh_grp)
5218                 err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5219         else
5220                 err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5221                                                          old_nh_grp, extack);
5222
5223         if (err) {
5224                 nh_grp->can_destroy = true;
5225                 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5226         }
5227
5228         return err;
5229 }
5230
5231 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5232                                      struct nh_notifier_info *info)
5233 {
5234         struct mlxsw_sp_nexthop_group *nh_grp;
5235
5236         nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5237         if (!nh_grp)
5238                 return;
5239
5240         nh_grp->can_destroy = true;
5241         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5242
5243         /* If the group still has routes using it, then defer the delete
5244          * operation until the last route using it is deleted.
5245          */
5246         if (!list_empty(&nh_grp->fib_list))
5247                 return;
5248         mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5249 }
5250
5251 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5252                                              u32 adj_index, char *ratr_pl)
5253 {
5254         MLXSW_REG_ZERO(ratr, ratr_pl);
5255         mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5256         mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5257         mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5258
5259         return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5260 }
5261
5262 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5263 {
5264         /* Clear the opcode and activity on both the old and new payload as
5265          * they are irrelevant for the comparison.
5266          */
5267         mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5268         mlxsw_reg_ratr_a_set(ratr_pl, 0);
5269         mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5270         mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5271
5272         /* If the contents of the adjacency entry are consistent with the
5273          * replacement request, then replacement was successful.
5274          */
5275         if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5276                 return 0;
5277
5278         return -EINVAL;
5279 }
5280
5281 static int
5282 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5283                                        struct mlxsw_sp_nexthop *nh,
5284                                        struct nh_notifier_info *info)
5285 {
5286         u16 bucket_index = info->nh_res_bucket->bucket_index;
5287         struct netlink_ext_ack *extack = info->extack;
5288         bool force = info->nh_res_bucket->force;
5289         char ratr_pl_new[MLXSW_REG_RATR_LEN];
5290         char ratr_pl[MLXSW_REG_RATR_LEN];
5291         u32 adj_index;
5292         int err;
5293
5294         /* No point in trying an atomic replacement if the idle timer interval
5295          * is smaller than the interval in which we query and clear activity.
5296          */
5297         if (!force && info->nh_res_bucket->idle_timer_ms <
5298             MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5299                 force = true;
5300
5301         adj_index = nh->nhgi->adj_index + bucket_index;
5302         err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5303         if (err) {
5304                 NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5305                 return err;
5306         }
5307
5308         if (!force) {
5309                 err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5310                                                         ratr_pl_new);
5311                 if (err) {
5312                         NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5313                         return err;
5314                 }
5315
5316                 err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5317                 if (err) {
5318                         NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5319                         return err;
5320                 }
5321         }
5322
5323         nh->update = 0;
5324         nh->offloaded = 1;
5325         mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5326
5327         return 0;
5328 }
5329
5330 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5331                                                struct nh_notifier_info *info)
5332 {
5333         u16 bucket_index = info->nh_res_bucket->bucket_index;
5334         struct netlink_ext_ack *extack = info->extack;
5335         struct mlxsw_sp_nexthop_group_info *nhgi;
5336         struct nh_notifier_single_info *nh_obj;
5337         struct mlxsw_sp_nexthop_group *nh_grp;
5338         struct mlxsw_sp_nexthop *nh;
5339         int err;
5340
5341         nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5342         if (!nh_grp) {
5343                 NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5344                 return -EINVAL;
5345         }
5346
5347         nhgi = nh_grp->nhgi;
5348
5349         if (bucket_index >= nhgi->count) {
5350                 NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5351                 return -EINVAL;
5352         }
5353
5354         nh = &nhgi->nexthops[bucket_index];
5355         mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5356
5357         nh_obj = &info->nh_res_bucket->new_nh;
5358         err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5359         if (err) {
5360                 NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5361                 goto err_nexthop_obj_init;
5362         }
5363
5364         err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5365         if (err)
5366                 goto err_nexthop_obj_bucket_adj_update;
5367
5368         return 0;
5369
5370 err_nexthop_obj_bucket_adj_update:
5371         mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5372 err_nexthop_obj_init:
5373         nh_obj = &info->nh_res_bucket->old_nh;
5374         mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5375         /* The old adjacency entry was not overwritten */
5376         nh->update = 0;
5377         nh->offloaded = 1;
5378         return err;
5379 }
5380
5381 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5382                                       unsigned long event, void *ptr)
5383 {
5384         struct nh_notifier_info *info = ptr;
5385         struct mlxsw_sp_router *router;
5386         int err = 0;
5387
5388         router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5389         err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5390         if (err)
5391                 goto out;
5392
5393         mutex_lock(&router->lock);
5394
5395         switch (event) {
5396         case NEXTHOP_EVENT_REPLACE:
5397                 err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5398                 break;
5399         case NEXTHOP_EVENT_DEL:
5400                 mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5401                 break;
5402         case NEXTHOP_EVENT_BUCKET_REPLACE:
5403                 err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5404                                                           info);
5405                 break;
5406         default:
5407                 break;
5408         }
5409
5410         mutex_unlock(&router->lock);
5411
5412 out:
5413         return notifier_from_errno(err);
5414 }
5415
5416 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5417                                    struct fib_info *fi)
5418 {
5419         const struct fib_nh *nh = fib_info_nh(fi, 0);
5420
5421         return nh->fib_nh_gw_family ||
5422                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5423 }
5424
5425 static int
5426 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5427                                   struct mlxsw_sp_nexthop_group *nh_grp)
5428 {
5429         unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5430         struct mlxsw_sp_nexthop_group_info *nhgi;
5431         struct mlxsw_sp_nexthop *nh;
5432         int err, i;
5433
5434         nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5435         if (!nhgi)
5436                 return -ENOMEM;
5437         nh_grp->nhgi = nhgi;
5438         nhgi->nh_grp = nh_grp;
5439         nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5440         nhgi->count = nhs;
5441         for (i = 0; i < nhgi->count; i++) {
5442                 struct fib_nh *fib_nh;
5443
5444                 nh = &nhgi->nexthops[i];
5445                 fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5446                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5447                 if (err)
5448                         goto err_nexthop4_init;
5449         }
5450         err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5451         if (err)
5452                 goto err_group_inc;
5453         err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5454         if (err)
5455                 goto err_group_refresh;
5456
5457         return 0;
5458
5459 err_group_refresh:
5460         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5461 err_group_inc:
5462         i = nhgi->count;
5463 err_nexthop4_init:
5464         for (i--; i >= 0; i--) {
5465                 nh = &nhgi->nexthops[i];
5466                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5467         }
5468         kfree(nhgi);
5469         return err;
5470 }
5471
5472 static void
5473 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5474                                   struct mlxsw_sp_nexthop_group *nh_grp)
5475 {
5476         struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5477         int i;
5478
5479         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5480         for (i = nhgi->count - 1; i >= 0; i--) {
5481                 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5482
5483                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5484         }
5485         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5486         WARN_ON_ONCE(nhgi->adj_index_valid);
5487         kfree(nhgi);
5488 }
5489
5490 static struct mlxsw_sp_nexthop_group *
5491 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5492 {
5493         struct mlxsw_sp_nexthop_group *nh_grp;
5494         int err;
5495
5496         nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5497         if (!nh_grp)
5498                 return ERR_PTR(-ENOMEM);
5499         INIT_LIST_HEAD(&nh_grp->vr_list);
5500         err = rhashtable_init(&nh_grp->vr_ht,
5501                               &mlxsw_sp_nexthop_group_vr_ht_params);
5502         if (err)
5503                 goto err_nexthop_group_vr_ht_init;
5504         INIT_LIST_HEAD(&nh_grp->fib_list);
5505         nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5506         nh_grp->ipv4.fi = fi;
5507         fib_info_hold(fi);
5508
5509         err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5510         if (err)
5511                 goto err_nexthop_group_info_init;
5512
5513         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5514         if (err)
5515                 goto err_nexthop_group_insert;
5516
5517         nh_grp->can_destroy = true;
5518
5519         return nh_grp;
5520
5521 err_nexthop_group_insert:
5522         mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5523 err_nexthop_group_info_init:
5524         fib_info_put(fi);
5525         rhashtable_destroy(&nh_grp->vr_ht);
5526 err_nexthop_group_vr_ht_init:
5527         kfree(nh_grp);
5528         return ERR_PTR(err);
5529 }
5530
5531 static void
5532 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5533                                 struct mlxsw_sp_nexthop_group *nh_grp)
5534 {
5535         if (!nh_grp->can_destroy)
5536                 return;
5537         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5538         mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5539         fib_info_put(nh_grp->ipv4.fi);
5540         WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5541         rhashtable_destroy(&nh_grp->vr_ht);
5542         kfree(nh_grp);
5543 }
5544
5545 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5546                                        struct mlxsw_sp_fib_entry *fib_entry,
5547                                        struct fib_info *fi)
5548 {
5549         struct mlxsw_sp_nexthop_group *nh_grp;
5550
5551         if (fi->nh) {
5552                 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5553                                                            fi->nh->id);
5554                 if (WARN_ON_ONCE(!nh_grp))
5555                         return -EINVAL;
5556                 goto out;
5557         }
5558
5559         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5560         if (!nh_grp) {
5561                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5562                 if (IS_ERR(nh_grp))
5563                         return PTR_ERR(nh_grp);
5564         }
5565 out:
5566         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5567         fib_entry->nh_group = nh_grp;
5568         return 0;
5569 }
5570
5571 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5572                                         struct mlxsw_sp_fib_entry *fib_entry)
5573 {
5574         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5575
5576         list_del(&fib_entry->nexthop_group_node);
5577         if (!list_empty(&nh_grp->fib_list))
5578                 return;
5579
5580         if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5581                 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5582                 return;
5583         }
5584
5585         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5586 }
5587
5588 static bool
5589 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5590 {
5591         struct mlxsw_sp_fib4_entry *fib4_entry;
5592
5593         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5594                                   common);
5595         return !fib4_entry->dscp;
5596 }
5597
5598 static bool
5599 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5600 {
5601         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5602
5603         switch (fib_entry->fib_node->fib->proto) {
5604         case MLXSW_SP_L3_PROTO_IPV4:
5605                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5606                         return false;
5607                 break;
5608         case MLXSW_SP_L3_PROTO_IPV6:
5609                 break;
5610         }
5611
5612         switch (fib_entry->type) {
5613         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5614                 return !!nh_group->nhgi->adj_index_valid;
5615         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5616                 return !!mlxsw_sp_nhgi_rif(nh_group->nhgi);
5617         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5618         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5619         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5620                 return true;
5621         default:
5622                 return false;
5623         }
5624 }
5625
5626 static struct mlxsw_sp_nexthop *
5627 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5628                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5629 {
5630         int i;
5631
5632         for (i = 0; i < nh_grp->nhgi->count; i++) {
5633                 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5634                 struct net_device *dev = mlxsw_sp_nexthop_dev(nh);
5635                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5636
5637                 if (dev && dev == rt->fib6_nh->fib_nh_dev &&
5638                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5639                                     &rt->fib6_nh->fib_nh_gw6))
5640                         return nh;
5641         }
5642
5643         return NULL;
5644 }
5645
5646 static void
5647 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5648                                       struct fib_entry_notifier_info *fen_info)
5649 {
5650         u32 *p_dst = (u32 *) &fen_info->dst;
5651         struct fib_rt_info fri;
5652
5653         fri.fi = fen_info->fi;
5654         fri.tb_id = fen_info->tb_id;
5655         fri.dst = cpu_to_be32(*p_dst);
5656         fri.dst_len = fen_info->dst_len;
5657         fri.dscp = fen_info->dscp;
5658         fri.type = fen_info->type;
5659         fri.offload = false;
5660         fri.trap = false;
5661         fri.offload_failed = true;
5662         fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5663 }
5664
5665 static void
5666 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5667                                  struct mlxsw_sp_fib_entry *fib_entry)
5668 {
5669         u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5670         int dst_len = fib_entry->fib_node->key.prefix_len;
5671         struct mlxsw_sp_fib4_entry *fib4_entry;
5672         struct fib_rt_info fri;
5673         bool should_offload;
5674
5675         should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5676         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5677                                   common);
5678         fri.fi = fib4_entry->fi;
5679         fri.tb_id = fib4_entry->tb_id;
5680         fri.dst = cpu_to_be32(*p_dst);
5681         fri.dst_len = dst_len;
5682         fri.dscp = fib4_entry->dscp;
5683         fri.type = fib4_entry->type;
5684         fri.offload = should_offload;
5685         fri.trap = !should_offload;
5686         fri.offload_failed = false;
5687         fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5688 }
5689
5690 static void
5691 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5692                                    struct mlxsw_sp_fib_entry *fib_entry)
5693 {
5694         u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5695         int dst_len = fib_entry->fib_node->key.prefix_len;
5696         struct mlxsw_sp_fib4_entry *fib4_entry;
5697         struct fib_rt_info fri;
5698
5699         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5700                                   common);
5701         fri.fi = fib4_entry->fi;
5702         fri.tb_id = fib4_entry->tb_id;
5703         fri.dst = cpu_to_be32(*p_dst);
5704         fri.dst_len = dst_len;
5705         fri.dscp = fib4_entry->dscp;
5706         fri.type = fib4_entry->type;
5707         fri.offload = false;
5708         fri.trap = false;
5709         fri.offload_failed = false;
5710         fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5711 }
5712
5713 #if IS_ENABLED(CONFIG_IPV6)
5714 static void
5715 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5716                                       struct fib6_info **rt_arr,
5717                                       unsigned int nrt6)
5718 {
5719         int i;
5720
5721         /* In IPv6 a multipath route is represented using multiple routes, so
5722          * we need to set the flags on all of them.
5723          */
5724         for (i = 0; i < nrt6; i++)
5725                 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5726                                        false, false, true);
5727 }
5728 #else
5729 static void
5730 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5731                                       struct fib6_info **rt_arr,
5732                                       unsigned int nrt6)
5733 {
5734 }
5735 #endif
5736
5737 #if IS_ENABLED(CONFIG_IPV6)
5738 static void
5739 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5740                                  struct mlxsw_sp_fib_entry *fib_entry)
5741 {
5742         struct mlxsw_sp_fib6_entry *fib6_entry;
5743         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5744         bool should_offload;
5745
5746         should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5747
5748         /* In IPv6 a multipath route is represented using multiple routes, so
5749          * we need to set the flags on all of them.
5750          */
5751         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5752                                   common);
5753         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5754                 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5755                                        should_offload, !should_offload, false);
5756 }
5757 #else
5758 static void
5759 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5760                                  struct mlxsw_sp_fib_entry *fib_entry)
5761 {
5762 }
5763 #endif
5764
5765 #if IS_ENABLED(CONFIG_IPV6)
5766 static void
5767 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5768                                    struct mlxsw_sp_fib_entry *fib_entry)
5769 {
5770         struct mlxsw_sp_fib6_entry *fib6_entry;
5771         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5772
5773         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5774                                   common);
5775         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5776                 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5777                                        false, false, false);
5778 }
5779 #else
5780 static void
5781 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5782                                    struct mlxsw_sp_fib_entry *fib_entry)
5783 {
5784 }
5785 #endif
5786
5787 static void
5788 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5789                                 struct mlxsw_sp_fib_entry *fib_entry)
5790 {
5791         switch (fib_entry->fib_node->fib->proto) {
5792         case MLXSW_SP_L3_PROTO_IPV4:
5793                 mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5794                 break;
5795         case MLXSW_SP_L3_PROTO_IPV6:
5796                 mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5797                 break;
5798         }
5799 }
5800
5801 static void
5802 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5803                                   struct mlxsw_sp_fib_entry *fib_entry)
5804 {
5805         switch (fib_entry->fib_node->fib->proto) {
5806         case MLXSW_SP_L3_PROTO_IPV4:
5807                 mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5808                 break;
5809         case MLXSW_SP_L3_PROTO_IPV6:
5810                 mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5811                 break;
5812         }
5813 }
5814
5815 static void
5816 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5817                                     struct mlxsw_sp_fib_entry *fib_entry,
5818                                     enum mlxsw_reg_ralue_op op)
5819 {
5820         switch (op) {
5821         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5822                 mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5823                 break;
5824         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5825                 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5826                 break;
5827         default:
5828                 break;
5829         }
5830 }
5831
5832 static void
5833 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5834                               const struct mlxsw_sp_fib_entry *fib_entry,
5835                               enum mlxsw_reg_ralue_op op)
5836 {
5837         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5838         enum mlxsw_reg_ralxx_protocol proto;
5839         u32 *p_dip;
5840
5841         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5842
5843         switch (fib->proto) {
5844         case MLXSW_SP_L3_PROTO_IPV4:
5845                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
5846                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5847                                       fib_entry->fib_node->key.prefix_len,
5848                                       *p_dip);
5849                 break;
5850         case MLXSW_SP_L3_PROTO_IPV6:
5851                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5852                                       fib_entry->fib_node->key.prefix_len,
5853                                       fib_entry->fib_node->key.addr);
5854                 break;
5855         }
5856 }
5857
5858 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5859                                         struct mlxsw_sp_fib_entry *fib_entry,
5860                                         enum mlxsw_reg_ralue_op op)
5861 {
5862         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5863         struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5864         char ralue_pl[MLXSW_REG_RALUE_LEN];
5865         enum mlxsw_reg_ralue_trap_action trap_action;
5866         u16 trap_id = 0;
5867         u32 adjacency_index = 0;
5868         u16 ecmp_size = 0;
5869
5870         /* In case the nexthop group adjacency index is valid, use it
5871          * with provided ECMP size. Otherwise, setup trap and pass
5872          * traffic to kernel.
5873          */
5874         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5875                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5876                 adjacency_index = nhgi->adj_index;
5877                 ecmp_size = nhgi->ecmp_size;
5878         } else if (!nhgi->adj_index_valid && nhgi->count &&
5879                    mlxsw_sp_nhgi_rif(nhgi)) {
5880                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5881                 adjacency_index = mlxsw_sp->router->adj_trap_index;
5882                 ecmp_size = 1;
5883         } else {
5884                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5885                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5886         }
5887
5888         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5889         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
5890                                         adjacency_index, ecmp_size);
5891         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5892 }
5893
5894 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5895                                        struct mlxsw_sp_fib_entry *fib_entry,
5896                                        enum mlxsw_reg_ralue_op op)
5897 {
5898         struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi);
5899         enum mlxsw_reg_ralue_trap_action trap_action;
5900         char ralue_pl[MLXSW_REG_RALUE_LEN];
5901         u16 trap_id = 0;
5902         u16 rif_index = 0;
5903
5904         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5905                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5906                 rif_index = rif->rif_index;
5907         } else {
5908                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5909                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5910         }
5911
5912         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5913         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
5914                                        rif_index);
5915         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5916 }
5917
5918 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
5919                                       struct mlxsw_sp_fib_entry *fib_entry,
5920                                       enum mlxsw_reg_ralue_op op)
5921 {
5922         char ralue_pl[MLXSW_REG_RALUE_LEN];
5923
5924         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5925         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5926         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5927 }
5928
5929 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
5930                                            struct mlxsw_sp_fib_entry *fib_entry,
5931                                            enum mlxsw_reg_ralue_op op)
5932 {
5933         enum mlxsw_reg_ralue_trap_action trap_action;
5934         char ralue_pl[MLXSW_REG_RALUE_LEN];
5935
5936         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
5937         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5938         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
5939         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5940 }
5941
5942 static int
5943 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
5944                                   struct mlxsw_sp_fib_entry *fib_entry,
5945                                   enum mlxsw_reg_ralue_op op)
5946 {
5947         enum mlxsw_reg_ralue_trap_action trap_action;
5948         char ralue_pl[MLXSW_REG_RALUE_LEN];
5949         u16 trap_id;
5950
5951         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5952         trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
5953
5954         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5955         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
5956         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5957 }
5958
5959 static int
5960 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
5961                                  struct mlxsw_sp_fib_entry *fib_entry,
5962                                  enum mlxsw_reg_ralue_op op)
5963 {
5964         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
5965         const struct mlxsw_sp_ipip_ops *ipip_ops;
5966         char ralue_pl[MLXSW_REG_RALUE_LEN];
5967         int err;
5968
5969         if (WARN_ON(!ipip_entry))
5970                 return -EINVAL;
5971
5972         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5973         err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
5974                                      fib_entry->decap.tunnel_index);
5975         if (err)
5976                 return err;
5977
5978         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5979         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5980                                            fib_entry->decap.tunnel_index);
5981         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5982 }
5983
5984 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
5985                                            struct mlxsw_sp_fib_entry *fib_entry,
5986                                            enum mlxsw_reg_ralue_op op)
5987 {
5988         char ralue_pl[MLXSW_REG_RALUE_LEN];
5989
5990         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5991         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5992                                            fib_entry->decap.tunnel_index);
5993         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5994 }
5995
5996 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5997                                    struct mlxsw_sp_fib_entry *fib_entry,
5998                                    enum mlxsw_reg_ralue_op op)
5999 {
6000         switch (fib_entry->type) {
6001         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
6002                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
6003         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
6004                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
6005         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
6006                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
6007         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
6008                 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
6009         case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
6010                 return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
6011                                                          op);
6012         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6013                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
6014                                                         fib_entry, op);
6015         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
6016                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
6017         }
6018         return -EINVAL;
6019 }
6020
6021 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6022                                  struct mlxsw_sp_fib_entry *fib_entry,
6023                                  enum mlxsw_reg_ralue_op op)
6024 {
6025         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
6026
6027         if (err)
6028                 return err;
6029
6030         mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
6031
6032         return err;
6033 }
6034
6035 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
6036                                      struct mlxsw_sp_fib_entry *fib_entry)
6037 {
6038         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6039                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
6040 }
6041
6042 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
6043                                   struct mlxsw_sp_fib_entry *fib_entry)
6044 {
6045         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6046                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
6047 }
6048
6049 static int
6050 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6051                              const struct fib_entry_notifier_info *fen_info,
6052                              struct mlxsw_sp_fib_entry *fib_entry)
6053 {
6054         struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6055         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
6056         struct mlxsw_sp_router *router = mlxsw_sp->router;
6057         u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
6058         int ifindex = nhgi->nexthops[0].ifindex;
6059         struct mlxsw_sp_ipip_entry *ipip_entry;
6060
6061         switch (fen_info->type) {
6062         case RTN_LOCAL:
6063                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6064                                                                MLXSW_SP_L3_PROTO_IPV4, dip);
6065                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6066                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6067                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
6068                                                              fib_entry,
6069                                                              ipip_entry);
6070                 }
6071                 if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6072                                                  MLXSW_SP_L3_PROTO_IPV4,
6073                                                  &dip)) {
6074                         u32 tunnel_index;
6075
6076                         tunnel_index = router->nve_decap_config.tunnel_index;
6077                         fib_entry->decap.tunnel_index = tunnel_index;
6078                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6079                         return 0;
6080                 }
6081                 fallthrough;
6082         case RTN_BROADCAST:
6083                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6084                 return 0;
6085         case RTN_BLACKHOLE:
6086                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6087                 return 0;
6088         case RTN_UNREACHABLE:
6089         case RTN_PROHIBIT:
6090                 /* Packets hitting these routes need to be trapped, but
6091                  * can do so with a lower priority than packets directed
6092                  * at the host, so use action type local instead of trap.
6093                  */
6094                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6095                 return 0;
6096         case RTN_UNICAST:
6097                 if (nhgi->gateway)
6098                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6099                 else
6100                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6101                 return 0;
6102         default:
6103                 return -EINVAL;
6104         }
6105 }
6106
6107 static void
6108 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6109                               struct mlxsw_sp_fib_entry *fib_entry)
6110 {
6111         switch (fib_entry->type) {
6112         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6113                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
6114                 break;
6115         default:
6116                 break;
6117         }
6118 }
6119
6120 static void
6121 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6122                                struct mlxsw_sp_fib4_entry *fib4_entry)
6123 {
6124         mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6125 }
6126
6127 static struct mlxsw_sp_fib4_entry *
6128 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6129                            struct mlxsw_sp_fib_node *fib_node,
6130                            const struct fib_entry_notifier_info *fen_info)
6131 {
6132         struct mlxsw_sp_fib4_entry *fib4_entry;
6133         struct mlxsw_sp_fib_entry *fib_entry;
6134         int err;
6135
6136         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6137         if (!fib4_entry)
6138                 return ERR_PTR(-ENOMEM);
6139         fib_entry = &fib4_entry->common;
6140
6141         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6142         if (err)
6143                 goto err_nexthop4_group_get;
6144
6145         err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6146                                              fib_node->fib);
6147         if (err)
6148                 goto err_nexthop_group_vr_link;
6149
6150         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6151         if (err)
6152                 goto err_fib4_entry_type_set;
6153
6154         fib4_entry->fi = fen_info->fi;
6155         fib_info_hold(fib4_entry->fi);
6156         fib4_entry->tb_id = fen_info->tb_id;
6157         fib4_entry->type = fen_info->type;
6158         fib4_entry->dscp = fen_info->dscp;
6159
6160         fib_entry->fib_node = fib_node;
6161
6162         return fib4_entry;
6163
6164 err_fib4_entry_type_set:
6165         mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6166 err_nexthop_group_vr_link:
6167         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6168 err_nexthop4_group_get:
6169         kfree(fib4_entry);
6170         return ERR_PTR(err);
6171 }
6172
6173 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6174                                         struct mlxsw_sp_fib4_entry *fib4_entry)
6175 {
6176         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6177
6178         fib_info_put(fib4_entry->fi);
6179         mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6180         mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6181                                          fib_node->fib);
6182         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6183         kfree(fib4_entry);
6184 }
6185
6186 static struct mlxsw_sp_fib4_entry *
6187 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6188                            const struct fib_entry_notifier_info *fen_info)
6189 {
6190         struct mlxsw_sp_fib4_entry *fib4_entry;
6191         struct mlxsw_sp_fib_node *fib_node;
6192         struct mlxsw_sp_fib *fib;
6193         struct mlxsw_sp_vr *vr;
6194
6195         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6196         if (!vr)
6197                 return NULL;
6198         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6199
6200         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6201                                             sizeof(fen_info->dst),
6202                                             fen_info->dst_len);
6203         if (!fib_node)
6204                 return NULL;
6205
6206         fib4_entry = container_of(fib_node->fib_entry,
6207                                   struct mlxsw_sp_fib4_entry, common);
6208         if (fib4_entry->tb_id == fen_info->tb_id &&
6209             fib4_entry->dscp == fen_info->dscp &&
6210             fib4_entry->type == fen_info->type &&
6211             fib4_entry->fi == fen_info->fi)
6212                 return fib4_entry;
6213
6214         return NULL;
6215 }
6216
6217 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6218         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6219         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6220         .key_len = sizeof(struct mlxsw_sp_fib_key),
6221         .automatic_shrinking = true,
6222 };
6223
6224 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6225                                     struct mlxsw_sp_fib_node *fib_node)
6226 {
6227         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6228                                       mlxsw_sp_fib_ht_params);
6229 }
6230
6231 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6232                                      struct mlxsw_sp_fib_node *fib_node)
6233 {
6234         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6235                                mlxsw_sp_fib_ht_params);
6236 }
6237
6238 static struct mlxsw_sp_fib_node *
6239 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6240                          size_t addr_len, unsigned char prefix_len)
6241 {
6242         struct mlxsw_sp_fib_key key;
6243
6244         memset(&key, 0, sizeof(key));
6245         memcpy(key.addr, addr, addr_len);
6246         key.prefix_len = prefix_len;
6247         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6248 }
6249
6250 static struct mlxsw_sp_fib_node *
6251 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6252                          size_t addr_len, unsigned char prefix_len)
6253 {
6254         struct mlxsw_sp_fib_node *fib_node;
6255
6256         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6257         if (!fib_node)
6258                 return NULL;
6259
6260         list_add(&fib_node->list, &fib->node_list);
6261         memcpy(fib_node->key.addr, addr, addr_len);
6262         fib_node->key.prefix_len = prefix_len;
6263
6264         return fib_node;
6265 }
6266
6267 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6268 {
6269         list_del(&fib_node->list);
6270         kfree(fib_node);
6271 }
6272
6273 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6274                                       struct mlxsw_sp_fib_node *fib_node)
6275 {
6276         struct mlxsw_sp_prefix_usage req_prefix_usage;
6277         struct mlxsw_sp_fib *fib = fib_node->fib;
6278         struct mlxsw_sp_lpm_tree *lpm_tree;
6279         int err;
6280
6281         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6282         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6283                 goto out;
6284
6285         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6286         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6287         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6288                                          fib->proto);
6289         if (IS_ERR(lpm_tree))
6290                 return PTR_ERR(lpm_tree);
6291
6292         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6293         if (err)
6294                 goto err_lpm_tree_replace;
6295
6296 out:
6297         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6298         return 0;
6299
6300 err_lpm_tree_replace:
6301         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6302         return err;
6303 }
6304
6305 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6306                                          struct mlxsw_sp_fib_node *fib_node)
6307 {
6308         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6309         struct mlxsw_sp_prefix_usage req_prefix_usage;
6310         struct mlxsw_sp_fib *fib = fib_node->fib;
6311         int err;
6312
6313         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6314                 return;
6315         /* Try to construct a new LPM tree from the current prefix usage
6316          * minus the unused one. If we fail, continue using the old one.
6317          */
6318         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6319         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6320                                     fib_node->key.prefix_len);
6321         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6322                                          fib->proto);
6323         if (IS_ERR(lpm_tree))
6324                 return;
6325
6326         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6327         if (err)
6328                 goto err_lpm_tree_replace;
6329
6330         return;
6331
6332 err_lpm_tree_replace:
6333         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6334 }
6335
6336 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6337                                   struct mlxsw_sp_fib_node *fib_node,
6338                                   struct mlxsw_sp_fib *fib)
6339 {
6340         int err;
6341
6342         err = mlxsw_sp_fib_node_insert(fib, fib_node);
6343         if (err)
6344                 return err;
6345         fib_node->fib = fib;
6346
6347         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6348         if (err)
6349                 goto err_fib_lpm_tree_link;
6350
6351         return 0;
6352
6353 err_fib_lpm_tree_link:
6354         fib_node->fib = NULL;
6355         mlxsw_sp_fib_node_remove(fib, fib_node);
6356         return err;
6357 }
6358
6359 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6360                                    struct mlxsw_sp_fib_node *fib_node)
6361 {
6362         struct mlxsw_sp_fib *fib = fib_node->fib;
6363
6364         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6365         fib_node->fib = NULL;
6366         mlxsw_sp_fib_node_remove(fib, fib_node);
6367 }
6368
6369 static struct mlxsw_sp_fib_node *
6370 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6371                       size_t addr_len, unsigned char prefix_len,
6372                       enum mlxsw_sp_l3proto proto)
6373 {
6374         struct mlxsw_sp_fib_node *fib_node;
6375         struct mlxsw_sp_fib *fib;
6376         struct mlxsw_sp_vr *vr;
6377         int err;
6378
6379         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6380         if (IS_ERR(vr))
6381                 return ERR_CAST(vr);
6382         fib = mlxsw_sp_vr_fib(vr, proto);
6383
6384         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6385         if (fib_node)
6386                 return fib_node;
6387
6388         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6389         if (!fib_node) {
6390                 err = -ENOMEM;
6391                 goto err_fib_node_create;
6392         }
6393
6394         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6395         if (err)
6396                 goto err_fib_node_init;
6397
6398         return fib_node;
6399
6400 err_fib_node_init:
6401         mlxsw_sp_fib_node_destroy(fib_node);
6402 err_fib_node_create:
6403         mlxsw_sp_vr_put(mlxsw_sp, vr);
6404         return ERR_PTR(err);
6405 }
6406
6407 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6408                                   struct mlxsw_sp_fib_node *fib_node)
6409 {
6410         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6411
6412         if (fib_node->fib_entry)
6413                 return;
6414         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6415         mlxsw_sp_fib_node_destroy(fib_node);
6416         mlxsw_sp_vr_put(mlxsw_sp, vr);
6417 }
6418
6419 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6420                                         struct mlxsw_sp_fib_entry *fib_entry)
6421 {
6422         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6423         int err;
6424
6425         fib_node->fib_entry = fib_entry;
6426
6427         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6428         if (err)
6429                 goto err_fib_entry_update;
6430
6431         return 0;
6432
6433 err_fib_entry_update:
6434         fib_node->fib_entry = NULL;
6435         return err;
6436 }
6437
6438 static void
6439 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6440                                struct mlxsw_sp_fib_entry *fib_entry)
6441 {
6442         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6443
6444         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6445         fib_node->fib_entry = NULL;
6446 }
6447
6448 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6449 {
6450         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6451         struct mlxsw_sp_fib4_entry *fib4_replaced;
6452
6453         if (!fib_node->fib_entry)
6454                 return true;
6455
6456         fib4_replaced = container_of(fib_node->fib_entry,
6457                                      struct mlxsw_sp_fib4_entry, common);
6458         if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6459             fib4_replaced->tb_id == RT_TABLE_LOCAL)
6460                 return false;
6461
6462         return true;
6463 }
6464
6465 static int
6466 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6467                              const struct fib_entry_notifier_info *fen_info)
6468 {
6469         struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6470         struct mlxsw_sp_fib_entry *replaced;
6471         struct mlxsw_sp_fib_node *fib_node;
6472         int err;
6473
6474         if (fen_info->fi->nh &&
6475             !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6476                 return 0;
6477
6478         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6479                                          &fen_info->dst, sizeof(fen_info->dst),
6480                                          fen_info->dst_len,
6481                                          MLXSW_SP_L3_PROTO_IPV4);
6482         if (IS_ERR(fib_node)) {
6483                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6484                 return PTR_ERR(fib_node);
6485         }
6486
6487         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6488         if (IS_ERR(fib4_entry)) {
6489                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6490                 err = PTR_ERR(fib4_entry);
6491                 goto err_fib4_entry_create;
6492         }
6493
6494         if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6495                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6496                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6497                 return 0;
6498         }
6499
6500         replaced = fib_node->fib_entry;
6501         err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6502         if (err) {
6503                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6504                 goto err_fib_node_entry_link;
6505         }
6506
6507         /* Nothing to replace */
6508         if (!replaced)
6509                 return 0;
6510
6511         mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6512         fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6513                                      common);
6514         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6515
6516         return 0;
6517
6518 err_fib_node_entry_link:
6519         fib_node->fib_entry = replaced;
6520         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6521 err_fib4_entry_create:
6522         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6523         return err;
6524 }
6525
6526 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6527                                      struct fib_entry_notifier_info *fen_info)
6528 {
6529         struct mlxsw_sp_fib4_entry *fib4_entry;
6530         struct mlxsw_sp_fib_node *fib_node;
6531
6532         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6533         if (!fib4_entry)
6534                 return;
6535         fib_node = fib4_entry->common.fib_node;
6536
6537         mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6538         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6539         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6540 }
6541
6542 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6543 {
6544         /* Multicast routes aren't supported, so ignore them. Neighbour
6545          * Discovery packets are specifically trapped.
6546          */
6547         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6548                 return true;
6549
6550         /* Cloned routes are irrelevant in the forwarding path. */
6551         if (rt->fib6_flags & RTF_CACHE)
6552                 return true;
6553
6554         return false;
6555 }
6556
6557 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6558 {
6559         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6560
6561         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6562         if (!mlxsw_sp_rt6)
6563                 return ERR_PTR(-ENOMEM);
6564
6565         /* In case of route replace, replaced route is deleted with
6566          * no notification. Take reference to prevent accessing freed
6567          * memory.
6568          */
6569         mlxsw_sp_rt6->rt = rt;
6570         fib6_info_hold(rt);
6571
6572         return mlxsw_sp_rt6;
6573 }
6574
6575 #if IS_ENABLED(CONFIG_IPV6)
6576 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6577 {
6578         fib6_info_release(rt);
6579 }
6580 #else
6581 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6582 {
6583 }
6584 #endif
6585
6586 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6587 {
6588         struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6589
6590         if (!mlxsw_sp_rt6->rt->nh)
6591                 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6592         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6593         kfree(mlxsw_sp_rt6);
6594 }
6595
6596 static struct fib6_info *
6597 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6598 {
6599         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6600                                 list)->rt;
6601 }
6602
6603 static struct mlxsw_sp_rt6 *
6604 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6605                             const struct fib6_info *rt)
6606 {
6607         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6608
6609         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6610                 if (mlxsw_sp_rt6->rt == rt)
6611                         return mlxsw_sp_rt6;
6612         }
6613
6614         return NULL;
6615 }
6616
6617 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6618                                         const struct fib6_info *rt,
6619                                         enum mlxsw_sp_ipip_type *ret)
6620 {
6621         return rt->fib6_nh->fib_nh_dev &&
6622                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6623 }
6624
6625 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6626                                   struct mlxsw_sp_nexthop_group *nh_grp,
6627                                   struct mlxsw_sp_nexthop *nh,
6628                                   const struct fib6_info *rt)
6629 {
6630         struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6631         int err;
6632
6633         nh->nhgi = nh_grp->nhgi;
6634         nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6635         memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6636 #if IS_ENABLED(CONFIG_IPV6)
6637         nh->neigh_tbl = &nd_tbl;
6638 #endif
6639         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6640
6641         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6642
6643         if (!dev)
6644                 return 0;
6645         nh->ifindex = dev->ifindex;
6646
6647         err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6648         if (err)
6649                 goto err_nexthop_type_init;
6650
6651         return 0;
6652
6653 err_nexthop_type_init:
6654         list_del(&nh->router_list_node);
6655         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6656         return err;
6657 }
6658
6659 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6660                                    struct mlxsw_sp_nexthop *nh)
6661 {
6662         mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6663         list_del(&nh->router_list_node);
6664         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6665 }
6666
6667 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6668                                     const struct fib6_info *rt)
6669 {
6670         return rt->fib6_nh->fib_nh_gw_family ||
6671                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6672 }
6673
6674 static int
6675 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6676                                   struct mlxsw_sp_nexthop_group *nh_grp,
6677                                   struct mlxsw_sp_fib6_entry *fib6_entry)
6678 {
6679         struct mlxsw_sp_nexthop_group_info *nhgi;
6680         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6681         struct mlxsw_sp_nexthop *nh;
6682         int err, i;
6683
6684         nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6685                        GFP_KERNEL);
6686         if (!nhgi)
6687                 return -ENOMEM;
6688         nh_grp->nhgi = nhgi;
6689         nhgi->nh_grp = nh_grp;
6690         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6691                                         struct mlxsw_sp_rt6, list);
6692         nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6693         nhgi->count = fib6_entry->nrt6;
6694         for (i = 0; i < nhgi->count; i++) {
6695                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
6696
6697                 nh = &nhgi->nexthops[i];
6698                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6699                 if (err)
6700                         goto err_nexthop6_init;
6701                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6702         }
6703         nh_grp->nhgi = nhgi;
6704         err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6705         if (err)
6706                 goto err_group_inc;
6707         err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6708         if (err)
6709                 goto err_group_refresh;
6710
6711         return 0;
6712
6713 err_group_refresh:
6714         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6715 err_group_inc:
6716         i = nhgi->count;
6717 err_nexthop6_init:
6718         for (i--; i >= 0; i--) {
6719                 nh = &nhgi->nexthops[i];
6720                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6721         }
6722         kfree(nhgi);
6723         return err;
6724 }
6725
6726 static void
6727 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6728                                   struct mlxsw_sp_nexthop_group *nh_grp)
6729 {
6730         struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6731         int i;
6732
6733         mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6734         for (i = nhgi->count - 1; i >= 0; i--) {
6735                 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6736
6737                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6738         }
6739         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6740         WARN_ON_ONCE(nhgi->adj_index_valid);
6741         kfree(nhgi);
6742 }
6743
6744 static struct mlxsw_sp_nexthop_group *
6745 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6746                                struct mlxsw_sp_fib6_entry *fib6_entry)
6747 {
6748         struct mlxsw_sp_nexthop_group *nh_grp;
6749         int err;
6750
6751         nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6752         if (!nh_grp)
6753                 return ERR_PTR(-ENOMEM);
6754         INIT_LIST_HEAD(&nh_grp->vr_list);
6755         err = rhashtable_init(&nh_grp->vr_ht,
6756                               &mlxsw_sp_nexthop_group_vr_ht_params);
6757         if (err)
6758                 goto err_nexthop_group_vr_ht_init;
6759         INIT_LIST_HEAD(&nh_grp->fib_list);
6760         nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6761
6762         err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6763         if (err)
6764                 goto err_nexthop_group_info_init;
6765
6766         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6767         if (err)
6768                 goto err_nexthop_group_insert;
6769
6770         nh_grp->can_destroy = true;
6771
6772         return nh_grp;
6773
6774 err_nexthop_group_insert:
6775         mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6776 err_nexthop_group_info_init:
6777         rhashtable_destroy(&nh_grp->vr_ht);
6778 err_nexthop_group_vr_ht_init:
6779         kfree(nh_grp);
6780         return ERR_PTR(err);
6781 }
6782
6783 static void
6784 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6785                                 struct mlxsw_sp_nexthop_group *nh_grp)
6786 {
6787         if (!nh_grp->can_destroy)
6788                 return;
6789         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6790         mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6791         WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6792         rhashtable_destroy(&nh_grp->vr_ht);
6793         kfree(nh_grp);
6794 }
6795
6796 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6797                                        struct mlxsw_sp_fib6_entry *fib6_entry)
6798 {
6799         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6800         struct mlxsw_sp_nexthop_group *nh_grp;
6801
6802         if (rt->nh) {
6803                 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6804                                                            rt->nh->id);
6805                 if (WARN_ON_ONCE(!nh_grp))
6806                         return -EINVAL;
6807                 goto out;
6808         }
6809
6810         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6811         if (!nh_grp) {
6812                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6813                 if (IS_ERR(nh_grp))
6814                         return PTR_ERR(nh_grp);
6815         }
6816
6817         /* The route and the nexthop are described by the same struct, so we
6818          * need to the update the nexthop offload indication for the new route.
6819          */
6820         __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6821
6822 out:
6823         list_add_tail(&fib6_entry->common.nexthop_group_node,
6824                       &nh_grp->fib_list);
6825         fib6_entry->common.nh_group = nh_grp;
6826
6827         return 0;
6828 }
6829
6830 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6831                                         struct mlxsw_sp_fib_entry *fib_entry)
6832 {
6833         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6834
6835         list_del(&fib_entry->nexthop_group_node);
6836         if (!list_empty(&nh_grp->fib_list))
6837                 return;
6838
6839         if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6840                 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6841                 return;
6842         }
6843
6844         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6845 }
6846
6847 static int
6848 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6849                                struct mlxsw_sp_fib6_entry *fib6_entry)
6850 {
6851         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6852         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6853         int err;
6854
6855         mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6856         fib6_entry->common.nh_group = NULL;
6857         list_del(&fib6_entry->common.nexthop_group_node);
6858
6859         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6860         if (err)
6861                 goto err_nexthop6_group_get;
6862
6863         err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6864                                              fib_node->fib);
6865         if (err)
6866                 goto err_nexthop_group_vr_link;
6867
6868         /* In case this entry is offloaded, then the adjacency index
6869          * currently associated with it in the device's table is that
6870          * of the old group. Start using the new one instead.
6871          */
6872         err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
6873         if (err)
6874                 goto err_fib_entry_update;
6875
6876         if (list_empty(&old_nh_grp->fib_list))
6877                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6878
6879         return 0;
6880
6881 err_fib_entry_update:
6882         mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6883                                          fib_node->fib);
6884 err_nexthop_group_vr_link:
6885         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6886 err_nexthop6_group_get:
6887         list_add_tail(&fib6_entry->common.nexthop_group_node,
6888                       &old_nh_grp->fib_list);
6889         fib6_entry->common.nh_group = old_nh_grp;
6890         mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6891         return err;
6892 }
6893
6894 static int
6895 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6896                                 struct mlxsw_sp_fib6_entry *fib6_entry,
6897                                 struct fib6_info **rt_arr, unsigned int nrt6)
6898 {
6899         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6900         int err, i;
6901
6902         for (i = 0; i < nrt6; i++) {
6903                 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6904                 if (IS_ERR(mlxsw_sp_rt6)) {
6905                         err = PTR_ERR(mlxsw_sp_rt6);
6906                         goto err_rt6_unwind;
6907                 }
6908
6909                 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6910                 fib6_entry->nrt6++;
6911         }
6912
6913         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6914         if (err)
6915                 goto err_rt6_unwind;
6916
6917         return 0;
6918
6919 err_rt6_unwind:
6920         for (; i > 0; i--) {
6921                 fib6_entry->nrt6--;
6922                 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6923                                                struct mlxsw_sp_rt6, list);
6924                 list_del(&mlxsw_sp_rt6->list);
6925                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6926         }
6927         return err;
6928 }
6929
6930 static void
6931 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
6932                                 struct mlxsw_sp_fib6_entry *fib6_entry,
6933                                 struct fib6_info **rt_arr, unsigned int nrt6)
6934 {
6935         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6936         int i;
6937
6938         for (i = 0; i < nrt6; i++) {
6939                 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
6940                                                            rt_arr[i]);
6941                 if (WARN_ON_ONCE(!mlxsw_sp_rt6))
6942                         continue;
6943
6944                 fib6_entry->nrt6--;
6945                 list_del(&mlxsw_sp_rt6->list);
6946                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6947         }
6948
6949         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6950 }
6951
6952 static int
6953 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
6954                                    struct mlxsw_sp_fib_entry *fib_entry,
6955                                    const struct fib6_info *rt)
6956 {
6957         struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6958         union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
6959         u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
6960         struct mlxsw_sp_router *router = mlxsw_sp->router;
6961         int ifindex = nhgi->nexthops[0].ifindex;
6962         struct mlxsw_sp_ipip_entry *ipip_entry;
6963
6964         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6965         ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6966                                                        MLXSW_SP_L3_PROTO_IPV6,
6967                                                        dip);
6968
6969         if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6970                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6971                 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
6972                                                      ipip_entry);
6973         }
6974         if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6975                                          MLXSW_SP_L3_PROTO_IPV6, &dip)) {
6976                 u32 tunnel_index;
6977
6978                 tunnel_index = router->nve_decap_config.tunnel_index;
6979                 fib_entry->decap.tunnel_index = tunnel_index;
6980                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6981         }
6982
6983         return 0;
6984 }
6985
6986 static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6987                                         struct mlxsw_sp_fib_entry *fib_entry,
6988                                         const struct fib6_info *rt)
6989 {
6990         if (rt->fib6_flags & RTF_LOCAL)
6991                 return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
6992                                                           rt);
6993         if (rt->fib6_flags & RTF_ANYCAST)
6994                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6995         else if (rt->fib6_type == RTN_BLACKHOLE)
6996                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6997         else if (rt->fib6_flags & RTF_REJECT)
6998                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6999         else if (fib_entry->nh_group->nhgi->gateway)
7000                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
7001         else
7002                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
7003
7004         return 0;
7005 }
7006
7007 static void
7008 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
7009 {
7010         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
7011
7012         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
7013                                  list) {
7014                 fib6_entry->nrt6--;
7015                 list_del(&mlxsw_sp_rt6->list);
7016                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7017         }
7018 }
7019
7020 static struct mlxsw_sp_fib6_entry *
7021 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
7022                            struct mlxsw_sp_fib_node *fib_node,
7023                            struct fib6_info **rt_arr, unsigned int nrt6)
7024 {
7025         struct mlxsw_sp_fib6_entry *fib6_entry;
7026         struct mlxsw_sp_fib_entry *fib_entry;
7027         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7028         int err, i;
7029
7030         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
7031         if (!fib6_entry)
7032                 return ERR_PTR(-ENOMEM);
7033         fib_entry = &fib6_entry->common;
7034
7035         INIT_LIST_HEAD(&fib6_entry->rt6_list);
7036
7037         for (i = 0; i < nrt6; i++) {
7038                 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7039                 if (IS_ERR(mlxsw_sp_rt6)) {
7040                         err = PTR_ERR(mlxsw_sp_rt6);
7041                         goto err_rt6_unwind;
7042                 }
7043                 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7044                 fib6_entry->nrt6++;
7045         }
7046
7047         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
7048         if (err)
7049                 goto err_rt6_unwind;
7050
7051         err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
7052                                              fib_node->fib);
7053         if (err)
7054                 goto err_nexthop_group_vr_link;
7055
7056         err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
7057         if (err)
7058                 goto err_fib6_entry_type_set;
7059
7060         fib_entry->fib_node = fib_node;
7061
7062         return fib6_entry;
7063
7064 err_fib6_entry_type_set:
7065         mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
7066 err_nexthop_group_vr_link:
7067         mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
7068 err_rt6_unwind:
7069         for (; i > 0; i--) {
7070                 fib6_entry->nrt6--;
7071                 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7072                                                struct mlxsw_sp_rt6, list);
7073                 list_del(&mlxsw_sp_rt6->list);
7074                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7075         }
7076         kfree(fib6_entry);
7077         return ERR_PTR(err);
7078 }
7079
7080 static void
7081 mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
7082                                struct mlxsw_sp_fib6_entry *fib6_entry)
7083 {
7084         mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
7085 }
7086
7087 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
7088                                         struct mlxsw_sp_fib6_entry *fib6_entry)
7089 {
7090         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7091
7092         mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
7093         mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7094                                          fib_node->fib);
7095         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7096         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
7097         WARN_ON(fib6_entry->nrt6);
7098         kfree(fib6_entry);
7099 }
7100
7101 static struct mlxsw_sp_fib6_entry *
7102 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7103                            const struct fib6_info *rt)
7104 {
7105         struct mlxsw_sp_fib6_entry *fib6_entry;
7106         struct mlxsw_sp_fib_node *fib_node;
7107         struct mlxsw_sp_fib *fib;
7108         struct fib6_info *cmp_rt;
7109         struct mlxsw_sp_vr *vr;
7110
7111         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7112         if (!vr)
7113                 return NULL;
7114         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7115
7116         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7117                                             sizeof(rt->fib6_dst.addr),
7118                                             rt->fib6_dst.plen);
7119         if (!fib_node)
7120                 return NULL;
7121
7122         fib6_entry = container_of(fib_node->fib_entry,
7123                                   struct mlxsw_sp_fib6_entry, common);
7124         cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7125         if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7126             rt->fib6_metric == cmp_rt->fib6_metric &&
7127             mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7128                 return fib6_entry;
7129
7130         return NULL;
7131 }
7132
7133 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7134 {
7135         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7136         struct mlxsw_sp_fib6_entry *fib6_replaced;
7137         struct fib6_info *rt, *rt_replaced;
7138
7139         if (!fib_node->fib_entry)
7140                 return true;
7141
7142         fib6_replaced = container_of(fib_node->fib_entry,
7143                                      struct mlxsw_sp_fib6_entry,
7144                                      common);
7145         rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7146         rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7147         if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7148             rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7149                 return false;
7150
7151         return true;
7152 }
7153
7154 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7155                                         struct fib6_info **rt_arr,
7156                                         unsigned int nrt6)
7157 {
7158         struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7159         struct mlxsw_sp_fib_entry *replaced;
7160         struct mlxsw_sp_fib_node *fib_node;
7161         struct fib6_info *rt = rt_arr[0];
7162         int err;
7163
7164         if (rt->fib6_src.plen)
7165                 return -EINVAL;
7166
7167         if (mlxsw_sp_fib6_rt_should_ignore(rt))
7168                 return 0;
7169
7170         if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7171                 return 0;
7172
7173         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7174                                          &rt->fib6_dst.addr,
7175                                          sizeof(rt->fib6_dst.addr),
7176                                          rt->fib6_dst.plen,
7177                                          MLXSW_SP_L3_PROTO_IPV6);
7178         if (IS_ERR(fib_node))
7179                 return PTR_ERR(fib_node);
7180
7181         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7182                                                 nrt6);
7183         if (IS_ERR(fib6_entry)) {
7184                 err = PTR_ERR(fib6_entry);
7185                 goto err_fib6_entry_create;
7186         }
7187
7188         if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7189                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7190                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7191                 return 0;
7192         }
7193
7194         replaced = fib_node->fib_entry;
7195         err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
7196         if (err)
7197                 goto err_fib_node_entry_link;
7198
7199         /* Nothing to replace */
7200         if (!replaced)
7201                 return 0;
7202
7203         mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7204         fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7205                                      common);
7206         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7207
7208         return 0;
7209
7210 err_fib_node_entry_link:
7211         fib_node->fib_entry = replaced;
7212         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7213 err_fib6_entry_create:
7214         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7215         return err;
7216 }
7217
7218 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7219                                        struct fib6_info **rt_arr,
7220                                        unsigned int nrt6)
7221 {
7222         struct mlxsw_sp_fib6_entry *fib6_entry;
7223         struct mlxsw_sp_fib_node *fib_node;
7224         struct fib6_info *rt = rt_arr[0];
7225         int err;
7226
7227         if (rt->fib6_src.plen)
7228                 return -EINVAL;
7229
7230         if (mlxsw_sp_fib6_rt_should_ignore(rt))
7231                 return 0;
7232
7233         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7234                                          &rt->fib6_dst.addr,
7235                                          sizeof(rt->fib6_dst.addr),
7236                                          rt->fib6_dst.plen,
7237                                          MLXSW_SP_L3_PROTO_IPV6);
7238         if (IS_ERR(fib_node))
7239                 return PTR_ERR(fib_node);
7240
7241         if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7242                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7243                 return -EINVAL;
7244         }
7245
7246         fib6_entry = container_of(fib_node->fib_entry,
7247                                   struct mlxsw_sp_fib6_entry, common);
7248         err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7249                                               nrt6);
7250         if (err)
7251                 goto err_fib6_entry_nexthop_add;
7252
7253         return 0;
7254
7255 err_fib6_entry_nexthop_add:
7256         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7257         return err;
7258 }
7259
7260 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7261                                      struct fib6_info **rt_arr,
7262                                      unsigned int nrt6)
7263 {
7264         struct mlxsw_sp_fib6_entry *fib6_entry;
7265         struct mlxsw_sp_fib_node *fib_node;
7266         struct fib6_info *rt = rt_arr[0];
7267
7268         if (mlxsw_sp_fib6_rt_should_ignore(rt))
7269                 return;
7270
7271         /* Multipath routes are first added to the FIB trie and only then
7272          * notified. If we vetoed the addition, we will get a delete
7273          * notification for a route we do not have. Therefore, do not warn if
7274          * route was not found.
7275          */
7276         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7277         if (!fib6_entry)
7278                 return;
7279
7280         /* If not all the nexthops are deleted, then only reduce the nexthop
7281          * group.
7282          */
7283         if (nrt6 != fib6_entry->nrt6) {
7284                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7285                                                 nrt6);
7286                 return;
7287         }
7288
7289         fib_node = fib6_entry->common.fib_node;
7290
7291         mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7292         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7293         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7294 }
7295
7296 static struct mlxsw_sp_mr_table *
7297 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7298 {
7299         if (family == RTNL_FAMILY_IPMR)
7300                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7301         else
7302                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7303 }
7304
7305 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7306                                      struct mfc_entry_notifier_info *men_info,
7307                                      bool replace)
7308 {
7309         struct mlxsw_sp_mr_table *mrt;
7310         struct mlxsw_sp_vr *vr;
7311
7312         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7313         if (IS_ERR(vr))
7314                 return PTR_ERR(vr);
7315
7316         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7317         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7318 }
7319
7320 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7321                                       struct mfc_entry_notifier_info *men_info)
7322 {
7323         struct mlxsw_sp_mr_table *mrt;
7324         struct mlxsw_sp_vr *vr;
7325
7326         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7327         if (WARN_ON(!vr))
7328                 return;
7329
7330         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7331         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7332         mlxsw_sp_vr_put(mlxsw_sp, vr);
7333 }
7334
7335 static int
7336 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7337                               struct vif_entry_notifier_info *ven_info)
7338 {
7339         struct mlxsw_sp_mr_table *mrt;
7340         struct mlxsw_sp_rif *rif;
7341         struct mlxsw_sp_vr *vr;
7342
7343         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7344         if (IS_ERR(vr))
7345                 return PTR_ERR(vr);
7346
7347         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7348         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7349         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7350                                    ven_info->vif_index,
7351                                    ven_info->vif_flags, rif);
7352 }
7353
7354 static void
7355 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7356                               struct vif_entry_notifier_info *ven_info)
7357 {
7358         struct mlxsw_sp_mr_table *mrt;
7359         struct mlxsw_sp_vr *vr;
7360
7361         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7362         if (WARN_ON(!vr))
7363                 return;
7364
7365         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7366         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7367         mlxsw_sp_vr_put(mlxsw_sp, vr);
7368 }
7369
7370 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7371                                      struct mlxsw_sp_fib_node *fib_node)
7372 {
7373         struct mlxsw_sp_fib4_entry *fib4_entry;
7374
7375         fib4_entry = container_of(fib_node->fib_entry,
7376                                   struct mlxsw_sp_fib4_entry, common);
7377         mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7378         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7379         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7380 }
7381
7382 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7383                                      struct mlxsw_sp_fib_node *fib_node)
7384 {
7385         struct mlxsw_sp_fib6_entry *fib6_entry;
7386
7387         fib6_entry = container_of(fib_node->fib_entry,
7388                                   struct mlxsw_sp_fib6_entry, common);
7389         mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7390         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7391         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7392 }
7393
7394 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7395                                     struct mlxsw_sp_fib_node *fib_node)
7396 {
7397         switch (fib_node->fib->proto) {
7398         case MLXSW_SP_L3_PROTO_IPV4:
7399                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7400                 break;
7401         case MLXSW_SP_L3_PROTO_IPV6:
7402                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7403                 break;
7404         }
7405 }
7406
7407 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7408                                   struct mlxsw_sp_vr *vr,
7409                                   enum mlxsw_sp_l3proto proto)
7410 {
7411         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7412         struct mlxsw_sp_fib_node *fib_node, *tmp;
7413
7414         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7415                 bool do_break = &tmp->list == &fib->node_list;
7416
7417                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7418                 if (do_break)
7419                         break;
7420         }
7421 }
7422
7423 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7424 {
7425         int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7426         int i, j;
7427
7428         for (i = 0; i < max_vrs; i++) {
7429                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7430
7431                 if (!mlxsw_sp_vr_is_used(vr))
7432                         continue;
7433
7434                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7435                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7436                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7437
7438                 /* If virtual router was only used for IPv4, then it's no
7439                  * longer used.
7440                  */
7441                 if (!mlxsw_sp_vr_is_used(vr))
7442                         continue;
7443                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7444         }
7445 }
7446
/* IPv6 payload of a deferred FIB event: the routes the notification refers
 * to, each held by a reference until the work item is finalized.
 */
struct mlxsw_sp_fib6_event_work {
        /* Array of @nrt6 route pointers; element 0 is the notified route,
         * the remaining ones are its siblings.
         */
        struct fib6_info **rt_arr;
        /* Number of elements in @rt_arr (number of siblings plus one). */
        unsigned int nrt6;
};
7451
/* A FIB notification deferred to a work item. The notifier callback
 * allocates and fills it, and the work handler frees it once the event has
 * been processed.
 */
struct mlxsw_sp_fib_event_work {
        struct work_struct work;
        /* Event payload; which member is meaningful depends on the address
         * family and on @event.
         */
        union {
                /* IPv6 entry replace / append / delete. */
                struct mlxsw_sp_fib6_event_work fib6_work;
                /* IPv4 entry replace / delete. */
                struct fib_entry_notifier_info fen_info;
                struct fib_rule_notifier_info fr_info;
                /* IPv4 nexthop add / delete. */
                struct fib_nh_notifier_info fnh_info;
                /* Multicast route add / replace / delete. */
                struct mfc_entry_notifier_info men_info;
                /* Multicast VIF add / delete. */
                struct vif_entry_notifier_info ven_info;
        };
        /* Driver instance the event is processed for. */
        struct mlxsw_sp *mlxsw_sp;
        /* FIB_EVENT_* value received by the notifier. */
        unsigned long event;
};
7465
7466 static int
7467 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7468                                struct fib6_entry_notifier_info *fen6_info)
7469 {
7470         struct fib6_info *rt = fen6_info->rt;
7471         struct fib6_info **rt_arr;
7472         struct fib6_info *iter;
7473         unsigned int nrt6;
7474         int i = 0;
7475
7476         nrt6 = fen6_info->nsiblings + 1;
7477
7478         rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7479         if (!rt_arr)
7480                 return -ENOMEM;
7481
7482         fib6_work->rt_arr = rt_arr;
7483         fib6_work->nrt6 = nrt6;
7484
7485         rt_arr[0] = rt;
7486         fib6_info_hold(rt);
7487
7488         if (!fen6_info->nsiblings)
7489                 return 0;
7490
7491         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7492                 if (i == fen6_info->nsiblings)
7493                         break;
7494
7495                 rt_arr[i + 1] = iter;
7496                 fib6_info_hold(iter);
7497                 i++;
7498         }
7499         WARN_ON_ONCE(i != fen6_info->nsiblings);
7500
7501         return 0;
7502 }
7503
7504 static void
7505 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7506 {
7507         int i;
7508
7509         for (i = 0; i < fib6_work->nrt6; i++)
7510                 mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7511         kfree(fib6_work->rt_arr);
7512 }
7513
7514 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7515 {
7516         struct mlxsw_sp_fib_event_work *fib_work =
7517                 container_of(work, struct mlxsw_sp_fib_event_work, work);
7518         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7519         int err;
7520
7521         mutex_lock(&mlxsw_sp->router->lock);
7522         mlxsw_sp_span_respin(mlxsw_sp);
7523
7524         switch (fib_work->event) {
7525         case FIB_EVENT_ENTRY_REPLACE:
7526                 err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7527                                                    &fib_work->fen_info);
7528                 if (err) {
7529                         dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7530                         mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7531                                                               &fib_work->fen_info);
7532                 }
7533                 fib_info_put(fib_work->fen_info.fi);
7534                 break;
7535         case FIB_EVENT_ENTRY_DEL:
7536                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7537                 fib_info_put(fib_work->fen_info.fi);
7538                 break;
7539         case FIB_EVENT_NH_ADD:
7540         case FIB_EVENT_NH_DEL:
7541                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7542                                         fib_work->fnh_info.fib_nh);
7543                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7544                 break;
7545         }
7546         mutex_unlock(&mlxsw_sp->router->lock);
7547         kfree(fib_work);
7548 }
7549
7550 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7551 {
7552         struct mlxsw_sp_fib_event_work *fib_work =
7553                     container_of(work, struct mlxsw_sp_fib_event_work, work);
7554         struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7555         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7556         int err;
7557
7558         mutex_lock(&mlxsw_sp->router->lock);
7559         mlxsw_sp_span_respin(mlxsw_sp);
7560
7561         switch (fib_work->event) {
7562         case FIB_EVENT_ENTRY_REPLACE:
7563                 err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7564                                                    fib6_work->rt_arr,
7565                                                    fib6_work->nrt6);
7566                 if (err) {
7567                         dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7568                         mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7569                                                               fib6_work->rt_arr,
7570                                                               fib6_work->nrt6);
7571                 }
7572                 mlxsw_sp_router_fib6_work_fini(fib6_work);
7573                 break;
7574         case FIB_EVENT_ENTRY_APPEND:
7575                 err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7576                                                   fib6_work->rt_arr,
7577                                                   fib6_work->nrt6);
7578                 if (err) {
7579                         dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7580                         mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7581                                                               fib6_work->rt_arr,
7582                                                               fib6_work->nrt6);
7583                 }
7584                 mlxsw_sp_router_fib6_work_fini(fib6_work);
7585                 break;
7586         case FIB_EVENT_ENTRY_DEL:
7587                 mlxsw_sp_router_fib6_del(mlxsw_sp,
7588                                          fib6_work->rt_arr,
7589                                          fib6_work->nrt6);
7590                 mlxsw_sp_router_fib6_work_fini(fib6_work);
7591                 break;
7592         }
7593         mutex_unlock(&mlxsw_sp->router->lock);
7594         kfree(fib_work);
7595 }
7596
7597 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7598 {
7599         struct mlxsw_sp_fib_event_work *fib_work =
7600                 container_of(work, struct mlxsw_sp_fib_event_work, work);
7601         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7602         bool replace;
7603         int err;
7604
7605         rtnl_lock();
7606         mutex_lock(&mlxsw_sp->router->lock);
7607         switch (fib_work->event) {
7608         case FIB_EVENT_ENTRY_REPLACE:
7609         case FIB_EVENT_ENTRY_ADD:
7610                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7611
7612                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7613                                                 replace);
7614                 if (err)
7615                         dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7616                 mr_cache_put(fib_work->men_info.mfc);
7617                 break;
7618         case FIB_EVENT_ENTRY_DEL:
7619                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7620                 mr_cache_put(fib_work->men_info.mfc);
7621                 break;
7622         case FIB_EVENT_VIF_ADD:
7623                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7624                                                     &fib_work->ven_info);
7625                 if (err)
7626                         dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7627                 dev_put(fib_work->ven_info.dev);
7628                 break;
7629         case FIB_EVENT_VIF_DEL:
7630                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7631                                               &fib_work->ven_info);
7632                 dev_put(fib_work->ven_info.dev);
7633                 break;
7634         }
7635         mutex_unlock(&mlxsw_sp->router->lock);
7636         rtnl_unlock();
7637         kfree(fib_work);
7638 }
7639
7640 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7641                                        struct fib_notifier_info *info)
7642 {
7643         struct fib_entry_notifier_info *fen_info;
7644         struct fib_nh_notifier_info *fnh_info;
7645
7646         switch (fib_work->event) {
7647         case FIB_EVENT_ENTRY_REPLACE:
7648         case FIB_EVENT_ENTRY_DEL:
7649                 fen_info = container_of(info, struct fib_entry_notifier_info,
7650                                         info);
7651                 fib_work->fen_info = *fen_info;
7652                 /* Take reference on fib_info to prevent it from being
7653                  * freed while work is queued. Release it afterwards.
7654                  */
7655                 fib_info_hold(fib_work->fen_info.fi);
7656                 break;
7657         case FIB_EVENT_NH_ADD:
7658         case FIB_EVENT_NH_DEL:
7659                 fnh_info = container_of(info, struct fib_nh_notifier_info,
7660                                         info);
7661                 fib_work->fnh_info = *fnh_info;
7662                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7663                 break;
7664         }
7665 }
7666
7667 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7668                                       struct fib_notifier_info *info)
7669 {
7670         struct fib6_entry_notifier_info *fen6_info;
7671         int err;
7672
7673         switch (fib_work->event) {
7674         case FIB_EVENT_ENTRY_REPLACE:
7675         case FIB_EVENT_ENTRY_APPEND:
7676         case FIB_EVENT_ENTRY_DEL:
7677                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
7678                                          info);
7679                 err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7680                                                      fen6_info);
7681                 if (err)
7682                         return err;
7683                 break;
7684         }
7685
7686         return 0;
7687 }
7688
7689 static void
7690 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7691                             struct fib_notifier_info *info)
7692 {
7693         switch (fib_work->event) {
7694         case FIB_EVENT_ENTRY_REPLACE:
7695         case FIB_EVENT_ENTRY_ADD:
7696         case FIB_EVENT_ENTRY_DEL:
7697                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7698                 mr_cache_hold(fib_work->men_info.mfc);
7699                 break;
7700         case FIB_EVENT_VIF_ADD:
7701         case FIB_EVENT_VIF_DEL:
7702                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7703                 dev_hold(fib_work->ven_info.dev);
7704                 break;
7705         }
7706 }
7707
7708 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7709                                           struct fib_notifier_info *info,
7710                                           struct mlxsw_sp *mlxsw_sp)
7711 {
7712         struct netlink_ext_ack *extack = info->extack;
7713         struct fib_rule_notifier_info *fr_info;
7714         struct fib_rule *rule;
7715         int err = 0;
7716
7717         /* nothing to do at the moment */
7718         if (event == FIB_EVENT_RULE_DEL)
7719                 return 0;
7720
7721         fr_info = container_of(info, struct fib_rule_notifier_info, info);
7722         rule = fr_info->rule;
7723
7724         /* Rule only affects locally generated traffic */
7725         if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7726                 return 0;
7727
7728         switch (info->family) {
7729         case AF_INET:
7730                 if (!fib4_rule_default(rule) && !rule->l3mdev)
7731                         err = -EOPNOTSUPP;
7732                 break;
7733         case AF_INET6:
7734                 if (!fib6_rule_default(rule) && !rule->l3mdev)
7735                         err = -EOPNOTSUPP;
7736                 break;
7737         case RTNL_FAMILY_IPMR:
7738                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
7739                         err = -EOPNOTSUPP;
7740                 break;
7741         case RTNL_FAMILY_IP6MR:
7742                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7743                         err = -EOPNOTSUPP;
7744                 break;
7745         }
7746
7747         if (err < 0)
7748                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7749
7750         return err;
7751 }
7752
7753 /* Called with rcu_read_lock() */
7754 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7755                                      unsigned long event, void *ptr)
7756 {
7757         struct mlxsw_sp_fib_event_work *fib_work;
7758         struct fib_notifier_info *info = ptr;
7759         struct mlxsw_sp_router *router;
7760         int err;
7761
7762         if ((info->family != AF_INET && info->family != AF_INET6 &&
7763              info->family != RTNL_FAMILY_IPMR &&
7764              info->family != RTNL_FAMILY_IP6MR))
7765                 return NOTIFY_DONE;
7766
7767         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7768
7769         switch (event) {
7770         case FIB_EVENT_RULE_ADD:
7771         case FIB_EVENT_RULE_DEL:
7772                 err = mlxsw_sp_router_fib_rule_event(event, info,
7773                                                      router->mlxsw_sp);
7774                 return notifier_from_errno(err);
7775         case FIB_EVENT_ENTRY_ADD:
7776         case FIB_EVENT_ENTRY_REPLACE:
7777         case FIB_EVENT_ENTRY_APPEND:
7778                 if (info->family == AF_INET) {
7779                         struct fib_entry_notifier_info *fen_info = ptr;
7780
7781                         if (fen_info->fi->fib_nh_is_v6) {
7782                                 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7783                                 return notifier_from_errno(-EINVAL);
7784                         }
7785                 }
7786                 break;
7787         }
7788
7789         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
7790         if (!fib_work)
7791                 return NOTIFY_BAD;
7792
7793         fib_work->mlxsw_sp = router->mlxsw_sp;
7794         fib_work->event = event;
7795
7796         switch (info->family) {
7797         case AF_INET:
7798                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
7799                 mlxsw_sp_router_fib4_event(fib_work, info);
7800                 break;
7801         case AF_INET6:
7802                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
7803                 err = mlxsw_sp_router_fib6_event(fib_work, info);
7804                 if (err)
7805                         goto err_fib_event;
7806                 break;
7807         case RTNL_FAMILY_IP6MR:
7808         case RTNL_FAMILY_IPMR:
7809                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
7810                 mlxsw_sp_router_fibmr_event(fib_work, info);
7811                 break;
7812         }
7813
7814         mlxsw_core_schedule_work(&fib_work->work);
7815
7816         return NOTIFY_DONE;
7817
7818 err_fib_event:
7819         kfree(fib_work);
7820         return NOTIFY_BAD;
7821 }
7822
7823 static struct mlxsw_sp_rif *
7824 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7825                          const struct net_device *dev)
7826 {
7827         int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7828         int i;
7829
7830         for (i = 0; i < max_rifs; i++)
7831                 if (mlxsw_sp->router->rifs[i] &&
7832                     mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
7833                         return mlxsw_sp->router->rifs[i];
7834
7835         return NULL;
7836 }
7837
7838 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7839                          const struct net_device *dev)
7840 {
7841         struct mlxsw_sp_rif *rif;
7842
7843         mutex_lock(&mlxsw_sp->router->lock);
7844         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7845         mutex_unlock(&mlxsw_sp->router->lock);
7846
7847         return rif;
7848 }
7849
7850 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7851 {
7852         struct mlxsw_sp_rif *rif;
7853         u16 vid = 0;
7854
7855         mutex_lock(&mlxsw_sp->router->lock);
7856         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7857         if (!rif)
7858                 goto out;
7859
7860         /* We only return the VID for VLAN RIFs. Otherwise we return an
7861          * invalid value (0).
7862          */
7863         if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7864                 goto out;
7865
7866         vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7867
7868 out:
7869         mutex_unlock(&mlxsw_sp->router->lock);
7870         return vid;
7871 }
7872
7873 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7874 {
7875         char ritr_pl[MLXSW_REG_RITR_LEN];
7876         int err;
7877
7878         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7879         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7880         if (err)
7881                 return err;
7882
7883         mlxsw_reg_ritr_enable_set(ritr_pl, false);
7884         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7885 }
7886
7887 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7888                                           struct mlxsw_sp_rif *rif)
7889 {
7890         /* Signal to nexthop cleanup that the RIF is going away. */
7891         rif->crif->rif = NULL;
7892
7893         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7894         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7895         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
7896 }
7897
7898 static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
7899 {
7900         struct inet6_dev *inet6_dev;
7901         struct in_device *idev;
7902
7903         idev = __in_dev_get_rcu(dev);
7904         if (idev && idev->ifa_list)
7905                 return false;
7906
7907         inet6_dev = __in6_dev_get(dev);
7908         if (inet6_dev && !list_empty(&inet6_dev->addr_list))
7909                 return false;
7910
7911         return true;
7912 }
7913
7914 static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
7915 {
7916         bool addr_list_empty;
7917
7918         rcu_read_lock();
7919         addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev);
7920         rcu_read_unlock();
7921
7922         return addr_list_empty;
7923 }
7924
7925 static bool
7926 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7927                            unsigned long event)
7928 {
7929         bool addr_list_empty;
7930
7931         switch (event) {
7932         case NETDEV_UP:
7933                 return rif == NULL;
7934         case NETDEV_DOWN:
7935                 addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev);
7936
7937                 /* macvlans do not have a RIF, but rather piggy back on the
7938                  * RIF of their lower device.
7939                  */
7940                 if (netif_is_macvlan(dev) && addr_list_empty)
7941                         return true;
7942
7943                 if (rif && addr_list_empty &&
7944                     !netif_is_l3_slave(mlxsw_sp_rif_dev(rif)))
7945                         return true;
7946                 /* It is possible we already removed the RIF ourselves
7947                  * if it was assigned to a netdev that is now a bridge
7948                  * or LAG slave.
7949                  */
7950                 return false;
7951         }
7952
7953         return false;
7954 }
7955
7956 static enum mlxsw_sp_rif_type
7957 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7958                       const struct net_device *dev)
7959 {
7960         enum mlxsw_sp_fid_type type;
7961
7962         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7963                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
7964
7965         /* Otherwise RIF type is derived from the type of the underlying FID. */
7966         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7967                 type = MLXSW_SP_FID_TYPE_8021Q;
7968         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7969                 type = MLXSW_SP_FID_TYPE_8021Q;
7970         else if (netif_is_bridge_master(dev))
7971                 type = MLXSW_SP_FID_TYPE_8021D;
7972         else
7973                 type = MLXSW_SP_FID_TYPE_RFID;
7974
7975         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
7976 }
7977
7978 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
7979                                     u8 rif_entries)
7980 {
7981         *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
7982                                       rif_entries);
7983         if (*p_rif_index == 0)
7984                 return -ENOBUFS;
7985         *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
7986
7987         /* RIF indexes must be aligned to the allocation size. */
7988         WARN_ON_ONCE(*p_rif_index % rif_entries);
7989
7990         return 0;
7991 }
7992
7993 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7994                                     u8 rif_entries)
7995 {
7996         gen_pool_free(mlxsw_sp->router->rifs_table,
7997                       MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
7998 }
7999
8000 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
8001                                                u16 vr_id,
8002                                                struct mlxsw_sp_crif *crif)
8003 {
8004         struct net_device *l3_dev = crif ? crif->key.dev : NULL;
8005         struct mlxsw_sp_rif *rif;
8006
8007         rif = kzalloc(rif_size, GFP_KERNEL);
8008         if (!rif)
8009                 return NULL;
8010
8011         INIT_LIST_HEAD(&rif->neigh_list);
8012         if (l3_dev) {
8013                 ether_addr_copy(rif->addr, l3_dev->dev_addr);
8014                 rif->mtu = l3_dev->mtu;
8015         }
8016         rif->vr_id = vr_id;
8017         rif->rif_index = rif_index;
8018         if (crif) {
8019                 rif->crif = crif;
8020                 crif->rif = rif;
8021         }
8022
8023         return rif;
8024 }
8025
8026 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif)
8027 {
8028         WARN_ON(!list_empty(&rif->neigh_list));
8029
8030         if (rif->crif)
8031                 rif->crif->rif = NULL;
8032         kfree(rif);
8033 }
8034
8035 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
8036                                            u16 rif_index)
8037 {
8038         return mlxsw_sp->router->rifs[rif_index];
8039 }
8040
8041 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
8042 {
8043         return rif->rif_index;
8044 }
8045
8046 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8047 {
8048         return lb_rif->common.rif_index;
8049 }
8050
8051 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8052 {
8053         struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
8054         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
8055         struct mlxsw_sp_vr *ul_vr;
8056
8057         ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
8058         if (WARN_ON(IS_ERR(ul_vr)))
8059                 return 0;
8060
8061         return ul_vr->id;
8062 }
8063
8064 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8065 {
8066         return lb_rif->ul_rif_id;
8067 }
8068
8069 static bool
8070 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
8071 {
8072         return mlxsw_sp_rif_counter_valid_get(rif,
8073                                               MLXSW_SP_RIF_COUNTER_EGRESS) &&
8074                mlxsw_sp_rif_counter_valid_get(rif,
8075                                               MLXSW_SP_RIF_COUNTER_INGRESS);
8076 }
8077
8078 static int
8079 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
8080 {
8081         int err;
8082
8083         err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8084         if (err)
8085                 return err;
8086
8087         /* Clear stale data. */
8088         err = mlxsw_sp_rif_counter_fetch_clear(rif,
8089                                                MLXSW_SP_RIF_COUNTER_INGRESS,
8090                                                NULL);
8091         if (err)
8092                 goto err_clear_ingress;
8093
8094         err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8095         if (err)
8096                 goto err_alloc_egress;
8097
8098         /* Clear stale data. */
8099         err = mlxsw_sp_rif_counter_fetch_clear(rif,
8100                                                MLXSW_SP_RIF_COUNTER_EGRESS,
8101                                                NULL);
8102         if (err)
8103                 goto err_clear_egress;
8104
8105         return 0;
8106
8107 err_clear_egress:
8108         mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8109 err_alloc_egress:
8110 err_clear_ingress:
8111         mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8112         return err;
8113 }
8114
8115 static void
8116 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
8117 {
8118         mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8119         mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8120 }
8121
8122 static void
8123 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
8124                                           struct netdev_notifier_offload_xstats_info *info)
8125 {
8126         if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8127                 return;
8128         netdev_offload_xstats_report_used(info->report_used);
8129 }
8130
8131 static int
8132 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
8133                                     struct rtnl_hw_stats64 *p_stats)
8134 {
8135         struct mlxsw_sp_rif_counter_set_basic ingress;
8136         struct mlxsw_sp_rif_counter_set_basic egress;
8137         int err;
8138
8139         err = mlxsw_sp_rif_counter_fetch_clear(rif,
8140                                                MLXSW_SP_RIF_COUNTER_INGRESS,
8141                                                &ingress);
8142         if (err)
8143                 return err;
8144
8145         err = mlxsw_sp_rif_counter_fetch_clear(rif,
8146                                                MLXSW_SP_RIF_COUNTER_EGRESS,
8147                                                &egress);
8148         if (err)
8149                 return err;
8150
8151 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)              \
8152                 ((SET.good_unicast_ ## SFX) +           \
8153                  (SET.good_multicast_ ## SFX) +         \
8154                  (SET.good_broadcast_ ## SFX))
8155
8156         p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8157         p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8158         p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8159         p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8160         p_stats->rx_errors = ingress.error_packets;
8161         p_stats->tx_errors = egress.error_packets;
8162         p_stats->rx_dropped = ingress.discard_packets;
8163         p_stats->tx_dropped = egress.discard_packets;
8164         p_stats->multicast = ingress.good_multicast_packets +
8165                              ingress.good_broadcast_packets;
8166
8167 #undef MLXSW_SP_ROUTER_ALL_GOOD
8168
8169         return 0;
8170 }
8171
8172 static int
8173 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8174                                            struct netdev_notifier_offload_xstats_info *info)
8175 {
8176         struct rtnl_hw_stats64 stats = {};
8177         int err;
8178
8179         if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8180                 return 0;
8181
8182         err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8183         if (err)
8184                 return err;
8185
8186         netdev_offload_xstats_report_delta(info->report_delta, &stats);
8187         return 0;
8188 }
8189
8190 struct mlxsw_sp_router_hwstats_notify_work {
8191         struct work_struct work;
8192         struct net_device *dev;
8193 };
8194
8195 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8196 {
8197         struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8198                 container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8199                              work);
8200
8201         rtnl_lock();
8202         rtnl_offload_xstats_notify(hws_work->dev);
8203         rtnl_unlock();
8204         dev_put(hws_work->dev);
8205         kfree(hws_work);
8206 }
8207
8208 static void
8209 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8210 {
8211         struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8212
8213         /* To collect notification payload, the core ends up sending another
8214          * notifier block message, which would deadlock on the attempt to
8215          * acquire the router lock again. Just postpone the notification until
8216          * later.
8217          */
8218
8219         hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8220         if (!hws_work)
8221                 return;
8222
8223         INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8224         dev_hold(dev);
8225         hws_work->dev = dev;
8226         mlxsw_core_schedule_work(&hws_work->work);
8227 }
8228
8229 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8230 {
8231         return mlxsw_sp_rif_dev(rif)->ifindex;
8232 }
8233
8234 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8235 {
8236         return !!mlxsw_sp_rif_dev(rif);
8237 }
8238
8239 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8240                          const struct net_device *dev)
8241 {
8242         return mlxsw_sp_rif_dev(rif) == dev;
8243 }
8244
8245 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8246 {
8247         struct rtnl_hw_stats64 stats = {};
8248
8249         if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8250                 netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif),
8251                                                  NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8252                                                  &stats);
8253 }
8254
8255 static struct mlxsw_sp_rif *
8256 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8257                     const struct mlxsw_sp_rif_params *params,
8258                     struct netlink_ext_ack *extack)
8259 {
8260         u8 rif_entries = params->double_entry ? 2 : 1;
8261         u32 tb_id = l3mdev_fib_table(params->dev);
8262         const struct mlxsw_sp_rif_ops *ops;
8263         struct mlxsw_sp_fid *fid = NULL;
8264         enum mlxsw_sp_rif_type type;
8265         struct mlxsw_sp_crif *crif;
8266         struct mlxsw_sp_rif *rif;
8267         struct mlxsw_sp_vr *vr;
8268         u16 rif_index;
8269         int i, err;
8270
8271         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8272         ops = mlxsw_sp->router->rif_ops_arr[type];
8273
8274         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8275         if (IS_ERR(vr))
8276                 return ERR_CAST(vr);
8277         vr->rif_count++;
8278
8279         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8280         if (err) {
8281                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8282                 goto err_rif_index_alloc;
8283         }
8284
8285         crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev);
8286         if (WARN_ON(!crif)) {
8287                 err = -ENOENT;
8288                 goto err_crif_lookup;
8289         }
8290
8291         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif);
8292         if (!rif) {
8293                 err = -ENOMEM;
8294                 goto err_rif_alloc;
8295         }
8296         dev_hold(params->dev);
8297         mlxsw_sp->router->rifs[rif_index] = rif;
8298         rif->mlxsw_sp = mlxsw_sp;
8299         rif->ops = ops;
8300         rif->rif_entries = rif_entries;
8301
8302         if (ops->fid_get) {
8303                 fid = ops->fid_get(rif, extack);
8304                 if (IS_ERR(fid)) {
8305                         err = PTR_ERR(fid);
8306                         goto err_fid_get;
8307                 }
8308                 rif->fid = fid;
8309         }
8310
8311         if (ops->setup)
8312                 ops->setup(rif, params);
8313
8314         err = ops->configure(rif, extack);
8315         if (err)
8316                 goto err_configure;
8317
8318         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8319                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8320                 if (err)
8321                         goto err_mr_rif_add;
8322         }
8323
8324         if (netdev_offload_xstats_enabled(params->dev,
8325                                           NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8326                 err = mlxsw_sp_router_port_l3_stats_enable(rif);
8327                 if (err)
8328                         goto err_stats_enable;
8329                 mlxsw_sp_router_hwstats_notify_schedule(params->dev);
8330         } else {
8331                 mlxsw_sp_rif_counters_alloc(rif);
8332         }
8333
8334         atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8335         return rif;
8336
8337 err_stats_enable:
8338 err_mr_rif_add:
8339         for (i--; i >= 0; i--)
8340                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8341         ops->deconfigure(rif);
8342 err_configure:
8343         if (fid)
8344                 mlxsw_sp_fid_put(fid);
8345 err_fid_get:
8346         mlxsw_sp->router->rifs[rif_index] = NULL;
8347         dev_put(params->dev);
8348         mlxsw_sp_rif_free(rif);
8349 err_rif_alloc:
8350 err_crif_lookup:
8351         mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8352 err_rif_index_alloc:
8353         vr->rif_count--;
8354         mlxsw_sp_vr_put(mlxsw_sp, vr);
8355         return ERR_PTR(err);
8356 }
8357
8358 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8359 {
8360         struct net_device *dev = mlxsw_sp_rif_dev(rif);
8361         const struct mlxsw_sp_rif_ops *ops = rif->ops;
8362         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8363         struct mlxsw_sp_crif *crif = rif->crif;
8364         struct mlxsw_sp_fid *fid = rif->fid;
8365         u8 rif_entries = rif->rif_entries;
8366         u16 rif_index = rif->rif_index;
8367         struct mlxsw_sp_vr *vr;
8368         int i;
8369
8370         atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8371         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8372         vr = &mlxsw_sp->router->vrs[rif->vr_id];
8373
8374         if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8375                 mlxsw_sp_rif_push_l3_stats(rif);
8376                 mlxsw_sp_router_port_l3_stats_disable(rif);
8377                 mlxsw_sp_router_hwstats_notify_schedule(dev);
8378         } else {
8379                 mlxsw_sp_rif_counters_free(rif);
8380         }
8381
8382         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8383                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8384         ops->deconfigure(rif);
8385         if (fid)
8386                 /* Loopback RIFs are not associated with a FID. */
8387                 mlxsw_sp_fid_put(fid);
8388         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8389         dev_put(dev);
8390         mlxsw_sp_rif_free(rif);
8391         mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8392         vr->rif_count--;
8393         mlxsw_sp_vr_put(mlxsw_sp, vr);
8394
8395         if (crif->can_destroy)
8396                 mlxsw_sp_crif_free(crif);
8397 }
8398
8399 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8400                                  struct net_device *dev)
8401 {
8402         struct mlxsw_sp_rif *rif;
8403
8404         mutex_lock(&mlxsw_sp->router->lock);
8405         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8406         if (!rif)
8407                 goto out;
8408         mlxsw_sp_rif_destroy(rif);
8409 out:
8410         mutex_unlock(&mlxsw_sp->router->lock);
8411 }
8412
8413 static void
8414 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8415                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8416 {
8417         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8418
8419         params->vid = mlxsw_sp_port_vlan->vid;
8420         params->lag = mlxsw_sp_port->lagged;
8421         if (params->lag)
8422                 params->lag_id = mlxsw_sp_port->lag_id;
8423         else
8424                 params->system_port = mlxsw_sp_port->local_port;
8425 }
8426
8427 static struct mlxsw_sp_rif_subport *
8428 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8429 {
8430         return container_of(rif, struct mlxsw_sp_rif_subport, common);
8431 }
8432
8433 static struct mlxsw_sp_rif *
8434 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8435                          const struct mlxsw_sp_rif_params *params,
8436                          struct netlink_ext_ack *extack)
8437 {
8438         struct mlxsw_sp_rif_subport *rif_subport;
8439         struct mlxsw_sp_rif *rif;
8440
8441         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8442         if (!rif)
8443                 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8444
8445         rif_subport = mlxsw_sp_rif_subport_rif(rif);
8446         refcount_inc(&rif_subport->ref_count);
8447         return rif;
8448 }
8449
8450 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8451 {
8452         struct mlxsw_sp_rif_subport *rif_subport;
8453
8454         rif_subport = mlxsw_sp_rif_subport_rif(rif);
8455         if (!refcount_dec_and_test(&rif_subport->ref_count))
8456                 return;
8457
8458         mlxsw_sp_rif_destroy(rif);
8459 }
8460
8461 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8462                                                 struct mlxsw_sp_rif_mac_profile *profile,
8463                                                 struct netlink_ext_ack *extack)
8464 {
8465         u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8466         struct mlxsw_sp_router *router = mlxsw_sp->router;
8467         int id;
8468
8469         id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8470                        max_rif_mac_profiles, GFP_KERNEL);
8471
8472         if (id >= 0) {
8473                 profile->id = id;
8474                 return 0;
8475         }
8476
8477         if (id == -ENOSPC)
8478                 NL_SET_ERR_MSG_MOD(extack,
8479                                    "Exceeded number of supported router interface MAC profiles");
8480
8481         return id;
8482 }
8483
8484 static struct mlxsw_sp_rif_mac_profile *
8485 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8486 {
8487         struct mlxsw_sp_rif_mac_profile *profile;
8488
8489         profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8490                              mac_profile);
8491         WARN_ON(!profile);
8492         return profile;
8493 }
8494
8495 static struct mlxsw_sp_rif_mac_profile *
8496 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8497 {
8498         struct mlxsw_sp_rif_mac_profile *profile;
8499
8500         profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8501         if (!profile)
8502                 return NULL;
8503
8504         ether_addr_copy(profile->mac_prefix, mac);
8505         refcount_set(&profile->ref_count, 1);
8506         return profile;
8507 }
8508
8509 static struct mlxsw_sp_rif_mac_profile *
8510 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8511 {
8512         struct mlxsw_sp_router *router = mlxsw_sp->router;
8513         struct mlxsw_sp_rif_mac_profile *profile;
8514         int id;
8515
8516         idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8517                 if (ether_addr_equal_masked(profile->mac_prefix, mac,
8518                                             mlxsw_sp->mac_mask))
8519                         return profile;
8520         }
8521
8522         return NULL;
8523 }
8524
8525 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
8526 {
8527         const struct mlxsw_sp *mlxsw_sp = priv;
8528
8529         return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
8530 }
8531
8532 static u64 mlxsw_sp_rifs_occ_get(void *priv)
8533 {
8534         const struct mlxsw_sp *mlxsw_sp = priv;
8535
8536         return atomic_read(&mlxsw_sp->router->rifs_count);
8537 }
8538
8539 static struct mlxsw_sp_rif_mac_profile *
8540 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8541                                 struct netlink_ext_ack *extack)
8542 {
8543         struct mlxsw_sp_rif_mac_profile *profile;
8544         int err;
8545
8546         profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8547         if (!profile)
8548                 return ERR_PTR(-ENOMEM);
8549
8550         err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8551         if (err)
8552                 goto profile_index_alloc_err;
8553
8554         atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8555         return profile;
8556
8557 profile_index_alloc_err:
8558         kfree(profile);
8559         return ERR_PTR(err);
8560 }
8561
8562 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8563                                              u8 mac_profile)
8564 {
8565         struct mlxsw_sp_rif_mac_profile *profile;
8566
8567         atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8568         profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8569         kfree(profile);
8570 }
8571
8572 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8573                                         const char *mac, u8 *p_mac_profile,
8574                                         struct netlink_ext_ack *extack)
8575 {
8576         struct mlxsw_sp_rif_mac_profile *profile;
8577
8578         profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8579         if (profile) {
8580                 refcount_inc(&profile->ref_count);
8581                 goto out;
8582         }
8583
8584         profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8585         if (IS_ERR(profile))
8586                 return PTR_ERR(profile);
8587
8588 out:
8589         *p_mac_profile = profile->id;
8590         return 0;
8591 }
8592
8593 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8594                                          u8 mac_profile)
8595 {
8596         struct mlxsw_sp_rif_mac_profile *profile;
8597
8598         profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8599                            mac_profile);
8600         if (WARN_ON(!profile))
8601                 return;
8602
8603         if (!refcount_dec_and_test(&profile->ref_count))
8604                 return;
8605
8606         mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8607 }
8608
8609 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8610 {
8611         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8612         struct mlxsw_sp_rif_mac_profile *profile;
8613
8614         profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8615                            rif->mac_profile_id);
8616         if (WARN_ON(!profile))
8617                 return false;
8618
8619         return refcount_read(&profile->ref_count) > 1;
8620 }
8621
8622 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8623                                          const char *new_mac)
8624 {
8625         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8626         struct mlxsw_sp_rif_mac_profile *profile;
8627
8628         profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8629                            rif->mac_profile_id);
8630         if (WARN_ON(!profile))
8631                 return -EINVAL;
8632
8633         ether_addr_copy(profile->mac_prefix, new_mac);
8634         return 0;
8635 }
8636
8637 static int
8638 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8639                                  struct mlxsw_sp_rif *rif,
8640                                  const char *new_mac,
8641                                  struct netlink_ext_ack *extack)
8642 {
8643         u8 mac_profile;
8644         int err;
8645
8646         if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8647             !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8648                 return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8649
8650         err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8651                                            &mac_profile, extack);
8652         if (err)
8653                 return err;
8654
8655         mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8656         rif->mac_profile_id = mac_profile;
8657         return 0;
8658 }
8659
8660 static int
8661 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8662                                  struct net_device *l3_dev,
8663                                  struct netlink_ext_ack *extack)
8664 {
8665         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8666         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8667         struct mlxsw_sp_rif_params params = {
8668                 .dev = l3_dev,
8669         };
8670         u16 vid = mlxsw_sp_port_vlan->vid;
8671         struct mlxsw_sp_rif *rif;
8672         struct mlxsw_sp_fid *fid;
8673         int err;
8674
8675         mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8676         rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8677         if (IS_ERR(rif))
8678                 return PTR_ERR(rif);
8679
8680         /* FID was already created, just take a reference */
8681         fid = rif->ops->fid_get(rif, extack);
8682         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8683         if (err)
8684                 goto err_fid_port_vid_map;
8685
8686         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8687         if (err)
8688                 goto err_port_vid_learning_set;
8689
8690         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8691                                         BR_STATE_FORWARDING);
8692         if (err)
8693                 goto err_port_vid_stp_set;
8694
8695         mlxsw_sp_port_vlan->fid = fid;
8696
8697         return 0;
8698
8699 err_port_vid_stp_set:
8700         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8701 err_port_vid_learning_set:
8702         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8703 err_fid_port_vid_map:
8704         mlxsw_sp_fid_put(fid);
8705         mlxsw_sp_rif_subport_put(rif);
8706         return err;
8707 }
8708
8709 static void
8710 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8711 {
8712         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8713         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8714         struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8715         u16 vid = mlxsw_sp_port_vlan->vid;
8716
8717         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8718                 return;
8719
8720         mlxsw_sp_port_vlan->fid = NULL;
8721         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8722         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8723         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8724         mlxsw_sp_fid_put(fid);
8725         mlxsw_sp_rif_subport_put(rif);
8726 }
8727
8728 static int
8729 mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8730                                         struct net_device *l3_dev,
8731                                         struct netlink_ext_ack *extack)
8732 {
8733         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8734
8735         lockdep_assert_held(&mlxsw_sp->router->lock);
8736
8737         if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev))
8738                 return 0;
8739
8740         return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8741                                                 extack);
8742 }
8743
8744 void
8745 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8746 {
8747         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8748
8749         mutex_lock(&mlxsw_sp->router->lock);
8750         __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8751         mutex_unlock(&mlxsw_sp->router->lock);
8752 }
8753
8754 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8755                                              struct net_device *port_dev,
8756                                              unsigned long event, u16 vid,
8757                                              struct netlink_ext_ack *extack)
8758 {
8759         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8760         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8761
8762         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8763         if (WARN_ON(!mlxsw_sp_port_vlan))
8764                 return -EINVAL;
8765
8766         switch (event) {
8767         case NETDEV_UP:
8768                 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8769                                                         l3_dev, extack);
8770         case NETDEV_DOWN:
8771                 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8772                 break;
8773         }
8774
8775         return 0;
8776 }
8777
8778 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
8779                                         unsigned long event,
8780                                         struct netlink_ext_ack *extack)
8781 {
8782         if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev))
8783                 return 0;
8784
8785         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
8786                                                  MLXSW_SP_DEFAULT_VID, extack);
8787 }
8788
8789 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
8790                                          struct net_device *lag_dev,
8791                                          unsigned long event, u16 vid,
8792                                          struct netlink_ext_ack *extack)
8793 {
8794         struct net_device *port_dev;
8795         struct list_head *iter;
8796         int err;
8797
8798         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
8799                 if (mlxsw_sp_port_dev_check(port_dev)) {
8800                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
8801                                                                 port_dev,
8802                                                                 event, vid,
8803                                                                 extack);
8804                         if (err)
8805                                 return err;
8806                 }
8807         }
8808
8809         return 0;
8810 }
8811
8812 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
8813                                        unsigned long event,
8814                                        struct netlink_ext_ack *extack)
8815 {
8816         if (netif_is_bridge_port(lag_dev))
8817                 return 0;
8818
8819         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
8820                                              MLXSW_SP_DEFAULT_VID, extack);
8821 }
8822
8823 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8824                                           struct net_device *l3_dev,
8825                                           unsigned long event,
8826                                           struct netlink_ext_ack *extack)
8827 {
8828         struct mlxsw_sp_rif_params params = {
8829                 .dev = l3_dev,
8830         };
8831         struct mlxsw_sp_rif *rif;
8832
8833         switch (event) {
8834         case NETDEV_UP:
8835                 if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
8836                         u16 proto;
8837
8838                         br_vlan_get_proto(l3_dev, &proto);
8839                         if (proto == ETH_P_8021AD) {
8840                                 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
8841                                 return -EOPNOTSUPP;
8842                         }
8843                 }
8844                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8845                 if (IS_ERR(rif))
8846                         return PTR_ERR(rif);
8847                 break;
8848         case NETDEV_DOWN:
8849                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8850                 mlxsw_sp_rif_destroy(rif);
8851                 break;
8852         }
8853
8854         return 0;
8855 }
8856
8857 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
8858                                         struct net_device *vlan_dev,
8859                                         unsigned long event,
8860                                         struct netlink_ext_ack *extack)
8861 {
8862         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
8863         u16 vid = vlan_dev_vlan_id(vlan_dev);
8864
8865         if (netif_is_bridge_port(vlan_dev))
8866                 return 0;
8867
8868         if (mlxsw_sp_port_dev_check(real_dev))
8869                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
8870                                                          event, vid, extack);
8871         else if (netif_is_lag_master(real_dev))
8872                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
8873                                                      vid, extack);
8874         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
8875                 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
8876                                                       extack);
8877
8878         return 0;
8879 }
8880
8881 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
8882 {
8883         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
8884         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8885
8886         return ether_addr_equal_masked(mac, vrrp4, mask);
8887 }
8888
8889 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
8890 {
8891         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
8892         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8893
8894         return ether_addr_equal_masked(mac, vrrp6, mask);
8895 }
8896
8897 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8898                                 const u8 *mac, bool adding)
8899 {
8900         char ritr_pl[MLXSW_REG_RITR_LEN];
8901         u8 vrrp_id = adding ? mac[5] : 0;
8902         int err;
8903
8904         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
8905             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8906                 return 0;
8907
8908         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8909         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8910         if (err)
8911                 return err;
8912
8913         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8914                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8915         else
8916                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8917
8918         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8919 }
8920
8921 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8922                                     const struct net_device *macvlan_dev,
8923                                     struct netlink_ext_ack *extack)
8924 {
8925         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8926         struct mlxsw_sp_rif *rif;
8927         int err;
8928
8929         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8930         if (!rif) {
8931                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8932                 return -EOPNOTSUPP;
8933         }
8934
8935         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8936                                   mlxsw_sp_fid_index(rif->fid), true);
8937         if (err)
8938                 return err;
8939
8940         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8941                                    macvlan_dev->dev_addr, true);
8942         if (err)
8943                 goto err_rif_vrrp_add;
8944
8945         /* Make sure the bridge driver does not have this MAC pointing at
8946          * some other port.
8947          */
8948         if (rif->ops->fdb_del)
8949                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8950
8951         return 0;
8952
8953 err_rif_vrrp_add:
8954         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8955                             mlxsw_sp_fid_index(rif->fid), false);
8956         return err;
8957 }
8958
8959 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8960                                        const struct net_device *macvlan_dev)
8961 {
8962         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8963         struct mlxsw_sp_rif *rif;
8964
8965         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8966         /* If we do not have a RIF, then we already took care of
8967          * removing the macvlan's MAC during RIF deletion.
8968          */
8969         if (!rif)
8970                 return;
8971         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8972                              false);
8973         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8974                             mlxsw_sp_fid_index(rif->fid), false);
8975 }
8976
8977 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8978                               const struct net_device *macvlan_dev)
8979 {
8980         mutex_lock(&mlxsw_sp->router->lock);
8981         __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8982         mutex_unlock(&mlxsw_sp->router->lock);
8983 }
8984
8985 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8986                                            struct net_device *macvlan_dev,
8987                                            unsigned long event,
8988                                            struct netlink_ext_ack *extack)
8989 {
8990         switch (event) {
8991         case NETDEV_UP:
8992                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8993         case NETDEV_DOWN:
8994                 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8995                 break;
8996         }
8997
8998         return 0;
8999 }
9000
/* Dispatch an address event on @dev to the handler matching its type.
 *
 * The checks are made in a fixed order and the first match wins: port, LAG
 * master, bridge master, VLAN device, macvlan. A device matching none of
 * them is ignored.
 *
 * Return: the selected handler's result, or 0 when no handler applies.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
9022
9023 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
9024                                    unsigned long event, void *ptr)
9025 {
9026         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
9027         struct net_device *dev = ifa->ifa_dev->dev;
9028         struct mlxsw_sp_router *router;
9029         struct mlxsw_sp_rif *rif;
9030         int err = 0;
9031
9032         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
9033         if (event == NETDEV_UP)
9034                 return NOTIFY_DONE;
9035
9036         router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
9037         mutex_lock(&router->lock);
9038         rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
9039         if (!mlxsw_sp_rif_should_config(rif, dev, event))
9040                 goto out;
9041
9042         err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
9043 out:
9044         mutex_unlock(&router->lock);
9045         return notifier_from_errno(err);
9046 }
9047
9048 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
9049                                          unsigned long event, void *ptr)
9050 {
9051         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
9052         struct net_device *dev = ivi->ivi_dev->dev;
9053         struct mlxsw_sp *mlxsw_sp;
9054         struct mlxsw_sp_rif *rif;
9055         int err = 0;
9056
9057         mlxsw_sp = mlxsw_sp_lower_get(dev);
9058         if (!mlxsw_sp)
9059                 return NOTIFY_DONE;
9060
9061         mutex_lock(&mlxsw_sp->router->lock);
9062         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9063         if (!mlxsw_sp_rif_should_config(rif, dev, event))
9064                 goto out;
9065
9066         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
9067 out:
9068         mutex_unlock(&mlxsw_sp->router->lock);
9069         return notifier_from_errno(err);
9070 }
9071
9072 struct mlxsw_sp_inet6addr_event_work {
9073         struct work_struct work;
9074         struct mlxsw_sp *mlxsw_sp;
9075         struct net_device *dev;
9076         unsigned long event;
9077 };
9078
9079 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
9080 {
9081         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
9082                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
9083         struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
9084         struct net_device *dev = inet6addr_work->dev;
9085         unsigned long event = inet6addr_work->event;
9086         struct mlxsw_sp_rif *rif;
9087
9088         rtnl_lock();
9089         mutex_lock(&mlxsw_sp->router->lock);
9090
9091         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9092         if (!mlxsw_sp_rif_should_config(rif, dev, event))
9093                 goto out;
9094
9095         __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
9096 out:
9097         mutex_unlock(&mlxsw_sp->router->lock);
9098         rtnl_unlock();
9099         dev_put(dev);
9100         kfree(inet6addr_work);
9101 }
9102
9103 /* Called with rcu_read_lock() */
9104 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
9105                                     unsigned long event, void *ptr)
9106 {
9107         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
9108         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
9109         struct net_device *dev = if6->idev->dev;
9110         struct mlxsw_sp_router *router;
9111
9112         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
9113         if (event == NETDEV_UP)
9114                 return NOTIFY_DONE;
9115
9116         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
9117         if (!inet6addr_work)
9118                 return NOTIFY_BAD;
9119
9120         router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
9121         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
9122         inet6addr_work->mlxsw_sp = router->mlxsw_sp;
9123         inet6addr_work->dev = dev;
9124         inet6addr_work->event = event;
9125         dev_hold(dev);
9126         mlxsw_core_schedule_work(&inet6addr_work->work);
9127
9128         return NOTIFY_DONE;
9129 }
9130
9131 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
9132                                           unsigned long event, void *ptr)
9133 {
9134         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
9135         struct net_device *dev = i6vi->i6vi_dev->dev;
9136         struct mlxsw_sp *mlxsw_sp;
9137         struct mlxsw_sp_rif *rif;
9138         int err = 0;
9139
9140         mlxsw_sp = mlxsw_sp_lower_get(dev);
9141         if (!mlxsw_sp)
9142                 return NOTIFY_DONE;
9143
9144         mutex_lock(&mlxsw_sp->router->lock);
9145         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9146         if (!mlxsw_sp_rif_should_config(rif, dev, event))
9147                 goto out;
9148
9149         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
9150 out:
9151         mutex_unlock(&mlxsw_sp->router->lock);
9152         return notifier_from_errno(err);
9153 }
9154
9155 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9156                              const char *mac, int mtu, u8 mac_profile)
9157 {
9158         char ritr_pl[MLXSW_REG_RITR_LEN];
9159         int err;
9160
9161         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9162         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9163         if (err)
9164                 return err;
9165
9166         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9167         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9168         mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9169         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9170         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9171 }
9172
9173 static int
9174 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9175                                   struct mlxsw_sp_rif *rif,
9176                                   struct netlink_ext_ack *extack)
9177 {
9178         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9179         u8 old_mac_profile;
9180         u16 fid_index;
9181         int err;
9182
9183         fid_index = mlxsw_sp_fid_index(rif->fid);
9184
9185         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9186         if (err)
9187                 return err;
9188
9189         old_mac_profile = rif->mac_profile_id;
9190         err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9191                                                extack);
9192         if (err)
9193                 goto err_rif_mac_profile_replace;
9194
9195         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9196                                 dev->mtu, rif->mac_profile_id);
9197         if (err)
9198                 goto err_rif_edit;
9199
9200         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9201         if (err)
9202                 goto err_rif_fdb_op;
9203
9204         if (rif->mtu != dev->mtu) {
9205                 struct mlxsw_sp_vr *vr;
9206                 int i;
9207
9208                 /* The RIF is relevant only to its mr_table instance, as unlike
9209                  * unicast routing, in multicast routing a RIF cannot be shared
9210                  * between several multicast routing tables.
9211                  */
9212                 vr = &mlxsw_sp->router->vrs[rif->vr_id];
9213                 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9214                         mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9215                                                    rif, dev->mtu);
9216         }
9217
9218         ether_addr_copy(rif->addr, dev->dev_addr);
9219         rif->mtu = dev->mtu;
9220
9221         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9222
9223         return 0;
9224
9225 err_rif_fdb_op:
9226         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9227                           old_mac_profile);
9228 err_rif_edit:
9229         mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9230 err_rif_mac_profile_replace:
9231         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9232         return err;
9233 }
9234
9235 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9236                             struct netdev_notifier_pre_changeaddr_info *info)
9237 {
9238         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9239         struct mlxsw_sp_rif_mac_profile *profile;
9240         struct netlink_ext_ack *extack;
9241         u8 max_rif_mac_profiles;
9242         u64 occ;
9243
9244         extack = netdev_notifier_info_to_extack(&info->info);
9245
9246         profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9247         if (profile)
9248                 return 0;
9249
9250         max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9251         occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9252         if (occ < max_rif_mac_profiles)
9253                 return 0;
9254
9255         if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9256                 return 0;
9257
9258         NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9259         return -ENOBUFS;
9260 }
9261
9262 static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp,
9263                                                   struct net_device *dev)
9264 {
9265         struct vlan_dev_priv *vlan;
9266
9267         if (netif_is_lag_master(dev) ||
9268             netif_is_bridge_master(dev) ||
9269             mlxsw_sp_port_dev_check(dev) ||
9270             mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) ||
9271             netif_is_l3_master(dev))
9272                 return true;
9273
9274         if (!is_vlan_dev(dev))
9275                 return false;
9276
9277         vlan = vlan_dev_priv(dev);
9278         return netif_is_lag_master(vlan->real_dev) ||
9279                netif_is_bridge_master(vlan->real_dev) ||
9280                mlxsw_sp_port_dev_check(vlan->real_dev);
9281 }
9282
9283 static struct mlxsw_sp_crif *
9284 mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev)
9285 {
9286         struct mlxsw_sp_crif *crif;
9287         int err;
9288
9289         if (WARN_ON(mlxsw_sp_crif_lookup(router, dev)))
9290                 return NULL;
9291
9292         crif = mlxsw_sp_crif_alloc(dev);
9293         if (!crif)
9294                 return ERR_PTR(-ENOMEM);
9295
9296         err = mlxsw_sp_crif_insert(router, crif);
9297         if (err)
9298                 goto err_netdev_insert;
9299
9300         return crif;
9301
9302 err_netdev_insert:
9303         mlxsw_sp_crif_free(crif);
9304         return ERR_PTR(err);
9305 }
9306
9307 static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router,
9308                                      struct mlxsw_sp_crif *crif)
9309 {
9310         struct mlxsw_sp_nexthop *nh, *tmp;
9311
9312         mlxsw_sp_crif_remove(router, crif);
9313
9314         list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node)
9315                 mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh);
9316
9317         if (crif->rif)
9318                 crif->can_destroy = true;
9319         else
9320                 mlxsw_sp_crif_free(crif);
9321 }
9322
9323 static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router,
9324                                        struct net_device *dev)
9325 {
9326         struct mlxsw_sp_crif *crif;
9327
9328         if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9329                 return 0;
9330
9331         crif = mlxsw_sp_crif_register(router, dev);
9332         return PTR_ERR_OR_ZERO(crif);
9333 }
9334
9335 static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router,
9336                                           struct net_device *dev)
9337 {
9338         struct mlxsw_sp_crif *crif;
9339
9340         if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9341                 return;
9342
9343         /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts
9344          * the NETDEV_UNREGISTER message, so we can get here twice. If that's
9345          * what happened, the netdevice state is NETREG_UNREGISTERED. In that
9346          * case, we expect to have collected the CRIF already, and warn if it
9347          * still exists. Otherwise we expect the CRIF to exist.
9348          */
9349         crif = mlxsw_sp_crif_lookup(router, dev);
9350         if (dev->reg_state == NETREG_UNREGISTERED) {
9351                 if (!WARN_ON(crif))
9352                         return;
9353         }
9354         if (WARN_ON(!crif))
9355                 return;
9356
9357         mlxsw_sp_crif_unregister(router, crif);
9358 }
9359
9360 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9361 {
9362         switch (event) {
9363         case NETDEV_OFFLOAD_XSTATS_ENABLE:
9364         case NETDEV_OFFLOAD_XSTATS_DISABLE:
9365         case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9366         case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9367                 return true;
9368         }
9369
9370         return false;
9371 }
9372
9373 static int
9374 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9375                                         unsigned long event,
9376                                         struct netdev_notifier_offload_xstats_info *info)
9377 {
9378         switch (info->type) {
9379         case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9380                 break;
9381         default:
9382                 return 0;
9383         }
9384
9385         switch (event) {
9386         case NETDEV_OFFLOAD_XSTATS_ENABLE:
9387                 return mlxsw_sp_router_port_l3_stats_enable(rif);
9388         case NETDEV_OFFLOAD_XSTATS_DISABLE:
9389                 mlxsw_sp_router_port_l3_stats_disable(rif);
9390                 return 0;
9391         case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9392                 mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9393                 return 0;
9394         case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9395                 return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9396         }
9397
9398         WARN_ON_ONCE(1);
9399         return 0;
9400 }
9401
/* Execute an offload-xstats command for @dev.
 *
 * Return: 0 when @dev has no RIF, otherwise the result of running the
 * command on that RIF.
 */
static int
mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
				      struct net_device *dev,
				      unsigned long event,
				      struct netdev_notifier_offload_xstats_info *info)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		return mlxsw_sp_router_port_offload_xstats_cmd(rif, event,
							       info);

	return 0;
}
9416
9417 static bool mlxsw_sp_is_router_event(unsigned long event)
9418 {
9419         switch (event) {
9420         case NETDEV_PRE_CHANGEADDR:
9421         case NETDEV_CHANGEADDR:
9422         case NETDEV_CHANGEMTU:
9423                 return true;
9424         default:
9425                 return false;
9426         }
9427 }
9428
9429 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9430                                                 unsigned long event, void *ptr)
9431 {
9432         struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9433         struct mlxsw_sp *mlxsw_sp;
9434         struct mlxsw_sp_rif *rif;
9435
9436         mlxsw_sp = mlxsw_sp_lower_get(dev);
9437         if (!mlxsw_sp)
9438                 return 0;
9439
9440         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9441         if (!rif)
9442                 return 0;
9443
9444         switch (event) {
9445         case NETDEV_CHANGEMTU:
9446         case NETDEV_CHANGEADDR:
9447                 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9448         case NETDEV_PRE_CHANGEADDR:
9449                 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9450         default:
9451                 WARN_ON_ONCE(1);
9452                 break;
9453         }
9454
9455         return 0;
9456 }
9457
9458 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9459                                   struct net_device *l3_dev,
9460                                   struct netlink_ext_ack *extack)
9461 {
9462         struct mlxsw_sp_rif *rif;
9463
9464         /* If netdev is already associated with a RIF, then we need to
9465          * destroy it and create a new one with the new virtual router ID.
9466          */
9467         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9468         if (rif)
9469                 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
9470                                           extack);
9471
9472         return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
9473 }
9474
9475 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9476                                     struct net_device *l3_dev)
9477 {
9478         struct mlxsw_sp_rif *rif;
9479
9480         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9481         if (!rif)
9482                 return;
9483         __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
9484 }
9485
9486 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9487 {
9488         struct netdev_notifier_changeupper_info *info = ptr;
9489
9490         if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9491                 return false;
9492         return netif_is_l3_master(info->upper_dev);
9493 }
9494
9495 static int
9496 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9497                              struct netdev_notifier_changeupper_info *info)
9498 {
9499         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9500         int err = 0;
9501
9502         /* We do not create a RIF for a macvlan, but only use it to
9503          * direct more MAC addresses to the router.
9504          */
9505         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9506                 return 0;
9507
9508         switch (event) {
9509         case NETDEV_PRECHANGEUPPER:
9510                 break;
9511         case NETDEV_CHANGEUPPER:
9512                 if (info->linking) {
9513                         struct netlink_ext_ack *extack;
9514
9515                         extack = netdev_notifier_info_to_extack(&info->info);
9516                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9517                 } else {
9518                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9519                 }
9520                 break;
9521         }
9522
9523         return err;
9524 }
9525
9526 static int
9527 mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
9528                                        u16 vid, struct net_device *dev,
9529                                        struct netlink_ext_ack *extack)
9530 {
9531         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9532
9533         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9534                                                             vid);
9535         if (WARN_ON(!mlxsw_sp_port_vlan))
9536                 return -EINVAL;
9537
9538         return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan,
9539                                                        dev, extack);
9540 }
9541
9542 static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9543                                            struct net_device *lag_dev,
9544                                            struct netlink_ext_ack *extack)
9545 {
9546         u16 default_vid = MLXSW_SP_DEFAULT_VID;
9547
9548         return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port,
9549                                                       default_vid, lag_dev,
9550                                                       extack);
9551 }
9552
9553 int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9554                                   struct net_device *lag_dev,
9555                                   struct netlink_ext_ack *extack)
9556 {
9557         int err;
9558
9559         mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9560         err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack);
9561         mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9562
9563         return err;
9564 }
9565
9566 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
9567                                            unsigned long event, void *ptr)
9568 {
9569         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
9570         struct mlxsw_sp_router *router;
9571         struct mlxsw_sp *mlxsw_sp;
9572         int err = 0;
9573
9574         router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
9575         mlxsw_sp = router->mlxsw_sp;
9576
9577         mutex_lock(&mlxsw_sp->router->lock);
9578
9579         if (event == NETDEV_REGISTER) {
9580                 err = mlxsw_sp_netdevice_register(router, dev);
9581                 if (err)
9582                         /* No need to roll this back, UNREGISTER will collect it
9583                          * anyhow.
9584                          */
9585                         goto out;
9586         }
9587
9588         if (mlxsw_sp_is_offload_xstats_event(event))
9589                 err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
9590                                                             event, ptr);
9591         else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
9592                 err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
9593                                                        event, ptr);
9594         else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
9595                 err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
9596                                                        event, ptr);
9597         else if (mlxsw_sp_is_router_event(event))
9598                 err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
9599         else if (mlxsw_sp_is_vrf_event(event, ptr))
9600                 err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
9601
9602         if (event == NETDEV_UNREGISTER)
9603                 mlxsw_sp_netdevice_unregister(router, dev);
9604
9605 out:
9606         mutex_unlock(&mlxsw_sp->router->lock);
9607
9608         return notifier_from_errno(err);
9609 }
9610
9611 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
9612                                         struct netdev_nested_priv *priv)
9613 {
9614         struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
9615
9616         if (!netif_is_macvlan(dev))
9617                 return 0;
9618
9619         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9620                                    mlxsw_sp_fid_index(rif->fid), false);
9621 }
9622
9623 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
9624 {
9625         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9626         struct netdev_nested_priv priv = {
9627                 .data = (void *)rif,
9628         };
9629
9630         if (!netif_is_macvlan_port(dev))
9631                 return 0;
9632
9633         netdev_warn(dev, "Router interface is deleted. Upper macvlans will not work\n");
9634         return netdev_walk_all_upper_dev_rcu(dev,
9635                                              __mlxsw_sp_rif_macvlan_flush, &priv);
9636 }
9637
9638 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
9639                                        const struct mlxsw_sp_rif_params *params)
9640 {
9641         struct mlxsw_sp_rif_subport *rif_subport;
9642
9643         rif_subport = mlxsw_sp_rif_subport_rif(rif);
9644         refcount_set(&rif_subport->ref_count, 1);
9645         rif_subport->vid = params->vid;
9646         rif_subport->lag = params->lag;
9647         if (params->lag)
9648                 rif_subport->lag_id = params->lag_id;
9649         else
9650                 rif_subport->system_port = params->system_port;
9651 }
9652
9653 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
9654 {
9655         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9656         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9657         struct mlxsw_sp_rif_subport *rif_subport;
9658         char ritr_pl[MLXSW_REG_RITR_LEN];
9659         u16 efid;
9660
9661         rif_subport = mlxsw_sp_rif_subport_rif(rif);
9662         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
9663                             rif->rif_index, rif->vr_id, dev->mtu);
9664         mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
9665         mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9666         efid = mlxsw_sp_fid_index(rif->fid);
9667         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
9668                                   rif_subport->lag ? rif_subport->lag_id :
9669                                                      rif_subport->system_port,
9670                                   efid, 0);
9671         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9672 }
9673
9674 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
9675                                           struct netlink_ext_ack *extack)
9676 {
9677         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9678         u8 mac_profile;
9679         int err;
9680
9681         err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
9682                                            &mac_profile, extack);
9683         if (err)
9684                 return err;
9685         rif->mac_profile_id = mac_profile;
9686
9687         err = mlxsw_sp_rif_subport_op(rif, true);
9688         if (err)
9689                 goto err_rif_subport_op;
9690
9691         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9692                                   mlxsw_sp_fid_index(rif->fid), true);
9693         if (err)
9694                 goto err_rif_fdb_op;
9695
9696         err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9697         if (err)
9698                 goto err_fid_rif_set;
9699
9700         return 0;
9701
9702 err_fid_rif_set:
9703         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9704                             mlxsw_sp_fid_index(rif->fid), false);
9705 err_rif_fdb_op:
9706         mlxsw_sp_rif_subport_op(rif, false);
9707 err_rif_subport_op:
9708         mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
9709         return err;
9710 }
9711
9712 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
9713 {
9714         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9715         struct mlxsw_sp_fid *fid = rif->fid;
9716
9717         mlxsw_sp_fid_rif_unset(fid);
9718         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9719                             mlxsw_sp_fid_index(fid), false);
9720         mlxsw_sp_rif_macvlan_flush(rif);
9721         mlxsw_sp_rif_subport_op(rif, false);
9722         mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9723 }
9724
9725 static struct mlxsw_sp_fid *
9726 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
9727                              struct netlink_ext_ack *extack)
9728 {
9729         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
9730 }
9731
9732 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
9733         .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
9734         .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
9735         .setup                  = mlxsw_sp_rif_subport_setup,
9736         .configure              = mlxsw_sp_rif_subport_configure,
9737         .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
9738         .fid_get                = mlxsw_sp_rif_subport_fid_get,
9739 };
9740
9741 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
9742 {
9743         enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
9744         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9745         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9746         char ritr_pl[MLXSW_REG_RITR_LEN];
9747
9748         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
9749                             dev->mtu);
9750         mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
9751         mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9752         mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
9753
9754         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9755 }
9756
9757 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
9758 {
9759         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
9760 }
9761
9762 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
9763                                       struct netlink_ext_ack *extack)
9764 {
9765         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9766         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9767         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9768         u8 mac_profile;
9769         int err;
9770
9771         err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9772                                            &mac_profile, extack);
9773         if (err)
9774                 return err;
9775         rif->mac_profile_id = mac_profile;
9776
9777         err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
9778         if (err)
9779                 goto err_rif_fid_op;
9780
9781         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9782                                      mlxsw_sp_router_port(mlxsw_sp), true);
9783         if (err)
9784                 goto err_fid_mc_flood_set;
9785
9786         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9787                                      mlxsw_sp_router_port(mlxsw_sp), true);
9788         if (err)
9789                 goto err_fid_bc_flood_set;
9790
9791         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9792                                   mlxsw_sp_fid_index(rif->fid), true);
9793         if (err)
9794                 goto err_rif_fdb_op;
9795
9796         err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9797         if (err)
9798                 goto err_fid_rif_set;
9799
9800         return 0;
9801
9802 err_fid_rif_set:
9803         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9804                             mlxsw_sp_fid_index(rif->fid), false);
9805 err_rif_fdb_op:
9806         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9807                                mlxsw_sp_router_port(mlxsw_sp), false);
9808 err_fid_bc_flood_set:
9809         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9810                                mlxsw_sp_router_port(mlxsw_sp), false);
9811 err_fid_mc_flood_set:
9812         mlxsw_sp_rif_fid_op(rif, fid_index, false);
9813 err_rif_fid_op:
9814         mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9815         return err;
9816 }
9817
9818 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
9819 {
9820         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9821         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9822         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9823         struct mlxsw_sp_fid *fid = rif->fid;
9824
9825         mlxsw_sp_fid_rif_unset(fid);
9826         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9827                             mlxsw_sp_fid_index(fid), false);
9828         mlxsw_sp_rif_macvlan_flush(rif);
9829         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9830                                mlxsw_sp_router_port(mlxsw_sp), false);
9831         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9832                                mlxsw_sp_router_port(mlxsw_sp), false);
9833         mlxsw_sp_rif_fid_op(rif, fid_index, false);
9834         mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9835 }
9836
9837 static struct mlxsw_sp_fid *
9838 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
9839                          struct netlink_ext_ack *extack)
9840 {
9841         int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
9842
9843         return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex);
9844 }
9845
9846 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9847 {
9848         struct switchdev_notifier_fdb_info info = {};
9849         struct net_device *dev;
9850
9851         dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0);
9852         if (!dev)
9853                 return;
9854
9855         info.addr = mac;
9856         info.vid = 0;
9857         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9858                                  NULL);
9859 }
9860
9861 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
9862         .type                   = MLXSW_SP_RIF_TYPE_FID,
9863         .rif_size               = sizeof(struct mlxsw_sp_rif),
9864         .configure              = mlxsw_sp_rif_fid_configure,
9865         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
9866         .fid_get                = mlxsw_sp_rif_fid_fid_get,
9867         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
9868 };
9869
9870 static struct mlxsw_sp_fid *
9871 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
9872                           struct netlink_ext_ack *extack)
9873 {
9874         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9875         struct net_device *br_dev;
9876         u16 vid;
9877         int err;
9878
9879         if (is_vlan_dev(dev)) {
9880                 vid = vlan_dev_vlan_id(dev);
9881                 br_dev = vlan_dev_real_dev(dev);
9882                 if (WARN_ON(!netif_is_bridge_master(br_dev)))
9883                         return ERR_PTR(-EINVAL);
9884         } else {
9885                 err = br_vlan_get_pvid(dev, &vid);
9886                 if (err < 0 || !vid) {
9887                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
9888                         return ERR_PTR(-EINVAL);
9889                 }
9890         }
9891
9892         return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
9893 }
9894
9895 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9896 {
9897         struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
9898         struct switchdev_notifier_fdb_info info = {};
9899         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9900         struct net_device *br_dev;
9901         struct net_device *dev;
9902
9903         br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev;
9904         dev = br_fdb_find_port(br_dev, mac, vid);
9905         if (!dev)
9906                 return;
9907
9908         info.addr = mac;
9909         info.vid = vid;
9910         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9911                                  NULL);
9912 }
9913
9914 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
9915                                 bool enable)
9916 {
9917         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9918         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9919         char ritr_pl[MLXSW_REG_RITR_LEN];
9920
9921         mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
9922                                     dev->mtu, dev->dev_addr,
9923                                     rif->mac_profile_id, vid, efid);
9924
9925         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9926 }
9927
9928 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
9929                                        struct netlink_ext_ack *extack)
9930 {
9931         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9932         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9933         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9934         u8 mac_profile;
9935         int err;
9936
9937         err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9938                                            &mac_profile, extack);
9939         if (err)
9940                 return err;
9941         rif->mac_profile_id = mac_profile;
9942
9943         err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
9944         if (err)
9945                 goto err_rif_vlan_fid_op;
9946
9947         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9948                                      mlxsw_sp_router_port(mlxsw_sp), true);
9949         if (err)
9950                 goto err_fid_mc_flood_set;
9951
9952         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9953                                      mlxsw_sp_router_port(mlxsw_sp), true);
9954         if (err)
9955                 goto err_fid_bc_flood_set;
9956
9957         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9958                                   mlxsw_sp_fid_index(rif->fid), true);
9959         if (err)
9960                 goto err_rif_fdb_op;
9961
9962         err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9963         if (err)
9964                 goto err_fid_rif_set;
9965
9966         return 0;
9967
9968 err_fid_rif_set:
9969         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9970                             mlxsw_sp_fid_index(rif->fid), false);
9971 err_rif_fdb_op:
9972         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9973                                mlxsw_sp_router_port(mlxsw_sp), false);
9974 err_fid_bc_flood_set:
9975         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9976                                mlxsw_sp_router_port(mlxsw_sp), false);
9977 err_fid_mc_flood_set:
9978         mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9979 err_rif_vlan_fid_op:
9980         mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9981         return err;
9982 }
9983
9984 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
9985 {
9986         struct net_device *dev = mlxsw_sp_rif_dev(rif);
9987         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9988         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9989
9990         mlxsw_sp_fid_rif_unset(rif->fid);
9991         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9992                             mlxsw_sp_fid_index(rif->fid), false);
9993         mlxsw_sp_rif_macvlan_flush(rif);
9994         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9995                                mlxsw_sp_router_port(mlxsw_sp), false);
9996         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9997                                mlxsw_sp_router_port(mlxsw_sp), false);
9998         mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9999         mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10000 }
10001
10002 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10003                                         struct netlink_ext_ack *extack)
10004 {
10005         return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
10006 }
10007
10008 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
10009         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
10010         .rif_size               = sizeof(struct mlxsw_sp_rif),
10011         .configure              = mlxsw_sp1_rif_vlan_configure,
10012         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
10013         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
10014         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
10015 };
10016
10017 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10018                                         struct netlink_ext_ack *extack)
10019 {
10020         u16 efid = mlxsw_sp_fid_index(rif->fid);
10021
10022         return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
10023 }
10024
10025 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
10026         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
10027         .rif_size               = sizeof(struct mlxsw_sp_rif),
10028         .configure              = mlxsw_sp2_rif_vlan_configure,
10029         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
10030         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
10031         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
10032 };
10033
10034 static struct mlxsw_sp_rif_ipip_lb *
10035 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
10036 {
10037         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
10038 }
10039
10040 static void
10041 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
10042                            const struct mlxsw_sp_rif_params *params)
10043 {
10044         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
10045         struct mlxsw_sp_rif_ipip_lb *rif_lb;
10046
10047         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
10048                                  common);
10049         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
10050         rif_lb->lb_config = params_lb->lb_config;
10051 }
10052
10053 static int
10054 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10055                                 struct netlink_ext_ack *extack)
10056 {
10057         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10058         struct net_device *dev = mlxsw_sp_rif_dev(rif);
10059         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10060         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10061         struct mlxsw_sp_vr *ul_vr;
10062         int err;
10063
10064         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
10065         if (IS_ERR(ul_vr))
10066                 return PTR_ERR(ul_vr);
10067
10068         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
10069         if (err)
10070                 goto err_loopback_op;
10071
10072         lb_rif->ul_vr_id = ul_vr->id;
10073         lb_rif->ul_rif_id = 0;
10074         ++ul_vr->rif_count;
10075         return 0;
10076
10077 err_loopback_op:
10078         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10079         return err;
10080 }
10081
10082 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10083 {
10084         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10085         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10086         struct mlxsw_sp_vr *ul_vr;
10087
10088         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
10089         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
10090
10091         --ul_vr->rif_count;
10092         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10093 }
10094
10095 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
10096         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
10097         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
10098         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
10099         .configure              = mlxsw_sp1_rif_ipip_lb_configure,
10100         .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
10101 };
10102
10103 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
10104         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
10105         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp1_rif_vlan_ops,
10106         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
10107         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
10108 };
10109
10110 static int
10111 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
10112 {
10113         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10114         char ritr_pl[MLXSW_REG_RITR_LEN];
10115
10116         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
10117                             ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
10118         mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
10119                                              MLXSW_REG_RITR_LOOPBACK_GENERIC);
10120
10121         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10122 }
10123
10124 static struct mlxsw_sp_rif *
10125 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
10126                        struct mlxsw_sp_crif *ul_crif,
10127                        struct netlink_ext_ack *extack)
10128 {
10129         struct mlxsw_sp_rif *ul_rif;
10130         u8 rif_entries = 1;
10131         u16 rif_index;
10132         int err;
10133
10134         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
10135         if (err) {
10136                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
10137                 return ERR_PTR(err);
10138         }
10139
10140         ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id,
10141                                     ul_crif);
10142         if (!ul_rif) {
10143                 err = -ENOMEM;
10144                 goto err_rif_alloc;
10145         }
10146
10147         mlxsw_sp->router->rifs[rif_index] = ul_rif;
10148         ul_rif->mlxsw_sp = mlxsw_sp;
10149         ul_rif->rif_entries = rif_entries;
10150         err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
10151         if (err)
10152                 goto ul_rif_op_err;
10153
10154         atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
10155         return ul_rif;
10156
10157 ul_rif_op_err:
10158         mlxsw_sp->router->rifs[rif_index] = NULL;
10159         mlxsw_sp_rif_free(ul_rif);
10160 err_rif_alloc:
10161         mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10162         return ERR_PTR(err);
10163 }
10164
10165 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
10166 {
10167         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10168         u8 rif_entries = ul_rif->rif_entries;
10169         u16 rif_index = ul_rif->rif_index;
10170
10171         atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
10172         mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
10173         mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
10174         mlxsw_sp_rif_free(ul_rif);
10175         mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10176 }
10177
10178 static struct mlxsw_sp_rif *
10179 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
10180                     struct mlxsw_sp_crif *ul_crif,
10181                     struct netlink_ext_ack *extack)
10182 {
10183         struct mlxsw_sp_vr *vr;
10184         int err;
10185
10186         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
10187         if (IS_ERR(vr))
10188                 return ERR_CAST(vr);
10189
10190         if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
10191                 return vr->ul_rif;
10192
10193         vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack);
10194         if (IS_ERR(vr->ul_rif)) {
10195                 err = PTR_ERR(vr->ul_rif);
10196                 goto err_ul_rif_create;
10197         }
10198
10199         vr->rif_count++;
10200         refcount_set(&vr->ul_rif_refcnt, 1);
10201
10202         return vr->ul_rif;
10203
10204 err_ul_rif_create:
10205         mlxsw_sp_vr_put(mlxsw_sp, vr);
10206         return ERR_PTR(err);
10207 }
10208
10209 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
10210 {
10211         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10212         struct mlxsw_sp_vr *vr;
10213
10214         vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
10215
10216         if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
10217                 return;
10218
10219         vr->rif_count--;
10220         mlxsw_sp_ul_rif_destroy(ul_rif);
10221         mlxsw_sp_vr_put(mlxsw_sp, vr);
10222 }
10223
10224 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
10225                                u16 *ul_rif_index)
10226 {
10227         struct mlxsw_sp_rif *ul_rif;
10228         int err = 0;
10229
10230         mutex_lock(&mlxsw_sp->router->lock);
10231         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL);
10232         if (IS_ERR(ul_rif)) {
10233                 err = PTR_ERR(ul_rif);
10234                 goto out;
10235         }
10236         *ul_rif_index = ul_rif->rif_index;
10237 out:
10238         mutex_unlock(&mlxsw_sp->router->lock);
10239         return err;
10240 }
10241
10242 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
10243 {
10244         struct mlxsw_sp_rif *ul_rif;
10245
10246         mutex_lock(&mlxsw_sp->router->lock);
10247         ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
10248         if (WARN_ON(!ul_rif))
10249                 goto out;
10250
10251         mlxsw_sp_ul_rif_put(ul_rif);
10252 out:
10253         mutex_unlock(&mlxsw_sp->router->lock);
10254 }
10255
10256 static int
10257 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10258                                 struct netlink_ext_ack *extack)
10259 {
10260         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10261         struct net_device *dev = mlxsw_sp_rif_dev(rif);
10262         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10263         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10264         struct mlxsw_sp_rif *ul_rif;
10265         int err;
10266
10267         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack);
10268         if (IS_ERR(ul_rif))
10269                 return PTR_ERR(ul_rif);
10270
10271         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
10272         if (err)
10273                 goto err_loopback_op;
10274
10275         lb_rif->ul_vr_id = 0;
10276         lb_rif->ul_rif_id = ul_rif->rif_index;
10277
10278         return 0;
10279
10280 err_loopback_op:
10281         mlxsw_sp_ul_rif_put(ul_rif);
10282         return err;
10283 }
10284
10285 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10286 {
10287         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10288         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10289         struct mlxsw_sp_rif *ul_rif;
10290
10291         ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
10292         mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
10293         mlxsw_sp_ul_rif_put(ul_rif);
10294 }
10295
10296 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
10297         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
10298         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
10299         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
10300         .configure              = mlxsw_sp2_rif_ipip_lb_configure,
10301         .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
10302 };
10303
10304 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
10305         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
10306         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp2_rif_vlan_ops,
10307         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
10308         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
10309 };
10310
10311 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
10312 {
10313         struct gen_pool *rifs_table;
10314         int err;
10315
10316         rifs_table = gen_pool_create(0, -1);
10317         if (!rifs_table)
10318                 return -ENOMEM;
10319
10320         gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
10321                           NULL);
10322
10323         err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
10324                            MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
10325         if (err)
10326                 goto err_gen_pool_add;
10327
10328         mlxsw_sp->router->rifs_table = rifs_table;
10329
10330         return 0;
10331
10332 err_gen_pool_add:
10333         gen_pool_destroy(rifs_table);
10334         return err;
10335 }
10336
10337 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
10338 {
10339         gen_pool_destroy(mlxsw_sp->router->rifs_table);
10340 }
10341
10342 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
10343 {
10344         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10345         struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10346         struct mlxsw_core *core = mlxsw_sp->core;
10347         int err;
10348
10349         if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
10350                 return -EIO;
10351         mlxsw_sp->router->max_rif_mac_profile =
10352                 MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
10353
10354         mlxsw_sp->router->rifs = kcalloc(max_rifs,
10355                                          sizeof(struct mlxsw_sp_rif *),
10356                                          GFP_KERNEL);
10357         if (!mlxsw_sp->router->rifs)
10358                 return -ENOMEM;
10359
10360         err = mlxsw_sp_rifs_table_init(mlxsw_sp);
10361         if (err)
10362                 goto err_rifs_table_init;
10363
10364         idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
10365         atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
10366         atomic_set(&mlxsw_sp->router->rifs_count, 0);
10367         devl_resource_occ_get_register(devlink,
10368                                        MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
10369                                        mlxsw_sp_rif_mac_profiles_occ_get,
10370                                        mlxsw_sp);
10371         devl_resource_occ_get_register(devlink,
10372                                        MLXSW_SP_RESOURCE_RIFS,
10373                                        mlxsw_sp_rifs_occ_get,
10374                                        mlxsw_sp);
10375
10376         return 0;
10377
10378 err_rifs_table_init:
10379         kfree(mlxsw_sp->router->rifs);
10380         return err;
10381 }
10382
10383 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
10384 {
10385         int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10386         struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10387         int i;
10388
10389         WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
10390         for (i = 0; i < max_rifs; i++)
10391                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
10392
10393         devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
10394         devl_resource_occ_get_unregister(devlink,
10395                                          MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
10396         WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
10397         idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
10398         mlxsw_sp_rifs_table_fini(mlxsw_sp);
10399         kfree(mlxsw_sp->router->rifs);
10400 }
10401
10402 static int
10403 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
10404 {
10405         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
10406
10407         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
10408         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
10409 }
10410
10411 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10412 {
10413         int err;
10414
10415         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10416
10417         err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10418         if (err)
10419                 return err;
10420         err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10421         if (err)
10422                 return err;
10423
10424         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10425 }
10426
10427 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
10428 {
10429         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
10430         return mlxsw_sp_ipips_init(mlxsw_sp);
10431 }
10432
10433 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
10434 {
10435         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
10436         return mlxsw_sp_ipips_init(mlxsw_sp);
10437 }
10438
10439 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
10440 {
10441         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
10442 }
10443
10444 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10445 {
10446         struct mlxsw_sp_router *router;
10447
10448         /* Flush pending FIB notifications and then flush the device's
10449          * table before requesting another dump. The FIB notification
10450          * block is unregistered, so no need to take RTNL.
10451          */
10452         mlxsw_core_flush_owq();
10453         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10454         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10455 }
10456
10457 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Multipath hash configuration accumulated by the mp4/mp6 hash init helpers
 * and then translated into a RECR2 register payload by
 * mlxsw_sp_mp_hash_init(). Bits are indexed by MLXSW_REG_RECR2_* values.
 */
struct mlxsw_sp_mp_hash_config {
	/* Outer header enables. */
	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
	/* Outer header fields taking part in the hash. */
	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
	/* Inner header enables. */
	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
	/* Inner header fields taking part in the hash. */
	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
	/* Requests an increased parsing depth; set by the IPv6 policies that
	 * hash on inner headers.
	 */
	bool inc_parsing_depth;
};
10465
/* Set the single bit MLXSW_REG_RECR2_<_header> in the @_headers bitmap. */
#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)

/* Set the single bit MLXSW_REG_RECR2_<_field> in the @_fields bitmap. */
#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)

/* Set @_nr consecutive bits in the @_fields bitmap, starting at
 * MLXSW_REG_RECR2_<_field>.
 */
#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
10474
10475 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
10476 {
10477         unsigned long *inner_headers = config->inner_headers;
10478         unsigned long *inner_fields = config->inner_fields;
10479
10480         /* IPv4 inner */
10481         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10482         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10483         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10484         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10485         /* IPv6 inner */
10486         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10487         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10488         MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10489         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10490         MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10491         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10492         MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10493         MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10494 }
10495
10496 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10497 {
10498         unsigned long *headers = config->headers;
10499         unsigned long *fields = config->fields;
10500
10501         MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10502         MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10503         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10504         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10505 }
10506
10507 static void
10508 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
10509                               u32 hash_fields)
10510 {
10511         unsigned long *inner_headers = config->inner_headers;
10512         unsigned long *inner_fields = config->inner_fields;
10513
10514         /* IPv4 Inner */
10515         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10516         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10517         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
10518                 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10519         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
10520                 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10521         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10522                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
10523         /* IPv6 inner */
10524         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10525         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10526         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
10527                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10528                 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10529         }
10530         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
10531                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10532                 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10533         }
10534         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10535                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10536         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
10537                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10538         /* L4 inner */
10539         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
10540         MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
10541         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
10542                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
10543         if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
10544                 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
10545 }
10546
10547 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
10548                                    struct mlxsw_sp_mp_hash_config *config)
10549 {
10550         struct net *net = mlxsw_sp_net(mlxsw_sp);
10551         unsigned long *headers = config->headers;
10552         unsigned long *fields = config->fields;
10553         u32 hash_fields;
10554
10555         switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
10556         case 0:
10557                 mlxsw_sp_mp4_hash_outer_addr(config);
10558                 break;
10559         case 1:
10560                 mlxsw_sp_mp4_hash_outer_addr(config);
10561                 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10562                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10563                 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10564                 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10565                 break;
10566         case 2:
10567                 /* Outer */
10568                 mlxsw_sp_mp4_hash_outer_addr(config);
10569                 /* Inner */
10570                 mlxsw_sp_mp_hash_inner_l3(config);
10571                 break;
10572         case 3:
10573                 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
10574                 /* Outer */
10575                 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10576                 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10577                 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10578                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
10579                         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10580                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
10581                         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10582                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10583                         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10584                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10585                         MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10586                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10587                         MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10588                 /* Inner */
10589                 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10590                 break;
10591         }
10592 }
10593
10594 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10595 {
10596         unsigned long *headers = config->headers;
10597         unsigned long *fields = config->fields;
10598
10599         MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10600         MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10601         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10602         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10603         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10604         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10605 }
10606
10607 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
10608                                    struct mlxsw_sp_mp_hash_config *config)
10609 {
10610         u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
10611         unsigned long *headers = config->headers;
10612         unsigned long *fields = config->fields;
10613
10614         switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
10615         case 0:
10616                 mlxsw_sp_mp6_hash_outer_addr(config);
10617                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10618                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10619                 break;
10620         case 1:
10621                 mlxsw_sp_mp6_hash_outer_addr(config);
10622                 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10623                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10624                 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10625                 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10626                 break;
10627         case 2:
10628                 /* Outer */
10629                 mlxsw_sp_mp6_hash_outer_addr(config);
10630                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10631                 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10632                 /* Inner */
10633                 mlxsw_sp_mp_hash_inner_l3(config);
10634                 config->inc_parsing_depth = true;
10635                 break;
10636         case 3:
10637                 /* Outer */
10638                 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10639                 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10640                 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10641                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
10642                         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10643                         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10644                 }
10645                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
10646                         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10647                         MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10648                 }
10649                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10650                         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10651                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
10652                         MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10653                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10654                         MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10655                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10656                         MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10657                 /* Inner */
10658                 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10659                 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
10660                         config->inc_parsing_depth = true;
10661                 break;
10662         }
10663 }
10664
10665 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
10666                                                  bool old_inc_parsing_depth,
10667                                                  bool new_inc_parsing_depth)
10668 {
10669         int err;
10670
10671         if (!old_inc_parsing_depth && new_inc_parsing_depth) {
10672                 err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
10673                 if (err)
10674                         return err;
10675                 mlxsw_sp->router->inc_parsing_depth = true;
10676         } else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
10677                 mlxsw_sp_parsing_depth_dec(mlxsw_sp);
10678                 mlxsw_sp->router->inc_parsing_depth = false;
10679         }
10680
10681         return 0;
10682 }
10683
10684 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
10685 {
10686         bool old_inc_parsing_depth, new_inc_parsing_depth;
10687         struct mlxsw_sp_mp_hash_config config = {};
10688         char recr2_pl[MLXSW_REG_RECR2_LEN];
10689         unsigned long bit;
10690         u32 seed;
10691         int err;
10692
10693         seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
10694         mlxsw_reg_recr2_pack(recr2_pl, seed);
10695         mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
10696         mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
10697
10698         old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10699         new_inc_parsing_depth = config.inc_parsing_depth;
10700         err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
10701                                                     old_inc_parsing_depth,
10702                                                     new_inc_parsing_depth);
10703         if (err)
10704                 return err;
10705
10706         for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
10707                 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
10708         for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
10709                 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
10710         for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
10711                 mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
10712         for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
10713                 mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
10714
10715         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
10716         if (err)
10717                 goto err_reg_write;
10718
10719         return 0;
10720
10721 err_reg_write:
10722         mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
10723                                               old_inc_parsing_depth);
10724         return err;
10725 }
10726
10727 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
10728 {
10729         bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10730
10731         mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
10732                                               false);
10733 }
10734 #else
/* Stub for builds without CONFIG_IP_ROUTE_MULTIPATH: there is no multipath
 * hash to configure, so this always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
10739
/* Stub for builds without CONFIG_IP_ROUTE_MULTIPATH: nothing to undo. */
static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
{
}
10743 #endif
10744
10745 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
10746 {
10747         char rdpm_pl[MLXSW_REG_RDPM_LEN];
10748         unsigned int i;
10749
10750         MLXSW_REG_ZERO(rdpm, rdpm_pl);
10751
10752         /* HW is determining switch priority based on DSCP-bits, but the
10753          * kernel is still doing that based on the ToS. Since there's a
10754          * mismatch in bits we need to make sure to translate the right
10755          * value ToS would observe, skipping the 2 least-significant ECN bits.
10756          */
10757         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
10758                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
10759
10760         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
10761 }
10762
10763 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
10764 {
10765         struct net *net = mlxsw_sp_net(mlxsw_sp);
10766         char rgcr_pl[MLXSW_REG_RGCR_LEN];
10767         u64 max_rifs;
10768         bool usp;
10769
10770         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
10771                 return -EIO;
10772         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10773         usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
10774
10775         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
10776         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
10777         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
10778         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
10779 }
10780
10781 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
10782 {
10783         char rgcr_pl[MLXSW_REG_RGCR_LEN];
10784
10785         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
10786         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
10787 }
10788
10789 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp,
10790                                 struct netlink_ext_ack *extack)
10791 {
10792         struct mlxsw_sp_router *router = mlxsw_sp->router;
10793         struct mlxsw_sp_rif *lb_rif;
10794         int err;
10795
10796         router->lb_crif = mlxsw_sp_crif_alloc(NULL);
10797         if (!router->lb_crif)
10798                 return -ENOMEM;
10799
10800         /* Create a generic loopback RIF associated with the main table
10801          * (default VRF). Any table can be used, but the main table exists
10802          * anyway, so we do not waste resources. Loopback RIFs are usually
10803          * created with a NULL CRIF, but this RIF is used as a fallback RIF
10804          * for blackhole nexthops, and nexthops expect to have a valid CRIF.
10805          */
10806         lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif,
10807                                      extack);
10808         if (IS_ERR(lb_rif)) {
10809                 err = PTR_ERR(lb_rif);
10810                 goto err_ul_rif_get;
10811         }
10812
10813         return 0;
10814
10815 err_ul_rif_get:
10816         mlxsw_sp_crif_free(router->lb_crif);
10817         return err;
10818 }
10819
10820 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
10821 {
10822         mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif);
10823         mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif);
10824 }
10825
10826 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
10827 {
10828         size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
10829
10830         mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
10831         mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
10832         mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10833
10834         return 0;
10835 }
10836
/* Router ops bound to the mlxsw_sp1_* callbacks. mlxsw_sp_router_init()
 * calls ->init and ->ipips_init through mlxsw_sp->router_ops.
 */
const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
	.init = mlxsw_sp1_router_init,
	.ipips_init = mlxsw_sp1_ipips_init,
};
10841
10842 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
10843 {
10844         size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
10845
10846         mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
10847         mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
10848         mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10849
10850         return 0;
10851 }
10852
/* Router ops bound to the mlxsw_sp2_* callbacks. mlxsw_sp_router_init()
 * calls ->init and ->ipips_init through mlxsw_sp->router_ops.
 */
const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
	.init = mlxsw_sp2_router_init,
	.ipips_init = mlxsw_sp2_ipips_init,
};
10857
10858 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
10859                          struct netlink_ext_ack *extack)
10860 {
10861         struct mlxsw_sp_router *router;
10862         struct notifier_block *nb;
10863         int err;
10864
10865         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
10866         if (!router)
10867                 return -ENOMEM;
10868         mutex_init(&router->lock);
10869         mlxsw_sp->router = router;
10870         router->mlxsw_sp = mlxsw_sp;
10871
10872         err = mlxsw_sp->router_ops->init(mlxsw_sp);
10873         if (err)
10874                 goto err_router_ops_init;
10875
10876         INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
10877         INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
10878                           mlxsw_sp_nh_grp_activity_work);
10879         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
10880         err = __mlxsw_sp_router_init(mlxsw_sp);
10881         if (err)
10882                 goto err_router_init;
10883
10884         err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
10885         if (err)
10886                 goto err_ipips_init;
10887
10888         err = rhashtable_init(&mlxsw_sp->router->crif_ht,
10889                               &mlxsw_sp_crif_ht_params);
10890         if (err)
10891                 goto err_crif_ht_init;
10892
10893         err = mlxsw_sp_rifs_init(mlxsw_sp);
10894         if (err)
10895                 goto err_rifs_init;
10896
10897         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
10898                               &mlxsw_sp_nexthop_ht_params);
10899         if (err)
10900                 goto err_nexthop_ht_init;
10901
10902         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
10903                               &mlxsw_sp_nexthop_group_ht_params);
10904         if (err)
10905                 goto err_nexthop_group_ht_init;
10906
10907         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
10908         err = mlxsw_sp_lpm_init(mlxsw_sp);
10909         if (err)
10910                 goto err_lpm_init;
10911
10912         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
10913         if (err)
10914                 goto err_mr_init;
10915
10916         err = mlxsw_sp_vrs_init(mlxsw_sp);
10917         if (err)
10918                 goto err_vrs_init;
10919
10920         err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack);
10921         if (err)
10922                 goto err_lb_rif_init;
10923
10924         err = mlxsw_sp_neigh_init(mlxsw_sp);
10925         if (err)
10926                 goto err_neigh_init;
10927
10928         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
10929         if (err)
10930                 goto err_mp_hash_init;
10931
10932         err = mlxsw_sp_dscp_init(mlxsw_sp);
10933         if (err)
10934                 goto err_dscp_init;
10935
10936         router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
10937         err = register_inetaddr_notifier(&router->inetaddr_nb);
10938         if (err)
10939                 goto err_register_inetaddr_notifier;
10940
10941         router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
10942         err = register_inet6addr_notifier(&router->inet6addr_nb);
10943         if (err)
10944                 goto err_register_inet6addr_notifier;
10945
10946         router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
10947         err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
10948         if (err)
10949                 goto err_register_inetaddr_valid_notifier;
10950
10951         nb = &router->inet6addr_valid_nb;
10952         nb->notifier_call = mlxsw_sp_inet6addr_valid_event;
10953         err = register_inet6addr_validator_notifier(nb);
10954         if (err)
10955                 goto err_register_inet6addr_valid_notifier;
10956
10957         mlxsw_sp->router->netevent_nb.notifier_call =
10958                 mlxsw_sp_router_netevent_event;
10959         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
10960         if (err)
10961                 goto err_register_netevent_notifier;
10962
10963         mlxsw_sp->router->nexthop_nb.notifier_call =
10964                 mlxsw_sp_nexthop_obj_event;
10965         err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10966                                         &mlxsw_sp->router->nexthop_nb,
10967                                         extack);
10968         if (err)
10969                 goto err_register_nexthop_notifier;
10970
10971         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
10972         err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
10973                                     &mlxsw_sp->router->fib_nb,
10974                                     mlxsw_sp_router_fib_dump_flush, extack);
10975         if (err)
10976                 goto err_register_fib_notifier;
10977
10978         mlxsw_sp->router->netdevice_nb.notifier_call =
10979                 mlxsw_sp_router_netdevice_event;
10980         err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
10981                                               &mlxsw_sp->router->netdevice_nb);
10982         if (err)
10983                 goto err_register_netdev_notifier;
10984
10985         return 0;
10986
10987 err_register_netdev_notifier:
10988         unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
10989                                 &mlxsw_sp->router->fib_nb);
10990 err_register_fib_notifier:
10991         unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10992                                     &mlxsw_sp->router->nexthop_nb);
10993 err_register_nexthop_notifier:
10994         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
10995 err_register_netevent_notifier:
10996         unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
10997 err_register_inet6addr_valid_notifier:
10998         unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
10999 err_register_inetaddr_valid_notifier:
11000         unregister_inet6addr_notifier(&router->inet6addr_nb);
11001 err_register_inet6addr_notifier:
11002         unregister_inetaddr_notifier(&router->inetaddr_nb);
11003 err_register_inetaddr_notifier:
11004         mlxsw_core_flush_owq();
11005 err_dscp_init:
11006         mlxsw_sp_mp_hash_fini(mlxsw_sp);
11007 err_mp_hash_init:
11008         mlxsw_sp_neigh_fini(mlxsw_sp);
11009 err_neigh_init:
11010         mlxsw_sp_lb_rif_fini(mlxsw_sp);
11011 err_lb_rif_init:
11012         mlxsw_sp_vrs_fini(mlxsw_sp);
11013 err_vrs_init:
11014         mlxsw_sp_mr_fini(mlxsw_sp);
11015 err_mr_init:
11016         mlxsw_sp_lpm_fini(mlxsw_sp);
11017 err_lpm_init:
11018         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
11019 err_nexthop_group_ht_init:
11020         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
11021 err_nexthop_ht_init:
11022         mlxsw_sp_rifs_fini(mlxsw_sp);
11023 err_rifs_init:
11024         rhashtable_destroy(&mlxsw_sp->router->crif_ht);
11025 err_crif_ht_init:
11026         mlxsw_sp_ipips_fini(mlxsw_sp);
11027 err_ipips_init:
11028         __mlxsw_sp_router_fini(mlxsw_sp);
11029 err_router_init:
11030         cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
11031 err_router_ops_init:
11032         mutex_destroy(&mlxsw_sp->router->lock);
11033         kfree(mlxsw_sp->router);
11034         return err;
11035 }
11036
11037 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
11038 {
11039         struct mlxsw_sp_router *router = mlxsw_sp->router;
11040
11041         unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11042                                           &router->netdevice_nb);
11043         unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
11044         unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11045                                     &router->nexthop_nb);
11046         unregister_netevent_notifier(&router->netevent_nb);
11047         unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
11048         unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11049         unregister_inet6addr_notifier(&router->inet6addr_nb);
11050         unregister_inetaddr_notifier(&router->inetaddr_nb);
11051         mlxsw_core_flush_owq();
11052         mlxsw_sp_mp_hash_fini(mlxsw_sp);
11053         mlxsw_sp_neigh_fini(mlxsw_sp);
11054         mlxsw_sp_lb_rif_fini(mlxsw_sp);
11055         mlxsw_sp_vrs_fini(mlxsw_sp);
11056         mlxsw_sp_mr_fini(mlxsw_sp);
11057         mlxsw_sp_lpm_fini(mlxsw_sp);
11058         rhashtable_destroy(&router->nexthop_group_ht);
11059         rhashtable_destroy(&router->nexthop_ht);
11060         mlxsw_sp_rifs_fini(mlxsw_sp);
11061         rhashtable_destroy(&mlxsw_sp->router->crif_ht);
11062         mlxsw_sp_ipips_fini(mlxsw_sp);
11063         __mlxsw_sp_router_fini(mlxsw_sp);
11064         cancel_delayed_work_sync(&router->nh_grp_activity_dw);
11065         mutex_destroy(&router->lock);
11066         kfree(router);
11067 }