drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include <net/ip_tunnels.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
        MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
};

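/* If the tunnel route device is an OVS bridge master, set up forwarding of
 * the encapsulated traffic through an internal port on egress; otherwise the
 * flow attributes are left untouched.
 */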
static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
                                     struct mlx5_flow_attr *attr,
                                     struct mlx5e_encap_entry *e,
                                     int out_index)
{
        struct net_device *route_dev;
        int err = 0;

        route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

        if (!route_dev || !netif_is_ovs_master(route_dev))
                goto out;

        err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
                                                MLX5E_TC_INT_PORT_EGRESS,
                                                &attr->action, out_index);

out:
        if (route_dev)
                dev_put(route_dev);

        return err;
}

struct mlx5e_route_key {
        int ip_version;
        union {
                __be32 v4;
                struct in6_addr v6;
        } endpoint_ip;
};

struct mlx5e_route_entry {
        struct mlx5e_route_key key;
        struct list_head encap_entries;
        struct list_head decap_flows;
        u32 flags;
        struct hlist_node hlist;
        refcount_t refcnt;
        int tunnel_dev_index;
        struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
        struct mlx5e_priv *priv;
        struct notifier_block fib_nb;
        spinlock_t route_lock; /* protects route_tbl */
        unsigned long route_tbl_last_update;
        DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
        return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

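/* Copy the outer source and destination IP addresses from the match spec into
 * the flow's rx_tun_attr. The TUN_RX flag is only set when both addresses are
 * present, since they are needed for decap route tracking.
 */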
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec)
{
        struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
        struct mlx5_rx_tun_attr *tun_attr;
        void *daddr, *saddr;
        u8 ip_version;

        tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
        if (!tun_attr)
                return -ENOMEM;

        esw_attr->rx_tun_attr = tun_attr;
        ip_version = mlx5e_tc_get_ip_version(spec, true);

        if (ip_version == 4) {
                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
                tun_attr->dst_ip.v4 = *(__be32 *)daddr;
                tun_attr->src_ip.v4 = *(__be32 *)saddr;
                if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
                        return 0;
        }
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        else if (ip_version == 6) {
                int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);

                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
                memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
                memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
                if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
                    ipv6_addr_any(&tun_attr->src_ip.v6))
                        return 0;
        }
#endif
        /* Only set the flag if both src and dst ip addresses exist. They are
         * required to establish routing.
         */
        flow_flag_set(flow, TUN_RX);
        flow->attr->tun_ip_version = ip_version;
        return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
        bool all_flow_encaps_valid = true;
        int i;

        /* Flow can be associated with multiple encap entries.
         * Before offloading the flow verify that all of them have
         * a valid neighbour.
         */
        for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
                        continue;
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
                        all_flow_encaps_valid = false;
                        break;
                }
        }

        return all_flow_encaps_valid;
}

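/* Offload the cached encap header to HW as a packet reformat object and move
 * the flows attached to the encap entry from their slow path rules to full
 * encap rules, provided all of their encap destinations are valid.
 */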
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_pkt_reformat_params reformat_params;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
                return;

        memset(&reformat_params, 0, sizeof(reformat_params));
        reformat_params.type = e->reformat_type;
        reformat_params.size = e->encap_size;
        reformat_params.data = e->encap_header;
        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     &reformat_params,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(e->pkt_reformat)) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
                               PTR_ERR(e->pkt_reformat));
                return;
        }
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
                        continue;

                spec = &flow->attr->parse_attr->spec;

                attr = mlx5e_tc_get_encap_attr(flow);
                esw_attr = attr->esw_attr;
                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
                esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

                /* Do not offload flows with unresolved neighbors */
                if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
                        continue;

                err = mlx5e_tc_offload_flow_post_acts(flow);
                if (err) {
                        mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
                                       err);
                        continue;
                }

                /* update from slow path rule to encap rule */
                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
                if (IS_ERR(rule)) {
                        mlx5e_tc_unoffload_flow_post_acts(flow);
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_from_slow_path(esw, flow);
                flow->rule[0] = rule;
                /* was unset when slow path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }
}

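/* Switch all offloaded flows attached to the encap entry back to slow path
 * rules, invalidate the encap and release its HW packet reformat object.
 */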
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow))
                        continue;

                attr = mlx5e_tc_get_encap_attr(flow);
                esw_attr = attr->esw_attr;
                /* mark the flow's encap dest as non-valid */
                esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
                /* Clear pkt_reformat before checking the slow path flag: on a
                 * later iteration the same flow may already have the slow path
                 * flag set, but its pkt_reformat still needs to be cleared.
                 */
                if (flow_flag_test(flow, SLOW))
                        continue;

                /* update from encap rule to slow path rule */
                spec = &flow->attr->parse_attr->spec;
                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
                mlx5e_tc_unoffload_flow_post_acts(flow);
                flow->rule[0] = rule;
                /* was unset when fast path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }

        /* we know that the encap is valid */
        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
        e->pkt_reformat = NULL;
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
                                struct list_head *flow_list,
                                int index)
{
        if (IS_ERR(mlx5e_flow_get(flow))) {
                /* Flow is being deleted concurrently. Wait for it to be
                 * unoffloaded from hardware, otherwise deleting encap will
                 * fail.
                 */
                wait_for_completion(&flow->del_hw_done);
                return;
        }
        wait_for_completion(&flow->init_done);

        flow->tmp_entry_index = index;
        list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
        struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(efi, &e->flows, list) {
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
                mlx5e_take_tmp_flow(flow, flow_list, efi->index);
        }
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
                                             struct list_head *flow_list)
{
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, &r->decap_flows, decap_routes)
                mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

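/* Iterate nhe->encap_list under RCU and return the next encap entry after @e
 * that can be referenced and satisfies @match. The reference held on the
 * starting entry @e is dropped before returning.
 */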
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
                              struct mlx5e_encap_entry *e,
                              match_cb match)
{
        struct mlx5e_encap_entry *next = NULL;

retry:
        rcu_read_lock();

        /* find encap with non-zero reference counter value */
        for (next = e ?
                     list_next_or_null_rcu(&nhe->encap_list,
                                           &e->encap_list,
                                           struct mlx5e_encap_entry,
                                           encap_list) :
                     list_first_or_null_rcu(&nhe->encap_list,
                                            struct mlx5e_encap_entry,
                                            encap_list);
             next;
             next = list_next_or_null_rcu(&nhe->encap_list,
                                          &next->encap_list,
                                          struct mlx5e_encap_entry,
                                          encap_list))
                if (mlx5e_encap_take(next))
                        break;

        rcu_read_unlock();

        /* release starting encap */
        if (e)
                mlx5e_encap_put(netdev_priv(e->out_dev), e);
        if (!next)
                return next;

        /* wait for encap to be fully initialized */
        wait_for_completion(&next->res_ready);
        /* continue searching if encap entry is not in valid state after completion */
        if (!match(next)) {
                e = next;
                goto retry;
        }

        return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
        return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
                           struct mlx5e_encap_entry *e)
{
        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
        return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
                          struct mlx5e_encap_entry *e)
{
        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

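/* Walk all valid encap entries of the neigh hash entry and check the flow
 * counters of their offloaded flows. If any flow saw traffic since the last
 * report, record the time and send a neigh event for the corresponding
 * neighbour entry.
 */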
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
        struct mlx5e_encap_entry *e = NULL;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        struct neigh_table *tbl;
        bool neigh_used = false;
        struct neighbour *n;
        u64 lastuse;

        if (m_neigh->family == AF_INET)
                tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
        else if (m_neigh->family == AF_INET6)
                tbl = ipv6_stub->nd_tbl;
#endif
        else
                return;

        /* mlx5e_get_next_valid_encap() releases previous encap before returning
         * next one.
         */
        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
                struct encap_flow_item *efi, *tmp;
                struct mlx5_eswitch *esw;
                LIST_HEAD(flow_list);

                esw = priv->mdev->priv.eswitch;
                mutex_lock(&esw->offloads.encap_tbl_lock);
                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                        flow = container_of(efi, struct mlx5e_tc_flow,
                                            encaps[efi->index]);
                        if (IS_ERR(mlx5e_flow_get(flow)))
                                continue;
                        list_add(&flow->tmp_list, &flow_list);

                        if (mlx5e_is_offloaded_flow(flow)) {
                                counter = mlx5e_tc_get_counter(flow);
                                lastuse = mlx5_fc_query_lastuse(counter);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
                                        neigh_used = true;
                                        break;
                                }
                        }
                }
                mutex_unlock(&esw->offloads.encap_tbl_lock);

                mlx5e_put_flow_list(priv, &flow_list);
                if (neigh_used) {
                        /* release current encap before breaking the loop */
                        mlx5e_encap_put(priv, e);
                        break;
                }
        }

        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

        if (neigh_used) {
                nhe->reported_lastuse = jiffies;

                /* find the relevant neigh according to the cached device and
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
                if (!n)
                        return;

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        WARN_ON(!list_empty(&e->flows));

        if (e->compl_result > 0) {
                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
        }

        kfree(e->tun_info);
        kfree(e->encap_header);
        kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_decap_entry *d)
{
        WARN_ON(!list_empty(&d->flows));

        if (!d->compl_result)
                mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

        kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
                return;
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        lockdep_assert_held(&esw->offloads.encap_tbl_lock);

        if (!refcount_dec_and_test(&e->refcnt))
                return;
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
                return;
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index);

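/* Drop the flow's reference on its encap entry at @out_index. If this was the
 * last reference, remove the entry from the encap table and free it.
 */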
void mlx5e_detach_encap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow,
                        struct mlx5_flow_attr *attr,
                        int out_index)
{
        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!mlx5e_is_eswitch_flow(flow))
                return;

        if (attr->esw_attr->dests[out_index].flags &
            MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
                mlx5e_detach_encap_route(priv, flow, out_index);

        /* flow wasn't fully initialized */
        if (!e)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->encaps[out_index].list);
        flow->encaps[out_index].e = NULL;
        if (!refcount_dec_and_test(&e->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_entry *d = flow->decap_reformat;

        if (!d)
                return;

        mutex_lock(&esw->offloads.decap_tbl_lock);
        list_del(&flow->l3_to_l2_reformat);
        flow->decap_reformat = NULL;

        if (!refcount_dec_and_test(&d->refcnt)) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                return;
        }
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
                                           struct mlx5e_encap_key *b)
{
        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
                a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
                                           struct mlx5e_encap_key *b,
                                           __be16 tun_flags)
{
        struct ip_tunnel_info *a_info;
        struct ip_tunnel_info *b_info;
        bool a_has_opts, b_has_opts;

        if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
                return false;

        a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
        b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);

        /* keys are equal when both don't have any options attached */
        if (!a_has_opts && !b_has_opts)
                return true;

        if (a_has_opts != b_has_opts)
                return false;

        /* options stored in memory next to ip_tunnel_info struct */
        a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
        b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);

        return a_info->options_len == b_info->options_len &&
               !memcmp(ip_tunnel_info_opts(a_info),
                       ip_tunnel_info_opts(b_info),
                       a_info->options_len);
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
                          struct mlx5e_decap_key *b)
{
        return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
                     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
        return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_key e_key;
        struct mlx5e_encap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
                                   encap_hlist, hash_key) {
                e_key.ip_tun_key = &e->tun_info->key;
                e_key.tc_tunnel = e->tunnel;
                if (e->tunnel->encap_info_equal(&e_key, key) &&
                    mlx5e_encap_take(e))
                        return e;
        }

        return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_key r_key;
        struct mlx5e_decap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
                                   hlist, hash_key) {
                r_key = e->key;
                if (!cmp_decap_info(&r_key, key) &&
                    mlx5e_decap_take(e))
                        return e;
        }
        return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

        return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
                                      struct mlx5e_tc_flow *flow,
                                      int out_index,
                                      struct mlx5e_encap_entry *e,
                                      struct netlink_ext_ack *extack)
{
        int i;

        for (i = 0; i < out_index; i++) {
                if (flow->encaps[i].e != e)
                        continue;
                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
                netdev_err(priv->netdev, "can't duplicate encap action\n");
                return true;
        }

        return false;
}

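/* When the tunnel route device is an mlx5 VF netdev (VF tunnel case), set up
 * source port rewrite: clear the destination chain, flag the destination with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE and add a mod_hdr action that
 * writes the route VF's vport metadata into the VPORT_TO_REG register.
 */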
static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
                               struct mlx5_flow_attr *attr,
                               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                               struct net_device *out_dev,
                               int route_dev_ifindex,
                               int out_index)
{
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
                goto out;

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        attr->dest_chain = 0;
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
        data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
                                                       vport_num);
        err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
                                                   MLX5_FLOW_NAMESPACE_FDB,
                                                   VPORT_TO_REG, data);
        if (err >= 0) {
                esw_attr->dests[out_index].src_port_rewrite_act_id = err;
                err = 0;
        }

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
                                  struct mlx5_esw_flow_attr *attr,
                                  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                  struct net_device *out_dev,
                                  int route_dev_ifindex,
                                  int out_index)
{
        int act_id = attr->dests[out_index].src_port_rewrite_act_id;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
                err = -ENODEV;
                goto out;
        }

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
                                                       vport_num);
        mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        unsigned int ret;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        spin_lock_bh(&encap->route_lock);
        ret = encap->route_tbl_last_update;
        spin_unlock_bh(&encap->route_lock);
        return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5_flow_attr *attr,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index);

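/* Look up (or create) the encap entry matching the tunnel info of destination
 * @out_index, build its encap header, and attach the flow to it. If the encap
 * is not yet valid (neighbour unresolved), the flow is marked SLOW so it is
 * offloaded through the slow path until the header can be offloaded.
 */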
int mlx5e_attach_encap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct mlx5_flow_attr *attr,
                       struct net_device *mirred_dev,
                       int out_index,
                       struct netlink_ext_ack *extack,
                       struct net_device **encap_dev)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        const struct ip_tunnel_info *tun_info;
        const struct mlx5e_mpls_info *mpls_info;
        unsigned long tbl_time_before = 0;
        struct mlx5e_encap_entry *e;
        struct mlx5e_encap_key key;
        bool entry_created = false;
        unsigned short family;
        uintptr_t hash_key;
        int err = 0;

        lockdep_assert_held(&esw->offloads.encap_tbl_lock);

        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        mpls_info = &parse_attr->mpls_info[out_index];
        family = ip_tunnel_info_af(tun_info);
        key.ip_tun_key = &tun_info->key;
        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
        if (!key.tc_tunnel) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
                return -EOPNOTSUPP;
        }

        hash_key = hash_encap_info(&key);

        e = mlx5e_encap_get(priv, &key, hash_key);

        /* must verify if encap is valid or not */
        if (e) {
                /* Check that entry was not already attached to this flow */
                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
                        err = -EOPNOTSUPP;
                        goto out_err;
                }

                goto attach_flow;
        }

        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (!e) {
                err = -ENOMEM;
                goto out_err;
        }

        refcount_set(&e->refcnt, 1);
        init_completion(&e->res_ready);
        entry_created = true;
        INIT_LIST_HEAD(&e->route_list);

        tun_info = mlx5e_dup_tun_info(tun_info);
        if (!tun_info) {
                err = -ENOMEM;
                goto out_err_init;
        }
        e->tun_info = tun_info;
        memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
        if (err)
                goto out_err_init;

        INIT_LIST_HEAD(&e->flows);
        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);

        if (family == AF_INET)
                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
        else if (family == AF_INET6)
                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

        complete_all(&e->res_ready);
        if (err) {
                e->compl_result = err;
                goto out_err;
        }
        e->compl_result = 1;

attach_flow:
        err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
                                       tbl_time_before, out_index);
        if (err)
                goto out_err;

        err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
        if (err == -EOPNOTSUPP) {
                /* If device doesn't support int port offload,
                 * redirect to uplink vport.
                 */
                mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
                err = 0;
        } else if (err) {
                goto out_err;
        }

        flow->encaps[out_index].e = e;
        list_add(&flow->encaps[out_index].list, &e->flows);
        flow->encaps[out_index].index = out_index;
        *encap_dev = e->out_dev;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
                attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
        } else {
                flow_flag_set(flow, SLOW);
        }

        return err;

out_err:
        if (e)
                mlx5e_encap_put_locked(priv, e);
        return err;

out_err_init:
        kfree(tun_info);
        kfree(e);
        return err;
}

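/* Look up (or create) a decap entry for the flow's L3-to-L2 reformat header
 * and attach the flow to it. The HW packet reformat object is shared between
 * flows that restore the same inner Ethernet header.
 */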
int mlx5e_attach_decap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
        struct mlx5_pkt_reformat_params reformat_params;
        struct mlx5e_decap_entry *d;
        struct mlx5e_decap_key key;
        uintptr_t hash_key;
        int err = 0;

        if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "encap header larger than max supported");
                return -EOPNOTSUPP;
        }

        key.key = attr->eth;
        hash_key = hash_decap_info(&key);
        mutex_lock(&esw->offloads.decap_tbl_lock);
        d = mlx5e_decap_get(priv, &key, hash_key);
        if (d) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                wait_for_completion(&d->res_ready);
                mutex_lock(&esw->offloads.decap_tbl_lock);
                if (d->compl_result) {
                        err = -EREMOTEIO;
                        goto out_free;
                }
                goto found;
        }

        d = kzalloc(sizeof(*d), GFP_KERNEL);
        if (!d) {
                err = -ENOMEM;
                goto out_err;
        }

        d->key = key;
        refcount_set(&d->refcnt, 1);
        init_completion(&d->res_ready);
        INIT_LIST_HEAD(&d->flows);
        hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        memset(&reformat_params, 0, sizeof(reformat_params));
        reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
        reformat_params.size = sizeof(attr->eth);
        reformat_params.data = &attr->eth;
        d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     &reformat_params,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(d->pkt_reformat)) {
                err = PTR_ERR(d->pkt_reformat);
                d->compl_result = err;
        }
        mutex_lock(&esw->offloads.decap_tbl_lock);
        complete_all(&d->res_ready);
        if (err)
                goto out_free;

found:
        flow->decap_reformat = d;
        attr->decap_pkt_reformat = d->pkt_reformat;
        list_add(&flow->l3_to_l2_reformat, &d->flows);
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return 0;

out_free:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        mlx5e_decap_put(priv, d);
        return err;

out_err:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return err;
}

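/* Attach an encap entry for every destination of the flow that requires
 * encapsulation and record the destination rep/mdev. Sets *vf_tun when any
 * destination uses the VF tunnel (source port rewrite) path.
 */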
int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow,
                                 struct mlx5_flow_attr *attr,
                                 struct netlink_ext_ack *extack,
                                 bool *vf_tun)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct net_device *encap_dev = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        struct mlx5_eswitch *esw;
        int out_index;
        int err = 0;

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;
        *vf_tun = false;

        esw = priv->mdev->priv.eswitch;
        mutex_lock(&esw->offloads.encap_tbl_lock);
        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                struct net_device *out_dev;
                int mirred_ifindex;

                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
                if (!out_dev) {
                        NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
                        err = -ENODEV;
                        goto out;
                }
                err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
                                         extack, &encap_dev);
                dev_put(out_dev);
                if (err)
                        goto out;

                if (esw_attr->dests[out_index].flags &
                    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
                    !esw_attr->dest_int_port)
                        *vf_tun = true;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                esw_attr->dests[out_index].rep = rpriv->rep;
                esw_attr->dests[out_index].mdev = out_priv->mdev;
        }

        if (*vf_tun && esw_attr->out_count > 1) {
                NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
                err = -EOPNOTSUPP;
                goto out;
        }

out:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return err;
}

void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5_flow_attr *attr)
{
        struct mlx5_esw_flow_attr *esw_attr;
        int out_index;

        if (!mlx5e_is_eswitch_flow(flow))
                return;

        esw_attr = attr->esw_attr;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mlx5e_detach_encap(flow->priv, flow, attr, out_index);
                kfree(attr->parse_attr->tun_info[out_index]);
        }
}

static int cmp_route_info(struct mlx5e_route_key *a,
                          struct mlx5e_route_key *b)
{
        if (a->ip_version == 4 && b->ip_version == 4)
                return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
                              sizeof(a->endpoint_ip.v4));
        else if (a->ip_version == 6 && b->ip_version == 6)
                return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
                              sizeof(a->endpoint_ip.v6));
        return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
        if (key->ip_version == 4)
                return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
        return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_route_entry *r)
{
        WARN_ON(!list_empty(&r->decap_flows));
        WARN_ON(!list_empty(&r->encap_entries));

        kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
                return;

        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        lockdep_assert_held(&esw->offloads.encap_tbl_lock);

        if (!refcount_dec_and_test(&r->refcnt))
                return;
        hash_del_rcu(&r->hlist);
        mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
                u32 hash_key)
{
        struct mlx5e_route_key r_key;
        struct mlx5e_route_entry *r;

        hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
                r_key = r->key;
                if (!cmp_route_info(&r_key, key) &&
                    refcount_inc_not_zero(&r->refcnt))
                        return r;
        }
        return NULL;
}

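/* Find an existing route entry for the tunnel endpoint IP or create a new
 * one. Returns ERR_PTR(-EINVAL) if an existing entry is no longer valid.
 * For a newly created entry, *route_tbl_change_time is updated with the time
 * of the last route table change.
 */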
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
                       struct mlx5e_route_key *key,
                       int tunnel_dev_index,
                       unsigned long *route_tbl_change_time)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        struct mlx5e_route_entry *r;
        u32 hash_key;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        hash_key = hash_route_info(key);
        spin_lock_bh(&encap->route_lock);
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);
        if (r) {
                if (!mlx5e_route_entry_valid(r)) {
                        mlx5e_route_put_locked(priv, r);
                        return ERR_PTR(-EINVAL);
                }
                return r;
        }

        r = kzalloc(sizeof(*r), GFP_KERNEL);
        if (!r)
                return ERR_PTR(-ENOMEM);

        r->key = *key;
        r->flags |= MLX5E_ROUTE_ENTRY_VALID;
        r->tunnel_dev_index = tunnel_dev_index;
        refcount_set(&r->refcnt, 1);
        INIT_LIST_HEAD(&r->decap_flows);
        INIT_LIST_HEAD(&r->encap_entries);

        spin_lock_bh(&encap->route_lock);
        *route_tbl_change_time = encap->route_tbl_last_update;
        hash_add(encap->route_tbl, &r->hlist, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
        u32 hash_key = hash_route_info(key);
        struct mlx5e_route_entry *r;

        spin_lock_bh(&encap->route_lock);
        encap->route_tbl_last_update = jiffies;
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}

struct mlx5e_tc_fib_event_data {
        struct work_struct work;
        unsigned long event;
        struct mlx5e_route_entry *r;
        struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
        struct mlx5e_tc_fib_event_data *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), flags);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
        fib_work->event = event;
        fib_work->ul_dev = ul_dev;

        return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
                           struct mlx5e_route_entry *r,
                           unsigned long event)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_fib_event_data *fib_work;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct net_device *ul_dev;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        ul_dev = uplink_rpriv->netdev;

        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
        if (!fib_work)
                return -ENOMEM;

        dev_hold(ul_dev);
        refcount_inc(&r->refcnt);
        fib_work->r = r;
        queue_work(priv->wq, &fib_work->work);

        return 0;
}

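/* Resolve the decap route of a tunnel flow and attach the flow to the route
 * entry for its tunnel endpoint IP, so FIB updates for that endpoint can
 * re-trigger offload. If the routing table changed while the entry was being
 * created, a FIB_EVENT_ENTRY_REPLACE update is scheduled.
 */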
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_before, tbl_time_after;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        mutex_lock(&esw->offloads.encap_tbl_lock);
        if (!esw_attr->rx_tun_attr)
                goto out;

        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
        tbl_time_after = tbl_time_before;
        err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
        if (err || !esw_attr->rx_tun_attr->decap_vport)
                goto out;

        key.ip_version = attr->tun_ip_version;
        if (key.ip_version == 4)
                key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
        else
                key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

        r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
                                   &tbl_time_after);
        if (IS_ERR(r)) {
                err = PTR_ERR(r);
                goto out;
        }
        /* Routing changed concurrently. FIB event handler might have missed new
         * entry, schedule update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        goto out;
                }
        }

        flow->decap_route = r;
        list_add(&flow->decap_routes, &r->decap_flows);
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return 0;

out:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5_flow_attr *attr,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_after = tbl_time_before;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        const struct ip_tunnel_info *tun_info;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        unsigned short family;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);

        if (family == AF_INET) {
                key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
                key.ip_version = 4;
        } else if (family == AF_INET6) {
                key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
                key.ip_version = 6;
        }

        err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
                                  e->route_dev_ifindex, out_index);
        if (err || !(esw_attr->dests[out_index].flags &
                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
                return err;

        r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
                                   &tbl_time_after);
        if (IS_ERR(r))
                return PTR_ERR(r);
        /* Routing changed concurrently. FIB event handler might have missed new
         * entry, schedule update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        return err;
                }
        }

        flow->encap_routes[out_index].r = r;
        if (new_encap_entry)
                list_add(&e->route_list, &r->encap_entries);
        flow->encap_routes[out_index].index = out_index;
        return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_route_entry *r = flow->decap_route;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->decap_routes);
        flow->decap_route = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index)
{
        struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_entry *e, *tmp;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        flow->encap_routes[out_index].r = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
                list_del_init(&e->route_list);
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

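/* Unoffload all flows using the encap entry, detach their mod_hdr actions,
 * mark the encap as having no route (MLX5_ENCAP_ENTRY_NO_ROUTE) and release
 * its HW packet reformat object.
 */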
1447 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1448                                    struct mlx5e_encap_entry *e,
1449                                    struct list_head *encap_flows)
1450 {
1451         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1452         struct mlx5e_tc_flow *flow;
1453
1454         list_for_each_entry(flow, encap_flows, tmp_list) {
1455                 struct mlx5_esw_flow_attr *esw_attr;
1456                 struct mlx5_flow_attr *attr;
1457
1458                 if (!mlx5e_is_offloaded_flow(flow))
1459                         continue;
1460
1461                 attr = mlx5e_tc_get_encap_attr(flow);
1462                 esw_attr = attr->esw_attr;
1463
1464                 if (flow_flag_test(flow, SLOW)) {
1465                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1466                 } else {
1467                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1468                         mlx5e_tc_unoffload_flow_post_acts(flow);
1469                 }
1470
1471                 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1472                 attr->modify_hdr = NULL;
1473
1474                 esw_attr->dests[flow->tmp_entry_index].flags &=
1475                         ~MLX5_ESW_DEST_ENCAP_VALID;
1476                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1477         }
1478
1479         e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1480         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1481                 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1482                 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1483                 e->pkt_reformat = NULL;
1484         }
1485 }
1486
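     /* Re-resolve the encap header against the new route and re-offload
      * every flow in @encap_flows. A flow whose encap destinations are not
      * all valid is offloaded to the slow path instead.
      */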
1487 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1488                                   struct net_device *tunnel_dev,
1489                                   struct mlx5e_encap_entry *e,
1490                                   struct list_head *encap_flows)
1491 {
1492         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1493         struct mlx5e_tc_flow *flow;
1494         int err;
1495
1496         err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1497                 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1498                 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1499         if (err)
1500                 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d\n", err);
1501         e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1502
1503         list_for_each_entry(flow, encap_flows, tmp_list) {
1504                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1505                 struct mlx5_esw_flow_attr *esw_attr;
1506                 struct mlx5_flow_handle *rule;
1507                 struct mlx5_flow_attr *attr;
1508                 struct mlx5_flow_spec *spec;
1509
1510                 if (flow_flag_test(flow, FAILED))
1511                         continue;
1512
1513                 spec = &flow->attr->parse_attr->spec;
1514
1515                 attr = mlx5e_tc_get_encap_attr(flow);
1516                 esw_attr = attr->esw_attr;
1517                 parse_attr = attr->parse_attr;
1518
1519                 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1520                                              e->out_dev, e->route_dev_ifindex,
1521                                              flow->tmp_entry_index);
1522                 if (err) {
1523                         mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d\n", err);
1524                         continue;
1525                 }
1526
1527                 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1528                 if (err) {
1529                         mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d\n",
1530                                        err);
1531                         continue;
1532                 }
1533
1534                 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1535                         esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1536                         esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1537                         if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1538                                 goto offload_to_slow_path;
1539
1540                         err = mlx5e_tc_offload_flow_post_acts(flow);
1541                         if (err) {
1542                                 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1543                                                err);
1544                                 goto offload_to_slow_path;
1545                         }
1546
1547                         /* update from slow path rule to encap rule */
1548                         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1549                         if (IS_ERR(rule)) {
1550                                 mlx5e_tc_unoffload_flow_post_acts(flow);
1551                                 err = PTR_ERR(rule);
1552                                 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1553                                                err);
1554                         } else {
1555                                 flow->rule[0] = rule;
1556                         }
1557                 } else {
1558 offload_to_slow_path:
1559                         rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1560                         /* mark the flow's encap destination as invalid */
1561                         esw_attr->dests[flow->tmp_entry_index].flags &=
1562                                 ~MLX5_ESW_DEST_ENCAP_VALID;
1563
1564                         if (IS_ERR(rule)) {
1565                                 err = PTR_ERR(rule);
1566                                 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1567                                                err);
1568                         } else {
1569                                 flow->rule[0] = rule;
1570                         }
1571                 }
1572                 flow_flag_set(flow, OFFLOADED);
1573         }
1574 }
1575
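     /* For every encap entry on the route, take the flows using it, tear
      * down their offload state if the route was valid, and re-offload them
      * on a route replace. All affected flows are collected on @flow_list
      * for the caller to release.
      */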
1576 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1577                                      struct mlx5e_route_entry *r,
1578                                      struct list_head *flow_list,
1579                                      bool replace)
1580 {
1581         struct net_device *tunnel_dev;
1582         struct mlx5e_encap_entry *e;
1583
1584         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1585         if (!tunnel_dev)
1586                 return -ENODEV;
1587
1588         list_for_each_entry(e, &r->encap_entries, route_list) {
1589                 LIST_HEAD(encap_flows);
1590
1591                 mlx5e_take_all_encap_flows(e, &encap_flows);
1592                 if (list_empty(&encap_flows))
1593                         continue;
1594
1595                 if (mlx5e_route_entry_valid(r))
1596                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1597
1598                 if (!replace) {
1599                         list_splice(&encap_flows, flow_list);
1600                         continue;
1601                 }
1602
1603                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1604                 list_splice(&encap_flows, flow_list);
1605         }
1606
1607         return 0;
1608 }
1609
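     /* Remove the FDB rules of every offloaded flow on the list. */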
1610 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1611                                       struct list_head *flow_list)
1612 {
1613         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1614         struct mlx5e_tc_flow *flow;
1615
1616         list_for_each_entry(flow, flow_list, tmp_list)
1617                 if (mlx5e_is_offloaded_flow(flow))
1618                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1619 }
1620
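     /* Redo the tunnel route lookup for every decap flow and reinstall its
      * FDB rule against the updated route.
      */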
1621 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1622                                   struct list_head *decap_flows)
1623 {
1624         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1625         struct mlx5e_tc_flow *flow;
1626
1627         list_for_each_entry(flow, decap_flows, tmp_list) {
1628                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1629                 struct mlx5_flow_attr *attr = flow->attr;
1630                 struct mlx5_flow_handle *rule;
1631                 struct mlx5_flow_spec *spec;
1632                 int err;
1633
1634                 if (flow_flag_test(flow, FAILED))
1635                         continue;
1636
1637                 parse_attr = attr->parse_attr;
1638                 spec = &parse_attr->spec;
1639                 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1640                 if (err) {
1641                         mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1642                                        err);
1643                         continue;
1644                 }
1645
1646                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1647                 if (IS_ERR(rule)) {
1648                         err = PTR_ERR(rule);
1649                         mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1650                                        err);
1651                 } else {
1652                         flow->rule[0] = rule;
1653                         flow_flag_set(flow, OFFLOADED);
1654                 }
1655         }
1656 }
1657
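     /* Take the decap flows attached to the route, unoffload them if the
      * route was valid, re-offload them on a route replace and hand them to
      * the caller via @flow_list.
      */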
1658 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1659                                           struct mlx5e_route_entry *r,
1660                                           struct list_head *flow_list,
1661                                           bool replace)
1662 {
1663         struct net_device *tunnel_dev;
1664         LIST_HEAD(decap_flows);
1665
1666         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1667         if (!tunnel_dev)
1668                 return -ENODEV;
1669
1670         mlx5e_take_all_route_decap_flows(r, &decap_flows);
1671         if (mlx5e_route_entry_valid(r))
1672                 mlx5e_unoffload_flow_list(priv, &decap_flows);
1673         if (replace)
1674                 mlx5e_reoffload_decap(priv, &decap_flows);
1675
1676         list_splice(&decap_flows, flow_list);
1677
1678         return 0;
1679 }
1680
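     /* Work item handling a FIB ENTRY_REPLACE/DEL event for a tracked route.
      * Runs under RTNL and the encap table lock, updates all encap and decap
      * flows attached to the route and marks the route valid again on a
      * replace event.
      */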
1681 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1682 {
1683         struct mlx5e_tc_fib_event_data *event_data =
1684                 container_of(work, struct mlx5e_tc_fib_event_data, work);
1685         struct net_device *ul_dev = event_data->ul_dev;
1686         struct mlx5e_priv *priv = netdev_priv(ul_dev);
1687         struct mlx5e_route_entry *r = event_data->r;
1688         struct mlx5_eswitch *esw;
1689         LIST_HEAD(flow_list);
1690         bool replace;
1691         int err;
1692
1693         /* sync with concurrent neigh updates */
1694         rtnl_lock();
1695         esw = priv->mdev->priv.eswitch;
1696         mutex_lock(&esw->offloads.encap_tbl_lock);
1697         replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1698
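             /* If the route entry was never marked valid there is nothing
              * offloaded to tear down, so only a replace event needs
              * handling.
              */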
1699         if (!mlx5e_route_entry_valid(r) && !replace)
1700                 goto out;
1701
1702         err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1703         if (err)
1704                 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1705                                err);
1706
1707         err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1708         if (err)
1709                 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1710                                err);
1711
1712         if (replace)
1713                 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1714 out:
1715         mutex_unlock(&esw->offloads.encap_tbl_lock);
1716         rtnl_unlock();
1717
1718         mlx5e_put_flow_list(priv, &flow_list);
1719         mlx5e_route_put(priv, event_data->r);
1720         dev_put(event_data->ul_dev);
1721         kfree(event_data);
1722 }
1723
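     /* Called from the FIB notifier in atomic context. Only host (/32)
      * routes whose nexthop device is an mlx5e netdev are of interest;
      * routes using nexthop objects are skipped. For relevant events,
      * allocate the deferred work and take references to the matching route
      * entry and the uplink device. Returns NULL when the event is not
      * relevant.
      */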
1724 static struct mlx5e_tc_fib_event_data *
1725 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1726                          struct net_device *ul_dev,
1727                          struct mlx5e_tc_tun_encap *encap,
1728                          unsigned long event,
1729                          struct fib_notifier_info *info)
1730 {
1731         struct fib_entry_notifier_info *fen_info;
1732         struct mlx5e_tc_fib_event_data *fib_work;
1733         struct mlx5e_route_entry *r;
1734         struct mlx5e_route_key key;
1735         struct net_device *fib_dev;
1736
1737         fen_info = container_of(info, struct fib_entry_notifier_info, info);
1738         if (fen_info->fi->nh)
1739                 return NULL;
1740         fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1741         if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1742             fen_info->dst_len != 32)
1743                 return NULL;
1744
1745         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1746         if (!fib_work)
1747                 return ERR_PTR(-ENOMEM);
1748
1749         key.endpoint_ip.v4 = htonl(fen_info->dst);
1750         key.ip_version = 4;
1751
1752         /* Can't fail after this point because releasing the reference to r
1753          * requires taking a sleeping mutex, which we can't do in atomic
1754          * context.
1755          */
1756         r = mlx5e_route_lookup_for_update(encap, &key);
1757         if (!r)
1758                 goto out;
1759         fib_work->r = r;
1760         dev_hold(ul_dev);
1761
1762         return fib_work;
1763
1764 out:
1765         kfree(fib_work);
1766         return NULL;
1767 }
1768
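     /* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only host (/128)
      * routes whose nexthop device is an mlx5e netdev are tracked.
      */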
1769 static struct mlx5e_tc_fib_event_data *
1770 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1771                          struct net_device *ul_dev,
1772                          struct mlx5e_tc_tun_encap *encap,
1773                          unsigned long event,
1774                          struct fib_notifier_info *info)
1775 {
1776         struct fib6_entry_notifier_info *fen_info;
1777         struct mlx5e_tc_fib_event_data *fib_work;
1778         struct mlx5e_route_entry *r;
1779         struct mlx5e_route_key key;
1780         struct net_device *fib_dev;
1781
1782         fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1783         fib_dev = fib6_info_nh_dev(fen_info->rt);
1784         if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1785             fen_info->rt->fib6_dst.plen != 128)
1786                 return NULL;
1787
1788         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1789         if (!fib_work)
1790                 return ERR_PTR(-ENOMEM);
1791
1792         memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1793                sizeof(fen_info->rt->fib6_dst.addr));
1794         key.ip_version = 6;
1795
1796         /* Can't fail after this point because releasing the reference to r
1797          * requires taking a sleeping mutex, which we can't do in atomic
1798          * context.
1799          */
1800         r = mlx5e_route_lookup_for_update(encap, &key);
1801         if (!r)
1802                 goto out;
1803         fib_work->r = r;
1804         dev_hold(ul_dev);
1805
1806         return fib_work;
1807
1808 out:
1809         kfree(fib_work);
1810         return NULL;
1811 }
1812
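     /* FIB notifier callback. Runs in atomic context, so it only validates
      * the event, prepares the work item and queues it on the driver
      * workqueue.
      */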
1813 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1814 {
1815         struct mlx5e_tc_fib_event_data *fib_work;
1816         struct fib_notifier_info *info = ptr;
1817         struct mlx5e_tc_tun_encap *encap;
1818         struct net_device *ul_dev;
1819         struct mlx5e_priv *priv;
1820
1821         encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1822         priv = encap->priv;
1823         ul_dev = priv->netdev;
1824         priv = netdev_priv(ul_dev);
1825
1826         switch (event) {
1827         case FIB_EVENT_ENTRY_REPLACE:
1828         case FIB_EVENT_ENTRY_DEL:
1829                 if (info->family == AF_INET)
1830                         fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1831                 else if (info->family == AF_INET6)
1832                         fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1833                 else
1834                         return NOTIFY_DONE;
1835
1836                 if (!IS_ERR_OR_NULL(fib_work)) {
1837                         queue_work(priv->wq, &fib_work->work);
1838                 } else if (IS_ERR(fib_work)) {
1839                         NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1840                         mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1841                                        PTR_ERR(fib_work));
1842                 }
1843
1844                 break;
1845         default:
1846                 return NOTIFY_DONE;
1847         }
1848
1849         return NOTIFY_DONE;
1850 }
1851
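     /* Allocate the tunnel encap context, initialize the route table and
      * register the FIB notifier used to track routing changes for offloaded
      * tunnels.
      */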
1852 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1853 {
1854         struct mlx5e_tc_tun_encap *encap;
1855         int err;
1856
1857         encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1858         if (!encap)
1859                 return ERR_PTR(-ENOMEM);
1860
1861         encap->priv = priv;
1862         encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1863         spin_lock_init(&encap->route_lock);
1864         hash_init(encap->route_tbl);
1865         err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1866                                     NULL, NULL);
1867         if (err) {
1868                 kvfree(encap);
1869                 return ERR_PTR(err);
1870         }
1871
1872         return encap;
1873 }
1874
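     /* Unregister the FIB notifier, drain pending FIB event work and free
      * the encap context.
      */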
1875 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1876 {
1877         if (!encap)
1878                 return;
1879
1880         unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1881         flush_workqueue(encap->priv->wq); /* flush pending fib event work */
1882         kvfree(encap);
1883 }