// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include <net/ip_tunnels.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

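/* When the route device is an OVS internal port (the tunnel is configured on
 * top of an OVS bridge), egress traffic has to be forwarded through an
 * internal port action instead of directly to the uplink. Any other route
 * device is left untouched and 0 is returned.
 */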
static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
	if (!route_dev || !netif_is_ovs_master(route_dev))
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

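/* Copy the outer source/destination IP addresses from the flow spec into
 * esw_attr->rx_tun_attr so the decap route can be resolved later. The TUN_RX
 * flag is only set when both addresses are present.
 */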
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
		    ipv6_addr_any(&tun_attr->src_ip.v6))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

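/* Called when the encap entry's neighbour became valid: allocate the packet
 * reformat context and move every flow attached to this encap entry from its
 * slow path rule to the fully offloaded encap rule.
 */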
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;

		err = mlx5e_tc_offload_flow_post_acts(flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
				       err);
			continue;
		}

		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
		if (IS_ERR(rule)) {
			mlx5e_tc_unoffload_flow_post_acts(flow);
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

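/* Mirror image of mlx5e_tc_encap_flows_add(): the neighbour is no longer
 * valid, so move every offloaded flow back to the slow path and release the
 * encap entry's packet reformat context.
 */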
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;

		/* Clear pkt_reformat before checking the slow path flag: a
		 * later iteration may process a flow that already has the
		 * slow path flag set but whose pkt_reformat still needs
		 * clearing.
		 */
		if (flow_flag_test(flow, SLOW))
			continue;

		/* update from encap rule to slow path rule */
		spec = &flow->attr->parse_attr->spec;
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5e_tc_unoffload_flow_post_acts(flow);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	e->pkt_reformat = NULL;
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

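/* Iterator over nhe->encap_list under RCU: return the next encap entry that
 * can be taken (non-zero refcount) and satisfies the match callback,
 * releasing the reference on the starting entry. Returns NULL when the list
 * is exhausted.
 */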
typedef bool (match_cb)(struct mlx5e_encap_entry *);

static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list, &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list, &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

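/* Periodic neigh stats work: walk all valid encaps of the nhe and query flow
 * counters to detect whether the neighbour was recently used; if so, poke
 * the neighbour entry to keep it from being garbage collected.
 */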
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

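/* Final teardown of an encap entry once its refcount dropped to zero.
 * compl_result > 0 means the entry finished initialization and was attached
 * to the representor, so it must be detached first.
 */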
static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&e->refcnt))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_attr *attr,
			int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	if (attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

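/* Encap key comparison helpers. The generic variant compares the tunnel key
 * and tunnel type only; the _options variant additionally compares tunnel
 * options (e.g. geneve TLVs) when the given flag bits are set on the key.
 */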
bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b,
					   __be16 tun_flags)
{
	struct ip_tunnel_info *a_info;
	struct ip_tunnel_info *b_info;
	bool a_has_opts, b_has_opts;

	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
		return false;

	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);

	/* keys are equal when both don't have any options attached */
	if (!a_has_opts && !b_has_opts)
		return true;

	if (a_has_opts != b_has_opts)
		return false;

	/* options stored in memory next to ip_tunnel_info struct */
	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);

	return a_info->options_len == b_info->options_len &&
	       !memcmp(ip_tunnel_info_opts(a_info),
		       ip_tunnel_info_opts(b_info),
		       a_info->options_len);
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

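/* Look up (or create) the encap entry matching tun_info of out_index, attach
 * the flow to it and fill in the eswitch destination. Must be called with
 * encap_tbl_lock held. A flow attached to an encap entry whose neighbour is
 * not yet resolved is marked SLOW until the neighbour becomes valid.
 */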
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
				       tbl_time_before, out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
	} else {
		flow_flag_set(flow, SLOW);
	}

	return err;

out_err:
	if (e)
		mlx5e_encap_put_locked(priv, e);
	return err;

out_err_init:
	kfree(tun_info);
	kfree(e);
	return err;
}

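/* Allocate or reuse an L3-to-L2 packet reformat context for decap of a
 * non-Ethernet tunnel. Entries are shared via decap_tbl, keyed on the
 * rewrite Ethernet header.
 */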
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(attr->eth);
	reformat_params.data = &attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow,
				 struct mlx5_flow_attr *attr,
				 struct netlink_ext_ack *extack,
				 bool *vf_tun)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *encap_dev = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	struct mlx5_eswitch *esw;
	int out_index;
	int err = 0;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;
	*vf_tun = false;

	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		struct net_device *out_dev;
		int mirred_ifindex;

		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
		if (!out_dev) {
			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
			err = -ENODEV;
			goto out;
		}
		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
					 extack, &encap_dev);
		dev_put(out_dev);
		if (err)
			goto out;

		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			*vf_tun = true;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		esw_attr->dests[out_index].rep = rpriv->rep;
		esw_attr->dests[out_index].mdev = out_priv->mdev;
	}

	if (*vf_tun && esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
		err = -EOPNOTSUPP;
		goto out;
	}

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr;
	int out_index;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	esw_attr = attr->esw_attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
		kfree(attr->parse_attr->tun_info[out_index]);
	}
}

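/* Route table helpers: route entries are keyed on the tunnel endpoint IP
 * address and track the decap flows and encap entries that must be updated
 * when a FIB event touches that endpoint.
 */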
static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

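/* Resolve the decap route for a TUN_RX flow and attach the flow to the
 * matching route entry so it can be updated on FIB changes. If the route
 * table changed while the lookup was in flight, an update is enqueued.
 */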
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

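/* The route towards the encap destination was removed: unoffload all flows
 * using this encap entry, detach their mod_hdr contexts and release the
 * packet reformat, marking the entry NO_ROUTE until a new route appears.
 */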
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_attr *attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW)) {
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		} else {
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
			mlx5e_tc_unoffload_flow_post_acts(flow);
		}

		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_attr *attr;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;

			err = mlx5e_tc_offload_flow_post_acts(flow);
			if (err) {
				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
					       err);
				goto offload_to_slow_path;
			}

			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
			if (IS_ERR(rule)) {
				mlx5e_tc_unoffload_flow_post_acts(flow);
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
			continue;
		}

		flow->rule[0] = rule;
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

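/* Work item servicing a FIB_EVENT_ENTRY_{REPLACE,DEL} notification: takes
 * rtnl_lock to sync with concurrent neigh updates, then re-offloads (on
 * replace) or unoffloads (on delete) all encap and decap flows tracked by
 * the route entry.
 */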
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

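/* FIB notifier callback. Runs in atomic context, so it only looks up the
 * affected route entry and queues mlx5e_tc_fib_event_work() to perform the
 * actual update.
 */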
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}