/* drivers/net/ethernet/mellanox/mlx5/core/en_tc.c */
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "eswitch_offloads_chains.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)

struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        struct mlx5_modify_hdr *modify_hdr;
        u32 hairpin_tirn;
        u8 match_level;
        struct mlx5_flow_table  *hairpin_ft;
        struct mlx5_fc          *counter;
};

#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

enum {
        MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
        MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
        MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
        MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
        MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
        MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
        MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
};

#define MLX5E_TC_MAX_SPLITS 1

/* Helper struct for accessing a struct containing a list_head array.
 * Containing struct
 *   |- Helper array
 *      [0] Helper item 0
 *          |- list_head item 0
 *          |- index (0)
 *      [1] Helper item 1
 *          |- list_head item 1
 *          |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *        container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
        struct mlx5e_encap_entry *e; /* attached encap instance */
        struct list_head list;
        int index;
};
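
/* Illustration of the scheme above for encap_flow_item (hypothetical local
 * variables, not code from this file): given a list_head linked into an
 * encap entry's flow list,
 *
 *    efi = container_of(item, struct encap_flow_item, list);
 *    flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */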

struct mlx5e_tc_flow {
        struct rhash_head       node;
        struct mlx5e_priv       *priv;
        u64                     cookie;
        unsigned long           flags;
        struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
        /* Flow can be associated with multiple encap IDs.
         * The number of encaps is bounded by the number of supported
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5e_tc_flow    *peer_flow;
        struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
        struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head        hairpin; /* flows sharing the same hairpin */
        struct list_head        peer;    /* flows with peer flow */
        struct list_head        unready; /* flows not ready to be offloaded (e.g. due to missing route) */
        int                     tmp_efi_index;
        struct list_head        tmp_list; /* temporary flow list used by neigh update */
        refcount_t              refcnt;
        struct rcu_head         rcu_head;
        struct completion       init_done;
        int tunnel_id; /* the mapped tunnel id of this flow */

        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
        };
};

struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
        struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)

struct tunnel_match_key {
        struct flow_dissector_key_control enc_control;
        struct flow_dissector_key_keyid enc_key_id;
        struct flow_dissector_key_ports enc_tp;
        struct flow_dissector_key_ip enc_ip;
        union {
                struct flow_dissector_key_ipv4_addrs enc_ipv4;
                struct flow_dissector_key_ipv6_addrs enc_ipv6;
        };

        int filter_ifindex;
};

/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
 * The upper TUNNEL_INFO_BITS bits are for general tunnel info.
 * The lower ENC_OPTS_BITS bits are for enc_opts.
 */
#define TUNNEL_INFO_BITS 6
#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
#define ENC_OPTS_BITS 2
#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
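
/* For illustration only (hypothetical ids, not helpers from this file):
 * with the layout above, the 8-bit tunnel_id would be packed and unpacked as
 *
 *    tunnel_id   = (tun_info_id << ENC_OPTS_BITS) | enc_opts_id;
 *    tun_info_id = tunnel_id >> ENC_OPTS_BITS;
 *    enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
 */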

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 0,
                .mlen = 2,
        },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 3,
                .mlen = 1,
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
};

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
                            enum mlx5e_tc_attr_to_reg type,
                            u32 data,
                            u32 mask)
{
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        void *headers_c = spec->match_criteria;
        void *headers_v = spec->match_value;
        void *fmask, *fval;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        /* Truncate to the match_len low-order bytes and lay them out in
         * network byte order at the start of the temporaries before
         * copying into the match buffers.
         */
        mask = cpu_to_be32(mask) >> (32 - (match_len * 8));
        data = cpu_to_be32(data) >> (32 - (match_len * 8));

        memcpy(fmask, &mask, match_len);
        memcpy(fval, &data, match_len);

        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}
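
/* Usage sketch (illustrative; tun_id is a hypothetical u32): match on a
 * tunnel id previously loaded into reg_c_1,
 *
 *    mlx5e_tc_match_to_reg_match(spec, TUNNEL_TO_REG, tun_id, TUNNEL_ID_MASK);
 *
 * TUNNEL_TO_REG's mlen of 1 byte agrees with the 8-bit TUNNEL_ID_BITS above.
 */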

int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                          enum mlx5e_tc_attr_to_reg type,
                          u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;
        int err;

        err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB,
                                    mod_hdr_acts);
        if (err)
                return err;

        modact = mod_hdr_acts->actions +
                 (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);

        /* Firmware has a 5-bit length field, where 0 means 32 bits */
        if (mlen == 4)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset * 8);
        MLX5_SET(set_action_in, modact, length, mlen * 8);
        MLX5_SET(set_action_in, modact, data, data);
        mod_hdr_acts->num_actions++;

        return 0;
}
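
/* Usage sketch (illustrative; chain is a hypothetical u32): stash the
 * current chain id in reg_c_0 via a modify-header SET action,
 *
 *    err = mlx5e_tc_match_to_reg_set(mdev, &mod_hdr_acts, CHAIN_TO_REG, chain);
 */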

struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when the dead peer update
         * event function traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};

struct mod_hdr_key {
        int num_actions;
        void *actions;
};

struct mlx5e_mod_hdr_entry {
        /* a node of a hash table which keeps all the mod_hdr entries */
        struct hlist_node mod_hdr_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same mod_hdr entry */
        struct list_head flows;

        struct mod_hdr_key key;

        struct mlx5_modify_hdr *modify_hdr;

        refcount_t refcnt;
        struct completion res_ready;
        int compl_result;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);

static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);
        return flow;
}

static void mlx5e_flow_put(struct mlx5e_priv *priv,
                           struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}

static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before setting bit. */
        smp_mb__before_atomic();
        set_bit(flag, &flow->flags);
}
#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
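/* The flow_flag_* helpers take the short flag name and prepend
 * MLX5E_TC_FLOW_FLAG_, e.g. flow_flag_set(flow, HAIRPIN) sets
 * MLX5E_TC_FLOW_FLAG_HAIRPIN in flow->flags.
 */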

static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
                                     unsigned long flag)
{
        /* test_and_set_bit() provides all necessary barriers */
        return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)                      \
        __flow_flag_test_and_set(flow,                          \
                                 MLX5E_TC_FLOW_FLAG_##flag)

static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before clearing bit. */
        smp_mb__before_atomic();
        clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
                                                      MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        bool ret = test_bit(flag, &flow->flags);

        /* Read fields of flow structure only after checking flags. */
        smp_mb__after_atomic();
        return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
                                                    MLX5E_TC_FLOW_FLAG_##flag)

static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
        return jhash(key->actions,
                     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
                                   struct mod_hdr_key *b)
{
        if (a->num_actions != b->num_actions)
                return 1;

        return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
                &priv->fs.tc.mod_hdr;
}

static struct mlx5e_mod_hdr_entry *
mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
{
        struct mlx5e_mod_hdr_entry *mh, *found = NULL;

        hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
                if (!cmp_mod_hdr_info(&mh->key, key)) {
                        refcount_inc(&mh->refcnt);
                        found = mh;
                        break;
                }
        }

        return found;
}

static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
                              struct mlx5e_mod_hdr_entry *mh,
                              int namespace)
{
        struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);

        if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
                return;
        hash_del(&mh->mod_hdr_hlist);
        mutex_unlock(&tbl->lock);

        WARN_ON(!list_empty(&mh->flows));
        if (mh->compl_result > 0)
                mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);

        kfree(mh);
}

static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
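
/* Get-or-create a modify-header object shared by all flows with an identical
 * set of actions: the hash lookup under tbl->lock takes a reference on a hit;
 * on a miss the new entry is hashed first and the device object is allocated
 * outside the lock, with res_ready/compl_result letting concurrent waiters
 * observe success or failure.
 */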
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        int num_actions, actions_size, namespace, err;
        struct mlx5e_mod_hdr_entry *mh;
        struct mod_hdr_tbl *tbl;
        struct mod_hdr_key key;
        u32 hash_key;

        num_actions  = parse_attr->mod_hdr_acts.num_actions;
        actions_size = MLX5_MH_ACT_SZ * num_actions;

        key.actions = parse_attr->mod_hdr_acts.actions;
        key.num_actions = num_actions;

        hash_key = hash_mod_hdr_info(&key);

        namespace = get_flow_name_space(flow);
        tbl = get_mod_hdr_table(priv, namespace);

        mutex_lock(&tbl->lock);
        mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
        if (mh) {
                mutex_unlock(&tbl->lock);
                wait_for_completion(&mh->res_ready);

                if (mh->compl_result < 0) {
                        err = -EREMOTEIO;
                        goto attach_header_err;
                }
                goto attach_flow;
        }

        mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
        if (!mh) {
                mutex_unlock(&tbl->lock);
                return -ENOMEM;
        }

        mh->key.actions = (void *)mh + sizeof(*mh);
        memcpy(mh->key.actions, key.actions, actions_size);
        mh->key.num_actions = num_actions;
        spin_lock_init(&mh->flows_lock);
        INIT_LIST_HEAD(&mh->flows);
        refcount_set(&mh->refcnt, 1);
        init_completion(&mh->res_ready);

        hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
        mutex_unlock(&tbl->lock);

        mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
                                                  mh->key.num_actions,
                                                  mh->key.actions);
        if (IS_ERR(mh->modify_hdr)) {
                err = PTR_ERR(mh->modify_hdr);
                mh->compl_result = err;
                goto alloc_header_err;
        }
        mh->compl_result = 1;
        complete_all(&mh->res_ready);

attach_flow:
        flow->mh = mh;
        spin_lock(&mh->flows_lock);
        list_add(&flow->mod_hdr, &mh->flows);
        spin_unlock(&mh->flows_lock);
        if (mlx5e_is_eswitch_flow(flow))
                flow->esw_attr->modify_hdr = mh->modify_hdr;
        else
                flow->nic_attr->modify_hdr = mh->modify_hdr;

        return 0;

alloc_header_err:
        complete_all(&mh->res_ready);
attach_header_err:
        mlx5e_mod_hdr_put(priv, mh, namespace);
        return err;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->mh)
                return;

        spin_lock(&flow->mh->flows_lock);
        list_del(&flow->mod_hdr);
        spin_unlock(&flow->mh->flows_lock);

        mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
        flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = __dev_get_by_index(net, ifindex);
        priv = netdev_priv(netdev);
        return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
        void *tirc;
        int err;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto alloc_tdn_err;

        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
        MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

        err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
        if (err)
                goto create_tir_err;

        return 0;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
        return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
        struct mlx5e_priv *priv = hp->func_priv;
        int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

        mlx5e_build_default_indir_rqt(indirection_rqt, sz,
                                      hp->num_channels);

        for (i = 0; i < sz; i++) {
                ix = i;
                if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
                        ix = mlx5e_bits_invert(i, ilog2(sz));
                ix = indirection_rqt[ix];
                rqn = hp->pair->rqn[ix];
                MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
        }
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

        err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
        if (!err)
                hp->indir_rqt.enabled = true;

        kvfree(in);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        u32 in[MLX5_ST_SZ_DW(create_tir_in)];
        int tt, i, err;
        void *tirc;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

                memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

                MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
                MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
                mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

                err = mlx5_core_create_tir(hp->func_mdev, in,
                                           MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
        }
        return 0;

err_destroy_tirs:
        for (i = 0; i < tt; i++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
        return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->any_tt_tirn = hp->tirn;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

        ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
        if (err)
                goto err_create_ttc_table;

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels, hp->ttc.ft.t->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;

        mlx5e_destroy_ttc_table(priv, &hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        params.log_data_size = 15;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* set a hairpin pair per each 50Gbps share of the link speed */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;
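        /* e.g. a 100 Gbps port gives link_speed64 = 2, i.e. two channels */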

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   hp->tirn, hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_fc *counter = NULL;
        int err, dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = attr->flow_tag;

        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        return err;

                if (flow_flag_test(flow, HAIRPIN_RSS)) {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                        dest[dest_ix].ft = attr->hairpin_ft;
                } else {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                        dest[dest_ix].tir_num = attr->hairpin_tirn;
                }
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = priv->fs.vlan.ft.t;
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return PTR_ERR(counter);

                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(counter);
                dest_ix++;
                attr->counter = counter;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                flow_act.modify_hdr = attr->modify_hdr;
                dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }

        mutex_lock(&priv->fs.tc.t_lock);
        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                struct mlx5_flow_table_attr ft_attr = {};
                int tc_grp_size, tc_tbl_size, tc_num_grps;
                u32 max_flow_counter;

                max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
                                    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

                tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

                tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
                                    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
                tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;

                ft_attr.prio = MLX5E_TC_PRIO;
                ft_attr.max_fte = tc_tbl_size;
                ft_attr.level = MLX5E_TC_FT_LEVEL;
                ft_attr.autogroup.max_num_groups = tc_num_grps;
                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
                                                            &ft_attr);
                if (IS_ERR(priv->fs.tc.t)) {
                        mutex_unlock(&priv->fs.tc.t_lock);
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Failed to create tc offload table");
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        return PTR_ERR(priv->fs.tc.t);
                }
        }

        if (attr->match_level != MLX5_MATCH_NONE)
                parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                            &flow_act, dest, dest_ix);
        mutex_unlock(&priv->fs.tc.t_lock);

        return PTR_ERR_OR_ZERO(flow->rule[0]);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_fc *counter = NULL;

        counter = attr->counter;
        if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5_del_flow_rules(flow->rule[0]);
        mlx5_fc_destroy(priv->mdev, counter);

        mutex_lock(&priv->fs.tc.t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
        mutex_unlock(&priv->fs.tc.t_lock);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
                              int out_index,
                              struct netlink_ext_ack *extack,
                              struct net_device **encap_dev,
                              bool *encap_valid);

static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_esw_flow_attr *attr)
{
        struct mlx5_flow_handle *rule;

        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        if (IS_ERR(rule))
                return rule;

        if (attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1])) {
                        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                        return flow->rule[1];
                }
        }

        return rule;
}

static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_esw_flow_attr *attr)
{
        flow_flag_clear(flow, OFFLOADED);

        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

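/* "Slow path" rules stand in for flows that cannot be fully offloaded yet,
 * e.g. an encap destination whose neighbour is still unresolved (see
 * mlx5e_tc_add_fdb_flow()); once the encap becomes valid,
 * mlx5e_tc_encap_flows_add() replaces the slow path rule with the full one.
 */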
static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
{
        struct mlx5_esw_flow_attr slow_attr;
        struct mlx5_flow_handle *rule;

        memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
        slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr.split_count = 0;
        slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;

        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr);
        if (!IS_ERR(rule))
                flow_flag_set(flow, SLOW);

        return rule;
}

static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_esw_flow_attr slow_attr;

        memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
        slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr.split_count = 0;
        slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr);
        flow_flag_clear(flow, SLOW);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_add(flow, &uplink_priv->unready_flows);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
        struct net_device *out_dev, *encap_dev = NULL;
        struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        bool encap_valid = true;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;

        if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "E-switch priorities unsupported, upgrade FW");
                return -EOPNOTSUPP;
        }

        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
         * See mlx5e_rep_setup_ft_cb().
         */
        max_chain = mlx5_esw_chains_get_chain_range(esw);
        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested chain is out of supported range");
                return -EOPNOTSUPP;
        }

        max_prio = mlx5_esw_chains_get_prio_range(esw);
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested priority is out of supported range");
                return -EOPNOTSUPP;
        }

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                int mirred_ifindex;

                if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = __dev_get_by_index(dev_net(priv->netdev),
                                             mirred_ifindex);
                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
                                         extack, &encap_dev, &encap_valid);
                if (err)
                        return err;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                attr->dests[out_index].rep = rpriv->rep;
                attr->dests[out_index].mdev = out_priv->mdev;
        }

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                return err;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(attr->counter_dev, true);
                if (IS_ERR(counter))
                        return PTR_ERR(counter);

                attr->counter = counter;
        }

        /* we get here if one of the following takes place:
         * (1) there's no error
         * (2) there's an encap action and we don't have valid neigh
         */
        if (!encap_valid)
                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
        else
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

        if (IS_ERR(flow->rule[0]))
                return PTR_ERR(flow->rule[0]);

        flow_flag_set(flow, OFFLOADED);

        return 0;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
        void *headers_v = MLX5_ADDR_OF(fte_match_param,
                                       spec->match_value,
                                       misc_parameters_3);
        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
                                             headers_v,
                                             geneve_tlv_option_0_data);

        return !!geneve_tlv_opt_0_data;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        int out_index;

        mlx5e_put_flow_tunnel_id(flow);

        if (flow_flag_test(flow, NOT_READY)) {
                remove_unready_flow(flow);
                kvfree(attr->parse_attr);
                return;
        }

        if (mlx5e_is_offloaded_flow(flow)) {
                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }

        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);

        mlx5_eswitch_del_vlan_action(esw, attr);

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
                if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
                        mlx5e_detach_encap(priv, flow, out_index);
                        kfree(attr->parse_attr->tun_info[out_index]);
                }
        kvfree(attr->parse_attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
1410
1411 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1412                               struct mlx5e_encap_entry *e,
1413                               struct list_head *flow_list)
1414 {
1415         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1416         struct mlx5_esw_flow_attr *esw_attr;
1417         struct mlx5_flow_handle *rule;
1418         struct mlx5_flow_spec *spec;
1419         struct mlx5e_tc_flow *flow;
1420         int err;
1421
1422         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1423                                                      e->reformat_type,
1424                                                      e->encap_size, e->encap_header,
1425                                                      MLX5_FLOW_NAMESPACE_FDB);
1426         if (IS_ERR(e->pkt_reformat)) {
1427                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1428                                PTR_ERR(e->pkt_reformat));
1429                 return;
1430         }
1431         e->flags |= MLX5_ENCAP_ENTRY_VALID;
1432         mlx5e_rep_queue_neigh_stats_work(priv);
1433
1434         list_for_each_entry(flow, flow_list, tmp_list) {
1435                 bool all_flow_encaps_valid = true;
1436                 int i;
1437
1438                 if (!mlx5e_is_offloaded_flow(flow))
1439                         continue;
1440                 esw_attr = flow->esw_attr;
1441                 spec = &esw_attr->parse_attr->spec;
1442
1443                 esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1444                 esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1445                 /* Flow can be associated with multiple encap entries.
1446                  * Before offloading the flow verify that all of them have
1447                  * a valid neighbour.
1448                  */
1449                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1450                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1451                                 continue;
1452                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1453                                 all_flow_encaps_valid = false;
1454                                 break;
1455                         }
1456                 }
1457                 /* Do not offload flows with unresolved neighbors */
1458                 if (!all_flow_encaps_valid)
1459                         continue;
1460                 /* update from slow path rule to encap rule */
1461                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
1462                 if (IS_ERR(rule)) {
1463                         err = PTR_ERR(rule);
1464                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1465                                        err);
1466                         continue;
1467                 }
1468
1469                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1470                 flow->rule[0] = rule;
1471                 /* was unset when slow path rule removed */
1472                 flow_flag_set(flow, OFFLOADED);
1473         }
1474 }
1475
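/* Called when the encap entry's neighbour becomes invalid: move every
 * offloaded flow on @flow_list back to the slow path, mark the flow's encap
 * destination invalid and release the packet reformat object.
 */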
1476 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1477                               struct mlx5e_encap_entry *e,
1478                               struct list_head *flow_list)
1479 {
1480         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1481         struct mlx5_flow_handle *rule;
1482         struct mlx5_flow_spec *spec;
1483         struct mlx5e_tc_flow *flow;
1484         int err;
1485
1486         list_for_each_entry(flow, flow_list, tmp_list) {
1487                 if (!mlx5e_is_offloaded_flow(flow))
1488                         continue;
1489                 spec = &flow->esw_attr->parse_attr->spec;
1490
1491                 /* update from encap rule to slow path rule */
1492                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1493                 /* mark the flow's encap dest as non-valid */
1494                 flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1495
1496                 if (IS_ERR(rule)) {
1497                         err = PTR_ERR(rule);
1498                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1499                                        err);
1500                         continue;
1501                 }
1502
1503                 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
1504                 flow->rule[0] = rule;
1505                 /* was unset when fast path rule removed */
1506                 flow_flag_set(flow, OFFLOADED);
1507         }
1508
1509         /* the encap entry was valid, so a packet reformat object exists; release it */
1510         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1511         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1512 }
1513
1514 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1515 {
1516         if (mlx5e_is_eswitch_flow(flow))
1517                 return flow->esw_attr->counter;
1518         else
1519                 return flow->nic_attr->counter;
1520 }
1521
1522 /* Takes a reference to all flows attached to the encap entry and adds those
1523  * flows to flow_list, using the 'tmp_list' list_head in struct mlx5e_tc_flow.
1524  */
1525 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1526 {
1527         struct encap_flow_item *efi;
1528         struct mlx5e_tc_flow *flow;
1529
1530         list_for_each_entry(efi, &e->flows, list) {
1531                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1532                 if (IS_ERR(mlx5e_flow_get(flow)))
1533                         continue;
1534                 wait_for_completion(&flow->init_done);
1535
1536                 flow->tmp_efi_index = efi->index;
1537                 list_add(&flow->tmp_list, flow_list);
1538         }
1539 }
1540
1541 /* Iterate over the flows on flow_list via 'tmp_list' and put each one. */
1542 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1543 {
1544         struct mlx5e_tc_flow *flow, *tmp;
1545
1546         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1547                 mlx5e_flow_put(priv, flow);
1548 }
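/* Typical usage of the take/put helpers above (sketch only, not code from
 * this file):
 *
 *	LIST_HEAD(flow_list);
 *
 *	mlx5e_take_all_encap_flows(e, &flow_list);
 *	... inspect the flows via their 'tmp_list' linkage ...
 *	mlx5e_put_encap_flow_list(priv, &flow_list);
 */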
1549
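/* RCU iterator over nhe->encap_list: take a reference to the next encap
 * entry that can still be referenced, release the previous one, and wait for
 * the new entry to finish initialization. Entries that complete in an
 * invalid state are skipped. The caller must mlx5e_encap_put() the result.
 */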
1550 static struct mlx5e_encap_entry *
1551 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1552                            struct mlx5e_encap_entry *e)
1553 {
1554         struct mlx5e_encap_entry *next = NULL;
1555
1556 retry:
1557         rcu_read_lock();
1558
1559         /* find encap with non-zero reference counter value */
1560         for (next = e ?
1561                      list_next_or_null_rcu(&nhe->encap_list,
1562                                            &e->encap_list,
1563                                            struct mlx5e_encap_entry,
1564                                            encap_list) :
1565                      list_first_or_null_rcu(&nhe->encap_list,
1566                                             struct mlx5e_encap_entry,
1567                                             encap_list);
1568              next;
1569              next = list_next_or_null_rcu(&nhe->encap_list,
1570                                           &next->encap_list,
1571                                           struct mlx5e_encap_entry,
1572                                           encap_list))
1573                 if (mlx5e_encap_take(next))
1574                         break;
1575
1576         rcu_read_unlock();
1577
1578         /* release starting encap */
1579         if (e)
1580                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
1581         if (!next)
1582                 return next;
1583
1584         /* wait for encap to be fully initialized */
1585         wait_for_completion(&next->res_ready);
1586         /* continue searching if encap entry is not in valid state after completion */
1587         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1588                 e = next;
1589                 goto retry;
1590         }
1591
1592         return next;
1593 }
1594
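/* Check whether any offloaded flow using this neighbour has passed traffic
 * since the last report; if so, poke the neighbour via neigh_event_send() so
 * the kernel keeps the entry alive for the offloaded encap rules.
 */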
1595 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1596 {
1597         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1598         struct mlx5e_encap_entry *e = NULL;
1599         struct mlx5e_tc_flow *flow;
1600         struct mlx5_fc *counter;
1601         struct neigh_table *tbl;
1602         bool neigh_used = false;
1603         struct neighbour *n;
1604         u64 lastuse;
1605
1606         if (m_neigh->family == AF_INET)
1607                 tbl = &arp_tbl;
1608 #if IS_ENABLED(CONFIG_IPV6)
1609         else if (m_neigh->family == AF_INET6)
1610                 tbl = ipv6_stub->nd_tbl;
1611 #endif
1612         else
1613                 return;
1614
1615         /* mlx5e_get_next_valid_encap() releases the previous encap entry
1616          * before returning the next one.
1617          */
1618         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1619                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1620                 struct encap_flow_item *efi, *tmp;
1621                 struct mlx5_eswitch *esw;
1622                 LIST_HEAD(flow_list);
1623
1624                 esw = priv->mdev->priv.eswitch;
1625                 mutex_lock(&esw->offloads.encap_tbl_lock);
1626                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1627                         flow = container_of(efi, struct mlx5e_tc_flow,
1628                                             encaps[efi->index]);
1629                         if (IS_ERR(mlx5e_flow_get(flow)))
1630                                 continue;
1631                         list_add(&flow->tmp_list, &flow_list);
1632
1633                         if (mlx5e_is_offloaded_flow(flow)) {
1634                                 counter = mlx5e_tc_get_counter(flow);
1635                                 lastuse = mlx5_fc_query_lastuse(counter);
1636                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1637                                         neigh_used = true;
1638                                         break;
1639                                 }
1640                         }
1641                 }
1642                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1643
1644                 mlx5e_put_encap_flow_list(priv, &flow_list);
1645                 if (neigh_used) {
1646                         /* release current encap before breaking the loop */
1647                         mlx5e_encap_put(priv, e);
1648                         break;
1649                 }
1650         }
1651
1652         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1653
1654         if (neigh_used) {
1655                 nhe->reported_lastuse = jiffies;
1656
1657                 /* find the relevant neigh according to the cached device and
1658                  * dst ip pair
1659                  */
1660                 n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1661                 if (!n)
1662                         return;
1663
1664                 neigh_event_send(n, NULL);
1665                 neigh_release(n);
1666         }
1667 }
1668
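/* Final teardown once the last reference to the encap entry is gone: detach
 * it from the representor if it was successfully attached (compl_result > 0),
 * release the packet reformat object if one was created, and free via RCU.
 */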
1669 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1670 {
1671         WARN_ON(!list_empty(&e->flows));
1672
1673         if (e->compl_result > 0) {
1674                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1675
1676                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1677                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1678         }
1679
1680         kfree(e->tun_info);
1681         kfree(e->encap_header);
1682         kfree_rcu(e, rcu);
1683 }
1684
1685 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1686 {
1687         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1688
1689         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1690                 return;
1691         hash_del_rcu(&e->encap_hlist);
1692         mutex_unlock(&esw->offloads.encap_tbl_lock);
1693
1694         mlx5e_encap_dealloc(priv, e);
1695 }
1696
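/* Unlink the flow from the encap entry at @out_index and drop the flow's
 * reference on it, deallocating the entry when this was the last user.
 */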
1697 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1698                                struct mlx5e_tc_flow *flow, int out_index)
1699 {
1700         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1701         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1702
1703         /* flow wasn't fully initialized */
1704         if (!e)
1705                 return;
1706
1707         mutex_lock(&esw->offloads.encap_tbl_lock);
1708         list_del(&flow->encaps[out_index].list);
1709         flow->encaps[out_index].e = NULL;
1710         if (!refcount_dec_and_test(&e->refcnt)) {
1711                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1712                 return;
1713         }
1714         hash_del_rcu(&e->encap_hlist);
1715         mutex_unlock(&esw->offloads.encap_tbl_lock);
1716
1717         mlx5e_encap_dealloc(priv, e);
1718 }
1719
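/* Remove the duplicate rule that was installed on the peer eswitch for a
 * flow marked DUP, and drop the reference on the peer flow.
 */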
1720 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1721 {
1722         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1723
1724         if (!flow_flag_test(flow, ESWITCH) ||
1725             !flow_flag_test(flow, DUP))
1726                 return;
1727
1728         mutex_lock(&esw->offloads.peer_mutex);
1729         list_del(&flow->peer);
1730         mutex_unlock(&esw->offloads.peer_mutex);
1731
1732         flow_flag_clear(flow, DUP);
1733
1734         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1735                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1736                 kfree(flow->peer_flow);
1737         }
1738
1739         flow->peer_flow = NULL;
1740 }
1741
1742 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1743 {
1744         struct mlx5_core_dev *dev = flow->priv->mdev;
1745         struct mlx5_devcom *devcom = dev->priv.devcom;
1746         struct mlx5_eswitch *peer_esw;
1747
1748         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1749         if (!peer_esw)
1750                 return;
1751
1752         __mlx5e_tc_del_fdb_peer_flow(flow);
1753         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1754 }
1755
1756 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1757                               struct mlx5e_tc_flow *flow)
1758 {
1759         if (mlx5e_is_eswitch_flow(flow)) {
1760                 mlx5e_tc_del_fdb_peer_flow(flow);
1761                 mlx5e_tc_del_fdb_flow(priv, flow);
1762         } else {
1763                 mlx5e_tc_del_nic_flow(priv, flow);
1764         }
1765 }
1766
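/* Report whether the rule contains a goto (chain jump) action; used below to
 * decide whether tunnel match info must be mapped to a register for the
 * benefit of rules on later chains.
 */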
1767 static bool flow_has_tc_fwd_action(struct flow_cls_offload *f)
1768 {
1769         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1770         struct flow_action *flow_action = &rule->action;
1771         const struct flow_action_entry *act;
1772         int i;
1773
1774         flow_action_for_each(i, act, flow_action) {
1775                 switch (act->id) {
1776                 case FLOW_ACTION_GOTO:
1777                         return true;
1778                 default:
1779                         continue;
1780                 }
1781         }
1782
1783         return false;
1784 }
1785
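/* Walk the geneve options in @opts, which holds the mask: an all-zero mask
 * is "don't care"; otherwise every option's class, type and data must be
 * fully masked, since the mapped ID can only represent an exact match.
 */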
1786 static int
1787 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1788                                     struct flow_dissector_key_enc_opts *opts,
1789                                     struct netlink_ext_ack *extack,
1790                                     bool *dont_care)
1791 {
1792         struct geneve_opt *opt;
1793         int off = 0;
1794
1795         *dont_care = true;
1796
1797         while (opts->len > off) {
1798                 opt = (struct geneve_opt *)&opts->data[off];
1799
1800                 if (!(*dont_care) || opt->opt_class || opt->type ||
1801                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1802                         *dont_care = false;
1803
1804                         if (opt->opt_class != U16_MAX ||
1805                             opt->type != U8_MAX ||
1806                             memchr_inv(opt->opt_data, 0xFF,
1807                                        opt->length * 4)) {
1808                                 NL_SET_ERR_MSG(extack,
1809                                                "Partial match of tunnel options in chain > 0 isn't supported");
1810                                 netdev_warn(priv->netdev,
1811                                             "Partial match of tunnel options in chain > 0 isn't supported");
1812                                 return -EOPNOTSUPP;
1813                         }
1814                 }
1815
1816                 off += sizeof(struct geneve_opt) + opt->length * 4;
1817         }
1818
1819         return 0;
1820 }
1821
1822 #define COPY_DISSECTOR(rule, diss_key, dst)\
1823 ({ \
1824         struct flow_rule *__rule = (rule);\
1825         typeof(dst) __dst = dst;\
1826 \
1827         memcpy(__dst,\
1828                skb_flow_dissector_target(__rule->match.dissector,\
1829                                          diss_key,\
1830                                          __rule->match.key),\
1831                sizeof(*__dst));\
1832 })
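/* Example use of COPY_DISSECTOR() (sketch only; 'rule' is the struct
 * flow_rule obtained via flow_cls_offload_flow_rule()):
 *
 *	struct flow_dissector_key_keyid key_id;
 *
 *	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, &key_id);
 */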
1833
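/* Map the full tunnel match (headers plus optional geneve options) to a
 * compact ID: value = tun_id << ENC_OPTS_BITS | enc_opts_id. On chain 0 the
 * ID is written to a register via a modify header action; on higher chains
 * the rule matches on the previously written register value instead.
 */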
1834 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1835                                     struct mlx5e_tc_flow *flow,
1836                                     struct flow_cls_offload *f,
1837                                     struct net_device *filter_dev)
1838 {
1839         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1840         struct netlink_ext_ack *extack = f->common.extack;
1841         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1842         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1843         struct flow_match_enc_opts enc_opts_match;
1844         struct mlx5_rep_uplink_priv *uplink_priv;
1845         struct mlx5e_rep_priv *uplink_rpriv;
1846         struct tunnel_match_key tunnel_key;
1847         bool enc_opts_is_dont_care = true;
1848         u32 tun_id, enc_opts_id = 0;
1849         struct mlx5_eswitch *esw;
1850         u32 value, mask;
1851         int err;
1852
1853         esw = priv->mdev->priv.eswitch;
1854         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1855         uplink_priv = &uplink_rpriv->uplink_priv;
1856
1857         memset(&tunnel_key, 0, sizeof(tunnel_key));
1858         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1859                        &tunnel_key.enc_control);
1860         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1861                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1862                                &tunnel_key.enc_ipv4);
1863         else
1864                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1865                                &tunnel_key.enc_ipv6);
1866         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1867         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1868                        &tunnel_key.enc_tp);
1869         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1870                        &tunnel_key.enc_key_id);
1871         tunnel_key.filter_ifindex = filter_dev->ifindex;
1872
1873         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1874         if (err)
1875                 return err;
1876
1877         flow_rule_match_enc_opts(rule, &enc_opts_match);
1878         err = enc_opts_is_dont_care_or_full_match(priv,
1879                                                   enc_opts_match.mask,
1880                                                   extack,
1881                                                   &enc_opts_is_dont_care);
1882         if (err)
1883                 goto err_enc_opts;
1884
1885         if (!enc_opts_is_dont_care) {
1886                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
1887                                   enc_opts_match.key, &enc_opts_id);
1888                 if (err)
1889                         goto err_enc_opts;
1890         }
1891
1892         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
1893         mask = enc_opts_id ? TUNNEL_ID_MASK :
1894                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
1895
1896         if (attr->chain) {
1897                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
1898                                             TUNNEL_TO_REG, value, mask);
1899         } else {
1900                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1901                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
1902                                                 mod_hdr_acts,
1903                                                 TUNNEL_TO_REG, value);
1904                 if (err)
1905                         goto err_set;
1906
1907                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1908         }
1909
1910         flow->tunnel_id = value;
1911         return 0;
1912
1913 err_set:
1914         if (enc_opts_id)
1915                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1916                                enc_opts_id);
1917 err_enc_opts:
1918         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1919         return err;
1920 }
1921
1922 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
1923 {
1924         u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
1925         u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
1926         struct mlx5_rep_uplink_priv *uplink_priv;
1927         struct mlx5e_rep_priv *uplink_rpriv;
1928         struct mlx5_eswitch *esw;
1929
1930         esw = flow->priv->mdev->priv.eswitch;
1931         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1932         uplink_priv = &uplink_rpriv->uplink_priv;
1933
1934         if (tun_id)
1935                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1936         if (enc_opts_id)
1937                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1938                                enc_opts_id);
1939 }
1940
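/* On chain 0, parse the tunnel headers into the outer match and add a decap
 * action. When chains are used, the tunnel info travels through a register
 * mapping instead: set on chain 0 (if the rule jumps), matched on later
 * chains. This requires eswitch vport match metadata support.
 */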
1941 static int parse_tunnel_attr(struct mlx5e_priv *priv,
1942                              struct mlx5e_tc_flow *flow,
1943                              struct mlx5_flow_spec *spec,
1944                              struct flow_cls_offload *f,
1945                              struct net_device *filter_dev,
1946                              u8 *match_level,
1947                              bool *match_inner)
1948 {
1949         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1950         struct netlink_ext_ack *extack = f->common.extack;
1951         bool needs_mapping, sets_mapping;
1952         int err;
1953
1954         if (!mlx5e_is_eswitch_flow(flow))
1955                 return -EOPNOTSUPP;
1956
1957         needs_mapping = !!flow->esw_attr->chain;
1958         sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f);
1959         *match_inner = !needs_mapping;
1960
1961         if ((needs_mapping || sets_mapping) &&
1962             !mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1963                 NL_SET_ERR_MSG(extack,
1964                                "Chains on tunnel devices aren't supported without register metadata support");
1965                 netdev_warn(priv->netdev,
1966                             "Chains on tunnel devices aren't supported without register metadata support");
1967                 return -EOPNOTSUPP;
1968         }
1969
1970         if (!flow->esw_attr->chain) {
1971                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
1972                                          match_level);
1973                 if (err) {
1974                         NL_SET_ERR_MSG_MOD(extack,
1975                                            "Failed to parse tunnel attributes");
1976                         netdev_warn(priv->netdev,
1977                                     "Failed to parse tunnel attributes");
1978                         return err;
1979                 }
1980
1981                 flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
1982         }
1983
1984         if (!needs_mapping && !sets_mapping)
1985                 return 0;
1986
1987         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
1988 }
1989
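/* Helpers selecting the inner vs. outer header blocks of a flow spec; flows
 * that decapsulate match on the inner headers, all others on the outer ones.
 */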
1990 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
1991 {
1992         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1993                             inner_headers);
1994 }
1995
1996 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
1997 {
1998         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
1999                             inner_headers);
2000 }
2001
2002 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2003 {
2004         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2005                             outer_headers);
2006 }
2007
2008 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2009 {
2010         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2011                             outer_headers);
2012 }
2013
2014 static void *get_match_headers_value(u32 flags,
2015                                      struct mlx5_flow_spec *spec)
2016 {
2017         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2018                 get_match_inner_headers_value(spec) :
2019                 get_match_outer_headers_value(spec);
2020 }
2021
2022 static void *get_match_headers_criteria(u32 flags,
2023                                         struct mlx5_flow_spec *spec)
2024 {
2025         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2026                 get_match_inner_headers_criteria(spec) :
2027                 get_match_outer_headers_criteria(spec);
2028 }
2029
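/* Matching on the meta ingress ifindex is only accepted when it is an exact
 * match that refers to the filter device itself.
 */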
2030 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2031                                    struct flow_cls_offload *f)
2032 {
2033         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2034         struct netlink_ext_ack *extack = f->common.extack;
2035         struct net_device *ingress_dev;
2036         struct flow_match_meta match;
2037
2038         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2039                 return 0;
2040
2041         flow_rule_match_meta(rule, &match);
2042         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2043                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2044                 return -EINVAL;
2045         }
2046
2047         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2048                                          match.key->ingress_ifindex);
2049         if (!ingress_dev) {
2050                 NL_SET_ERR_MSG_MOD(extack,
2051                                    "Can't find the ingress port to match on");
2052                 return -EINVAL;
2053         }
2054
2055         if (ingress_dev != filter_dev) {
2056                 NL_SET_ERR_MSG_MOD(extack,
2057                                    "Can't match on the ingress filter port");
2058                 return -EINVAL;
2059         }
2060
2061         return 0;
2062 }
2063
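/* Translate the flower match into the mlx5 flow spec, recording the deepest
 * layer matched (none/L2/L3/L4) separately for the inner and outer headers.
 */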
2064 static int __parse_cls_flower(struct mlx5e_priv *priv,
2065                               struct mlx5e_tc_flow *flow,
2066                               struct mlx5_flow_spec *spec,
2067                               struct flow_cls_offload *f,
2068                               struct net_device *filter_dev,
2069                               u8 *inner_match_level, u8 *outer_match_level)
2070 {
2071         struct netlink_ext_ack *extack = f->common.extack;
2072         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2073                                        outer_headers);
2074         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2075                                        outer_headers);
2076         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2077                                     misc_parameters);
2078         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2079                                     misc_parameters);
2080         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2081         struct flow_dissector *dissector = rule->match.dissector;
2082         u16 addr_type = 0;
2083         u8 ip_proto = 0;
2084         u8 *match_level;
2085         int err;
2086
2087         match_level = outer_match_level;
2088
2089         if (dissector->used_keys &
2090             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2091               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2092               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2093               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2094               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2095               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2096               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2097               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2098               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2099               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2100               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2101               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2102               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2103               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2104               BIT(FLOW_DISSECTOR_KEY_TCP) |
2105               BIT(FLOW_DISSECTOR_KEY_IP)  |
2106               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2107               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
2108                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2109                 netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
2110                             dissector->used_keys);
2111                 return -EOPNOTSUPP;
2112         }
2113
2114         if (mlx5e_get_tc_tun(filter_dev)) {
2115                 bool match_inner = false;
2116
2117                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2118                                         outer_match_level, &match_inner);
2119                 if (err)
2120                         return err;
2121
2122                 if (match_inner) {
2123                         /* header pointers should point to the inner headers
2124                          * if the packet was decapsulated already.
2125                          * outer headers are set by parse_tunnel_attr.
2126                          */
2127                         match_level = inner_match_level;
2128                         headers_c = get_match_inner_headers_criteria(spec);
2129                         headers_v = get_match_inner_headers_value(spec);
2130                 }
2131         }
2132
2133         err = mlx5e_flower_parse_meta(filter_dev, f);
2134         if (err)
2135                 return err;
2136
2137         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2138                 struct flow_match_basic match;
2139
2140                 flow_rule_match_basic(rule, &match);
2141                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2142                          ntohs(match.mask->n_proto));
2143                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2144                          ntohs(match.key->n_proto));
2145
2146                 if (match.mask->n_proto)
2147                         *match_level = MLX5_MATCH_L2;
2148         }
2149         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2150             is_vlan_dev(filter_dev)) {
2151                 struct flow_dissector_key_vlan filter_dev_mask;
2152                 struct flow_dissector_key_vlan filter_dev_key;
2153                 struct flow_match_vlan match;
2154
2155                 if (is_vlan_dev(filter_dev)) {
2156                         match.key = &filter_dev_key;
2157                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2158                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2159                         match.key->vlan_priority = 0;
2160                         match.mask = &filter_dev_mask;
2161                         memset(match.mask, 0xff, sizeof(*match.mask));
2162                         match.mask->vlan_priority = 0;
2163                 } else {
2164                         flow_rule_match_vlan(rule, &match);
2165                 }
2166                 if (match.mask->vlan_id ||
2167                     match.mask->vlan_priority ||
2168                     match.mask->vlan_tpid) {
2169                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2170                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2171                                          svlan_tag, 1);
2172                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2173                                          svlan_tag, 1);
2174                         } else {
2175                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2176                                          cvlan_tag, 1);
2177                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2178                                          cvlan_tag, 1);
2179                         }
2180
2181                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2182                                  match.mask->vlan_id);
2183                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2184                                  match.key->vlan_id);
2185
2186                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2187                                  match.mask->vlan_priority);
2188                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2189                                  match.key->vlan_priority);
2190
2191                         *match_level = MLX5_MATCH_L2;
2192                 }
2193         } else if (*match_level != MLX5_MATCH_NONE) {
2194                 /* cvlan_tag enabled in match criteria and
2195                  * disabled in match value means both S & C tags
2196                  * don't exist (the packet carries neither tag)
2197                  */
2198                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2199                 *match_level = MLX5_MATCH_L2;
2200         }
2201
2202         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2203                 struct flow_match_vlan match;
2204
2205                 flow_rule_match_cvlan(rule, &match);
2206                 if (match.mask->vlan_id ||
2207                     match.mask->vlan_priority ||
2208                     match.mask->vlan_tpid) {
2209                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2210                                 MLX5_SET(fte_match_set_misc, misc_c,
2211                                          outer_second_svlan_tag, 1);
2212                                 MLX5_SET(fte_match_set_misc, misc_v,
2213                                          outer_second_svlan_tag, 1);
2214                         } else {
2215                                 MLX5_SET(fte_match_set_misc, misc_c,
2216                                          outer_second_cvlan_tag, 1);
2217                                 MLX5_SET(fte_match_set_misc, misc_v,
2218                                          outer_second_cvlan_tag, 1);
2219                         }
2220
2221                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2222                                  match.mask->vlan_id);
2223                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2224                                  match.key->vlan_id);
2225                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2226                                  match.mask->vlan_priority);
2227                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2228                                  match.key->vlan_priority);
2229
2230                         *match_level = MLX5_MATCH_L2;
2231                 }
2232         }
2233
2234         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2235                 struct flow_match_eth_addrs match;
2236
2237                 flow_rule_match_eth_addrs(rule, &match);
2238                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2239                                              dmac_47_16),
2240                                 match.mask->dst);
2241                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2242                                              dmac_47_16),
2243                                 match.key->dst);
2244
2245                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2246                                              smac_47_16),
2247                                 match.mask->src);
2248                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2249                                              smac_47_16),
2250                                 match.key->src);
2251
2252                 if (!is_zero_ether_addr(match.mask->src) ||
2253                     !is_zero_ether_addr(match.mask->dst))
2254                         *match_level = MLX5_MATCH_L2;
2255         }
2256
2257         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2258                 struct flow_match_control match;
2259
2260                 flow_rule_match_control(rule, &match);
2261                 addr_type = match.key->addr_type;
2262
2263                 /* the HW doesn't support matching on first/later fragments */
2264                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2265                         return -EOPNOTSUPP;
2266
2267                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2268                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2269                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2270                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2271
2272                         /* the HW doesn't need L3 inline to match on frag=no */
2273                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2274                                 *match_level = MLX5_MATCH_L2;
2275                         else
2276                                 *match_level = MLX5_MATCH_L3;
2277                 }
2278         }
2279         /* ***  L2 attributes parsing up to here *** */
2280
2281         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2282                 struct flow_match_basic match;
2283
2284                 flow_rule_match_basic(rule, &match);
2285                 ip_proto = match.key->ip_proto;
2286
2287                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2288                          match.mask->ip_proto);
2289                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2290                          match.key->ip_proto);
2291
2292                 if (match.mask->ip_proto)
2293                         *match_level = MLX5_MATCH_L3;
2294         }
2295
2296         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2297                 struct flow_match_ipv4_addrs match;
2298
2299                 flow_rule_match_ipv4_addrs(rule, &match);
2300                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2301                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2302                        &match.mask->src, sizeof(match.mask->src));
2303                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2304                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2305                        &match.key->src, sizeof(match.key->src));
2306                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2307                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2308                        &match.mask->dst, sizeof(match.mask->dst));
2309                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2310                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2311                        &match.key->dst, sizeof(match.key->dst));
2312
2313                 if (match.mask->src || match.mask->dst)
2314                         *match_level = MLX5_MATCH_L3;
2315         }
2316
2317         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2318                 struct flow_match_ipv6_addrs match;
2319
2320                 flow_rule_match_ipv6_addrs(rule, &match);
2321                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2322                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2323                        &match.mask->src, sizeof(match.mask->src));
2324                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2325                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2326                        &match.key->src, sizeof(match.key->src));
2327
2328                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2329                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2330                        &match.mask->dst, sizeof(match.mask->dst));
2331                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2332                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2333                        &match.key->dst, sizeof(match.key->dst));
2334
2335                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2336                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2337                         *match_level = MLX5_MATCH_L3;
2338         }
2339
2340         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2341                 struct flow_match_ip match;
2342
2343                 flow_rule_match_ip(rule, &match);
2344                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2345                          match.mask->tos & 0x3);
2346                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2347                          match.key->tos & 0x3);
2348
2349                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2350                          match.mask->tos >> 2);
2351                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2352                          match.key->tos  >> 2);
2353
2354                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2355                          match.mask->ttl);
2356                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2357                          match.key->ttl);
2358
2359                 if (match.mask->ttl &&
2360                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2361                                                 ft_field_support.outer_ipv4_ttl)) {
2362                         NL_SET_ERR_MSG_MOD(extack,
2363                                            "Matching on TTL is not supported");
2364                         return -EOPNOTSUPP;
2365                 }
2366
2367                 if (match.mask->tos || match.mask->ttl)
2368                         *match_level = MLX5_MATCH_L3;
2369         }
2370
2371         /* ***  L3 attributes parsing up to here *** */
2372
2373         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2374                 struct flow_match_ports match;
2375
2376                 flow_rule_match_ports(rule, &match);
2377                 switch (ip_proto) {
2378                 case IPPROTO_TCP:
2379                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2380                                  tcp_sport, ntohs(match.mask->src));
2381                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2382                                  tcp_sport, ntohs(match.key->src));
2383
2384                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2385                                  tcp_dport, ntohs(match.mask->dst));
2386                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2387                                  tcp_dport, ntohs(match.key->dst));
2388                         break;
2389
2390                 case IPPROTO_UDP:
2391                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2392                                  udp_sport, ntohs(match.mask->src));
2393                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2394                                  udp_sport, ntohs(match.key->src));
2395
2396                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2397                                  udp_dport, ntohs(match.mask->dst));
2398                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2399                                  udp_dport, ntohs(match.key->dst));
2400                         break;
2401                 default:
2402                         NL_SET_ERR_MSG_MOD(extack,
2403                                            "Only UDP and TCP transports are supported for L4 matching");
2404                         netdev_err(priv->netdev,
2405                                    "Only UDP and TCP transports are supported\n");
2406                         return -EINVAL;
2407                 }
2408
2409                 if (match.mask->src || match.mask->dst)
2410                         *match_level = MLX5_MATCH_L4;
2411         }
2412
2413         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2414                 struct flow_match_tcp match;
2415
2416                 flow_rule_match_tcp(rule, &match);
2417                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2418                          ntohs(match.mask->flags));
2419                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2420                          ntohs(match.key->flags));
2421
2422                 if (match.mask->flags)
2423                         *match_level = MLX5_MATCH_L4;
2424         }
2425
2426         return 0;
2427 }
2428
2429 static int parse_cls_flower(struct mlx5e_priv *priv,
2430                             struct mlx5e_tc_flow *flow,
2431                             struct mlx5_flow_spec *spec,
2432                             struct flow_cls_offload *f,
2433                             struct net_device *filter_dev)
2434 {
2435         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2436         struct netlink_ext_ack *extack = f->common.extack;
2437         struct mlx5_core_dev *dev = priv->mdev;
2438         struct mlx5_eswitch *esw = dev->priv.eswitch;
2439         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2440         struct mlx5_eswitch_rep *rep;
2441         bool is_eswitch_flow;
2442         int err;
2443
2444         inner_match_level = MLX5_MATCH_NONE;
2445         outer_match_level = MLX5_MATCH_NONE;
2446
2447         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2448                                  &inner_match_level, &outer_match_level);
2449         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2450                                  outer_match_level : inner_match_level;
2451
2452         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2453         if (!err && is_eswitch_flow) {
2454                 rep = rpriv->rep;
2455                 if (rep->vport != MLX5_VPORT_UPLINK &&
2456                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2457                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2458                         NL_SET_ERR_MSG_MOD(extack,
2459                                            "Flow is not offloaded due to min inline setting");
2460                         netdev_warn(priv->netdev,
2461                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2462                                     non_tunnel_match_level, esw->offloads.inline_mode);
2463                         return -EOPNOTSUPP;
2464                 }
2465         }
2466
2467         if (is_eswitch_flow) {
2468                 flow->esw_attr->inner_match_level = inner_match_level;
2469                 flow->esw_attr->outer_match_level = outer_match_level;
2470         } else {
2471                 flow->nic_attr->match_level = non_tunnel_match_level;
2472         }
2473
2474         return err;
2475 }
2476
2477 struct pedit_headers {
2478         struct ethhdr  eth;
2479         struct vlan_hdr vlan;
2480         struct iphdr   ip4;
2481         struct ipv6hdr ip6;
2482         struct tcphdr  tcp;
2483         struct udphdr  udp;
2484 };
2485
2486 struct pedit_headers_action {
2487         struct pedit_headers    vals;
2488         struct pedit_headers    masks;
2489         u32                     pedits;
2490 };
2491
2492 static int pedit_header_offsets[] = {
2493         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2494         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2495         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2496         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2497         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2498 };
2499
2500 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2501
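/* Record one 32-bit chunk of a pedit action in the per-header-type shadow
 * structs, refusing a mask that overlaps bits already written by an earlier
 * action on the same location.
 */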
2502 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2503                          struct pedit_headers_action *hdrs)
2504 {
2505         u32 *curr_pmask, *curr_pval;
2506
2507         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2508         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2509
2510         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2511                 goto out_err;
2512
2513         *curr_pmask |= mask;
2514         *curr_pval  |= (val & mask);
2515
2516         return 0;
2517
2518 out_err:
2519         return -EOPNOTSUPP;
2520 }
2521
2522 struct mlx5_fields {
2523         u8  field;
2524         u8  field_bsize;
2525         u32 field_mask;
2526         u32 offset;
2527         u32 match_offset;
2528 };
2529
2530 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2531                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2532                  offsetof(struct pedit_headers, field) + (off), \
2533                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2534
2535 /* true when the masked values are equal and every bit that the rewrite
2536  * touches is also covered by the match mask, i.e. no rewrite without a match.
2537  */
2538 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2539         type matchmaskx = *(type *)(matchmaskp); \
2540         type matchvalx = *(type *)(matchvalp); \
2541         type maskx = *(type *)(maskp); \
2542         type valx = *(type *)(valp); \
2543         \
2544         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2545                                                                  matchmaskx)); \
2546 })
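/* Worked example (u8 fields, sketch only): a pedit that sets 0x0a with mask
 * 0xff, combined with a match of value 0x0a and mask 0xff, satisfies
 * SAME_VAL_MASK() and the rewrite can be skipped. With a match mask of only
 * 0x0f it would not: bits 4-7 would be rewritten without being matched on.
 */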
2547
2548 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2549                          void *matchmaskp, u8 bsize)
2550 {
2551         bool same = false;
2552
2553         switch (bsize) {
2554         case 8:
2555                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2556                 break;
2557         case 16:
2558                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2559                 break;
2560         case 32:
2561                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2562                 break;
2563         }
2564
2565         return same;
2566 }
2567
2568 static struct mlx5_fields fields[] = {
2569         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2570         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2571         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2572         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2573         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2574         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2575
2576         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2577         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2578         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2579         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2580
2581         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2582                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2583         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2584                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2585         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2586                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2587         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2588                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2589         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2590                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2591         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2592                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2593         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2594                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2595         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2596                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2597         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2598
2599         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2600         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2601         /* in the linux tcphdr, tcp_flags is 8 bits long */
2602         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2603
2604         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2605         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2606 };
2607
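/* For every entry in fields[], fold the accumulated set/add pedit masks into
 * modify header actions: no-op rewrites (setting an already-matched value,
 * adding zero) are skipped, and a mask whose set bits are not contiguous is
 * rejected, since one action can only write a single contiguous bit range.
 */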
2608 static int offload_pedit_fields(struct mlx5e_priv *priv,
2609                                 int namespace,
2610                                 struct pedit_headers_action *hdrs,
2611                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2612                                 u32 *action_flags,
2613                                 struct netlink_ext_ack *extack)
2614 {
2615         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2616         int i, action_size, first, last, next_z;
2617         void *headers_c, *headers_v, *action, *vals_p;
2618         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2619         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2620         struct mlx5_fields *f;
2621         unsigned long mask;
2622         __be32 mask_be32;
2623         __be16 mask_be16;
2624         int err;
2625         u8 cmd;
2626
2627         mod_acts = &parse_attr->mod_hdr_acts;
2628         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2629         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2630
2631         set_masks = &hdrs[0].masks;
2632         add_masks = &hdrs[1].masks;
2633         set_vals = &hdrs[0].vals;
2634         add_vals = &hdrs[1].vals;
2635
2636         action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2637
2638         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2639                 bool skip;
2640
2641                 f = &fields[i];
2642                 /* avoid seeing bits set from previous iterations */
2643                 s_mask = 0;
2644                 a_mask = 0;
2645
2646                 s_masks_p = (void *)set_masks + f->offset;
2647                 a_masks_p = (void *)add_masks + f->offset;
2648
2649                 s_mask = *s_masks_p & f->field_mask;
2650                 a_mask = *a_masks_p & f->field_mask;
2651
2652                 if (!s_mask && !a_mask) /* nothing to offload here */
2653                         continue;
2654
2655                 if (s_mask && a_mask) {
2656                         NL_SET_ERR_MSG_MOD(extack,
2657                                            "can't set and add to the same HW field");
2658                         mlx5_core_warn(priv->mdev, "can't set and add to the same HW field (%x)\n", f->field);
2659                         return -EOPNOTSUPP;
2660                 }
2661
2662                 skip = false;
2663                 if (s_mask) {
2664                         void *match_mask = headers_c + f->match_offset;
2665                         void *match_val = headers_v + f->match_offset;
2666
2667                         cmd  = MLX5_ACTION_TYPE_SET;
2668                         mask = s_mask;
2669                         vals_p = (void *)set_vals + f->offset;
2670                         /* don't rewrite if we have a match on the same value */
2671                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2672                                          match_mask, f->field_bsize))
2673                                 skip = true;
2674                         /* clear to denote we consumed this field */
2675                         *s_masks_p &= ~f->field_mask;
2676                 } else {
2677                         cmd  = MLX5_ACTION_TYPE_ADD;
2678                         mask = a_mask;
2679                         vals_p = (void *)add_vals + f->offset;
2680                         /* adding 0 is a no-op */
2681                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2682                                 skip = true;
2683                         /* clear to denote we consumed this field */
2684                         *a_masks_p &= ~f->field_mask;
2685                 }
2686                 if (skip)
2687                         continue;
2688
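                /* normalize the big-endian field mask into 'mask' so that the
                 * find_*_bit() scans below see the field's bits at consistent
                 * positions regardless of host endianness
                 */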
2689                 if (f->field_bsize == 32) {
2690                         mask_be32 = *(__be32 *)&mask;
2691                         mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2692                 } else if (f->field_bsize == 16) {
2693                         mask_be16 = *(__be16 *)&mask;
2694                         mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2695                 }
2696
2697                 first = find_first_bit(&mask, f->field_bsize);
2698                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2699                 last  = find_last_bit(&mask, f->field_bsize);
2700                 if (first < next_z && next_z < last) {
2701                         NL_SET_ERR_MSG_MOD(extack,
2702                                            "rewrite of non-contiguous sub-fields isn't supported");
2703                         printk(KERN_WARNING "mlx5: rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n",
2704                                mask);
2705                         return -EOPNOTSUPP;
2706                 }
2707
2708                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2709                 if (err) {
2710                         NL_SET_ERR_MSG_MOD(extack,
2711                                            "too many pedit actions, can't offload");
2712                         mlx5_core_warn(priv->mdev,
2713                                        "mlx5: parsed %d pedit actions, can't do more\n",
2714                                        mod_acts->num_actions);
2715                         return err;
2716                 }
2717
2718                 action = mod_acts->actions +
2719                          (mod_acts->num_actions * action_size);
2720                 MLX5_SET(set_action_in, action, action_type, cmd);
2721                 MLX5_SET(set_action_in, action, field, f->field);
2722
2723                 if (cmd == MLX5_ACTION_TYPE_SET) {
2724                         int start;
2725
2726                         /* a bit-sized field may start at a bit other than bit 0 */
2727                         start = find_first_bit((unsigned long *)&f->field_mask,
2728                                                f->field_bsize);
2729
2730                         MLX5_SET(set_action_in, action, offset, first - start);
2731                         /* length is the number of bits to write; zero encodes a length of 32 */
2732                         MLX5_SET(set_action_in, action, length, (last - first + 1));
2733                 }
2734
2735                 if (f->field_bsize == 32)
2736                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2737                 else if (f->field_bsize == 16)
2738                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2739                 else if (f->field_bsize == 8)
2740                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2741
2742                 ++mod_acts->num_actions;
2743         }
2744
2745         return 0;
2746 }
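
/* Example of the contiguity check in offload_pedit_fields() above
 * (a sketch; the numbers assume a 16-bit field after the endianness
 * fix-up): a mask of 0x0fff gives first = 0, next_z = 12, last = 11,
 * a single run of bits, and is offloaded as one action of length 12.
 * A mask of 0x0f0f gives first = 0, next_z = 4, last = 11, so
 * first < next_z < last detects the second run of bits and the
 * rewrite is rejected with -EOPNOTSUPP.
 */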
2747
2748 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2749                                                   int namespace)
2750 {
2751         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2752                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2753         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2754                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2755 }
2756
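/* Grow the modify-header action table on demand.  The table starts
 * empty, is first sized for a single action and is then doubled on
 * each subsequent call, capped at the namespace's
 * max_modify_header_actions capability.  For example, with a HW cap
 * of 16, max_actions grows 1 -> 2 -> 4 -> 8 -> 16, and a further
 * call returns -ENOSPC.
 */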
2757 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2758                           int namespace,
2759                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2760 {
2761         int action_size, new_num_actions, max_hw_actions;
2762         size_t new_sz, old_sz;
2763         void *ret;
2764
2765         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2766                 return 0;
2767
2768         action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2769
2770         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2771                                                                 namespace);
2772         new_num_actions = min(max_hw_actions,
2773                               mod_hdr_acts->actions ?
2774                               mod_hdr_acts->max_actions * 2 : 1);
2775         if (mod_hdr_acts->max_actions == new_num_actions)
2776                 return -ENOSPC;
2777
2778         new_sz = action_size * new_num_actions;
2779         old_sz = mod_hdr_acts->max_actions * action_size;
2780         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2781         if (!ret)
2782                 return -ENOMEM;
2783
2784         memset(ret + old_sz, 0, new_sz - old_sz);
2785         mod_hdr_acts->actions = ret;
2786         mod_hdr_acts->max_actions = new_num_actions;
2787
2788         return 0;
2789 }
2790
2791 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2792 {
2793         kfree(mod_hdr_acts->actions);
2794         mod_hdr_acts->actions = NULL;
2795         mod_hdr_acts->num_actions = 0;
2796         mod_hdr_acts->max_actions = 0;
2797 }
2798
2799 static const struct pedit_headers zero_masks = {};
2800
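/* Parse one TC pedit entry into the per-command masks/vals in hdrs[]:
 * index 0 (TCA_PEDIT_KEY_EX_CMD_SET) collects mangle commands, index 1
 * (TCA_PEDIT_KEY_EX_CMD_ADD) collects add commands.  The mask is
 * inverted before set_pedit_val() so that set bits mark the bits to be
 * rewritten.  An illustrative rule that reaches here with
 * FLOW_ACT_MANGLE_HDR_TYPE_IP4 (device name is a placeholder):
 *
 *   tc filter add dev eth0 ingress protocol ip flower skip_sw \
 *           action pedit ex munge ip ttl set 64
 */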
2801 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2802                                  const struct flow_action_entry *act, int namespace,
2803                                  struct pedit_headers_action *hdrs,
2804                                  struct netlink_ext_ack *extack)
2805 {
2806         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2807         int err = -EOPNOTSUPP; /* can't be all optimistic */
2808         u32 mask, val, offset;
2809         u8 htype;
2810
2811         htype = act->mangle.htype;
2813
2814         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2815                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2816                 goto out_err;
2817         }
2818
2819         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2820                 NL_SET_ERR_MSG_MOD(extack,
2821                                    "The pedit offload action is not supported");
2822                 goto out_err;
2823         }
2824
2825         mask = act->mangle.mask;
2826         val = act->mangle.val;
2827         offset = act->mangle.offset;
2828
2829         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2830         if (err)
2831                 goto out_err;
2832
2833         hdrs[cmd].pedits++;
2834
2835         return 0;
2836 out_err:
2837         return err;
2838 }
2839
2840 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
2841                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2842                                  struct pedit_headers_action *hdrs,
2843                                  u32 *action_flags,
2844                                  struct netlink_ext_ack *extack)
2845 {
2846         struct pedit_headers *cmd_masks;
2847         int err;
2848         u8 cmd;
2849
2850         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
2851                                    action_flags, extack);
2852         if (err < 0)
2853                 goto out_dealloc_parsed_actions;
2854
2855         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
2856                 cmd_masks = &hdrs[cmd].masks;
2857                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
2858                         NL_SET_ERR_MSG_MOD(extack,
2859                                            "attempt to offload an unsupported field");
2860                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
2861                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
2862                                        16, 1, cmd_masks, sizeof(zero_masks), true);
2863                         err = -EOPNOTSUPP;
2864                         goto out_dealloc_parsed_actions;
2865                 }
2866         }
2867
2868         return 0;
2869
2870 out_dealloc_parsed_actions:
2871         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
2872         return err;
2873 }
2874
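/* Checksum recalculation is not a standalone HW action; the device
 * recalculates checksums as part of a header rewrite.  Hence csum is
 * accepted only alongside a pedit, and only for the IPv4/TCP/UDP
 * header flags.  An illustrative rule that passes this check (device
 * name is a placeholder):
 *
 *   tc filter add dev eth0 ingress protocol ip flower skip_sw \
 *           action pedit ex munge ip ttl set 64 pipe action csum ip
 */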
2875 static bool csum_offload_supported(struct mlx5e_priv *priv,
2876                                    u32 action,
2877                                    u32 update_flags,
2878                                    struct netlink_ext_ack *extack)
2879 {
2880         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
2881                          TCA_CSUM_UPDATE_FLAG_UDP;
2882
2883         /* The HW recalculates checksums only when rewriting headers */
2884         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
2885                 NL_SET_ERR_MSG_MOD(extack,
2886                                    "TC csum action is only offloaded with pedit");
2887                 netdev_warn(priv->netdev,
2888                             "TC csum action is only offloaded with pedit\n");
2889                 return false;
2890         }
2891
2892         if (update_flags & ~prot_flags) {
2893                 NL_SET_ERR_MSG_MOD(extack,
2894                                    "can't offload TC csum action for some of the headers");
2895                 netdev_warn(priv->netdev,
2896                             "can't offload TC csum action for some of the headers - flags %#x\n",
2897                             update_flags);
2898                 return false;
2899         }
2900
2901         return true;
2902 }
2903
2904 struct ip_ttl_word {
2905         __u8    ttl;
2906         __u8    protocol;
2907         __sum16 check;
2908 };
2909
2910 struct ipv6_hoplimit_word {
2911         __be16  payload_len;
2912         __u8    nexthdr;
2913         __u8    hop_limit;
2914 };
2915
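/* Return true if the pedit action modifies IP header fields other
 * than ttl / hop_limit.  The two structs above overlay the 4-byte
 * word that pedit hands over: e.g. "pedit ex munge ip ttl set 64"
 * arrives with offset == offsetof(struct iphdr, ttl) and an inverted
 * mask covering only the ttl byte, so ttl_word->protocol and
 * ttl_word->check are zero and the action remains offloadable.
 */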
2916 static bool is_action_keys_supported(const struct flow_action_entry *act)
2917 {
2918         u32 mask, offset;
2919         u8 htype;
2920
2921         htype = act->mangle.htype;
2922         offset = act->mangle.offset;
2923         mask = ~act->mangle.mask;
2924         /* For the IPv4 and IPv6 headers, check the enclosing 4-byte word
2925          * to determine whether fields other than ttl / hop_limit are
2926          * being modified.
2927          */
2928         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
2929                 struct ip_ttl_word *ttl_word =
2930                         (struct ip_ttl_word *)&mask;
2931
2932                 if (offset != offsetof(struct iphdr, ttl) ||
2933                     ttl_word->protocol ||
2934                     ttl_word->check) {
2935                         return true;
2936                 }
2937         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
2938                 struct ipv6_hoplimit_word *hoplimit_word =
2939                         (struct ipv6_hoplimit_word *)&mask;
2940
2941                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
2942                     hoplimit_word->payload_len ||
2943                     hoplimit_word->nexthdr) {
2944                         return true;
2945                 }
2946         }
2947         return false;
2948 }
2949
2950 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
2951                                           struct flow_action *flow_action,
2952                                           u32 actions,
2953                                           struct netlink_ext_ack *extack)
2954 {
2955         const struct flow_action_entry *act;
2956         bool modify_ip_header;
2957         void *headers_v;
2958         u16 ethertype;
2959         u8 ip_proto;
2960         int i;
2961
2962         headers_v = get_match_headers_value(actions, spec);
2963         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2964
2965         /* for non-IP we only re-write MACs, so we're okay */
2966         if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
2967                 goto out_ok;
2968
2969         modify_ip_header = false;
2970         flow_action_for_each(i, act, flow_action) {
2971                 if (act->id != FLOW_ACTION_MANGLE &&
2972                     act->id != FLOW_ACTION_ADD)
2973                         continue;
2974
2975                 if (is_action_keys_supported(act)) {
2976                         modify_ip_header = true;
2977                         break;
2978                 }
2979         }
2980
2981         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
2982         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
2983             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2984                 NL_SET_ERR_MSG_MOD(extack,
2985                                    "can't offload re-write of non TCP/UDP/ICMP");
2986                 pr_info("can't offload re-write of ip proto %d\n", ip_proto);
2987                 return false;
2988         }
2989
2990 out_ok:
2991         return true;
2992 }
2993
2994 static bool actions_match_supported(struct mlx5e_priv *priv,
2995                                     struct flow_action *flow_action,
2996                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
2997                                     struct mlx5e_tc_flow *flow,
2998                                     struct netlink_ext_ack *extack)
2999 {
3000         struct net_device *filter_dev = parse_attr->filter_dev;
3001         bool drop_action, pop_action;
3002         u32 actions;
3003
3004         if (mlx5e_is_eswitch_flow(flow))
3005                 actions = flow->esw_attr->action;
3006         else
3007                 actions = flow->nic_attr->action;
3008
3009         drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP;
3010         pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3011
3012         if (flow_flag_test(flow, EGRESS) && !drop_action) {
3013                 /* We only support filters on tunnel devices, or on vlan
3014                  * devices if they have a pop/drop action
3015                  */
3016                 if (!mlx5e_get_tc_tun(filter_dev) ||
3017                     (is_vlan_dev(filter_dev) && !pop_action))
3018                         return false;
3019         }
3020
3021         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3022                 return modify_header_match_supported(&parse_attr->spec,
3023                                                      flow_action, actions,
3024                                                      extack);
3025
3026         return true;
3027 }
3028
3029 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3030 {
3031         struct mlx5_core_dev *fmdev, *pmdev;
3032         u64 fsystem_guid, psystem_guid;
3033
3034         fmdev = priv->mdev;
3035         pmdev = peer_priv->mdev;
3036
3037         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3038         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3039
3040         return (fsystem_guid == psystem_guid);
3041 }
3042
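/* A "vlan modify" action is offloaded by translating it into a pedit
 * of the 16-bit h_vlan_TCI word.  Only the VID bits may change, so
 * the rule must match on the vlan tag and must not alter the
 * priority bits.  Illustrative rule (device name is a placeholder):
 *
 *   tc filter add dev eth0 ingress protocol 802.1q flower skip_sw \
 *           vlan_id 10 action vlan modify id 20
 */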
3043 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3044                                    const struct flow_action_entry *act,
3045                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3046                                    struct pedit_headers_action *hdrs,
3047                                    u32 *action, struct netlink_ext_ack *extack)
3048 {
3049         u16 mask16 = VLAN_VID_MASK;
3050         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3051         const struct flow_action_entry pedit_act = {
3052                 .id = FLOW_ACTION_MANGLE,
3053                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3054                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3055                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3056                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3057         };
3058         u8 match_prio_mask, match_prio_val;
3059         void *headers_c, *headers_v;
3060         int err;
3061
3062         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3063         headers_v = get_match_headers_value(*action, &parse_attr->spec);
3064
3065         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3066               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3067                 NL_SET_ERR_MSG_MOD(extack,
3068                                    "VLAN rewrite action must have VLAN protocol match");
3069                 return -EOPNOTSUPP;
3070         }
3071
3072         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3073         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3074         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3075                 NL_SET_ERR_MSG_MOD(extack,
3076                                    "Changing VLAN prio is not supported");
3077                 return -EOPNOTSUPP;
3078         }
3079
3080         err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL);
3081         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3082
3083         return err;
3084 }
3085
3086 static int
3087 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3088                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3089                                  struct pedit_headers_action *hdrs,
3090                                  u32 *action, struct netlink_ext_ack *extack)
3091 {
3092         const struct flow_action_entry prio_tag_act = {
3093                 .vlan.vid = 0,
3094                 .vlan.prio =
3095                         MLX5_GET(fte_match_set_lyr_2_4,
3096                                  get_match_headers_value(*action,
3097                                                          &parse_attr->spec),
3098                                  first_prio) &
3099                         MLX5_GET(fte_match_set_lyr_2_4,
3100                                  get_match_headers_criteria(*action,
3101                                                             &parse_attr->spec),
3102                                  first_prio),
3103         };
3104
3105         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3106                                        &prio_tag_act, parse_attr, hdrs, action,
3107                                        extack);
3108 }
3109
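/* Translate the TC action list of a NIC (non-eswitch) flow into mlx5
 * flow context actions: accept/drop, pedit, vlan rewrite, csum, mark
 * and hairpin redirect between two ports of the same HW.
 * Illustrative NIC rule (device name is a placeholder):
 *
 *   tc filter add dev eth0 ingress protocol ip flower skip_sw \
 *           ip_proto tcp dst_port 80 action drop
 */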
3110 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3111                                 struct flow_action *flow_action,
3112                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3113                                 struct mlx5e_tc_flow *flow,
3114                                 struct netlink_ext_ack *extack)
3115 {
3116         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
3117         struct pedit_headers_action hdrs[2] = {};
3118         const struct flow_action_entry *act;
3119         u32 action = 0;
3120         int err, i;
3121
3122         if (!flow_action_has_entries(flow_action))
3123                 return -EINVAL;
3124
3125         if (!flow_action_hw_stats_types_check(flow_action, extack,
3126                                               FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT))
3127                 return -EOPNOTSUPP;
3128
3129         attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3130
3131         flow_action_for_each(i, act, flow_action) {
3132                 switch (act->id) {
3133                 case FLOW_ACTION_ACCEPT:
3134                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3135                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3136                         break;
3137                 case FLOW_ACTION_DROP:
3138                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3139                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
3140                                                flow_table_properties_nic_receive.flow_counter))
3141                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3142                         break;
3143                 case FLOW_ACTION_MANGLE:
3144                 case FLOW_ACTION_ADD:
3145                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3146                                                     hdrs, extack);
3147                         if (err)
3148                                 return err;
3149
3150                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
3151                                   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3152                         break;
3153                 case FLOW_ACTION_VLAN_MANGLE:
3154                         err = add_vlan_rewrite_action(priv,
3155                                                       MLX5_FLOW_NAMESPACE_KERNEL,
3156                                                       act, parse_attr, hdrs,
3157                                                       &action, extack);
3158                         if (err)
3159                                 return err;
3160
3161                         break;
3162                 case FLOW_ACTION_CSUM:
3163                         if (csum_offload_supported(priv, action,
3164                                                    act->csum_flags,
3165                                                    extack))
3166                                 break;
3167
3168                         return -EOPNOTSUPP;
3169                 case FLOW_ACTION_REDIRECT: {
3170                         struct net_device *peer_dev = act->dev;
3171
3172                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3173                             same_hw_devs(priv, netdev_priv(peer_dev))) {
3174                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3175                                 flow_flag_set(flow, HAIRPIN);
3176                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3177                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
3178                         } else {
3179                                 NL_SET_ERR_MSG_MOD(extack,
3180                                                    "device is not on same HW, can't offload");
3181                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3182                                             peer_dev->name);
3183                                 return -EINVAL;
3184                         }
3185                         }
3186                         break;
3187                 case FLOW_ACTION_MARK: {
3188                         u32 mark = act->mark;
3189
3190                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3191                                 NL_SET_ERR_MSG_MOD(extack,
3192                                                    "Bad flow mark - only 16 bits are supported");
3193                                 return -EINVAL;
3194                         }
3195
3196                         attr->flow_tag = mark;
3197                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3198                         }
3199                         break;
3200                 default:
3201                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3202                         return -EOPNOTSUPP;
3203                 }
3204         }
3205
3206         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3207             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3208                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3209                                             parse_attr, hdrs, &action, extack);
3210                 if (err)
3211                         return err;
3212                 /* in case all pedit actions are skipped, remove the MOD_HDR
3213                  * flag.
3214                  */
3215                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3216                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3217                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3218                 }
3219         }
3220
3221         attr->action = action;
3222         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3223                 return -EOPNOTSUPP;
3224
3225         return 0;
3226 }
3227
3228 struct encap_key {
3229         const struct ip_tunnel_key *ip_tun_key;
3230         struct mlx5e_tc_tunnel *tc_tunnel;
3231 };
3232
3233 static inline int cmp_encap_info(struct encap_key *a,
3234                                  struct encap_key *b)
3235 {
3236         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3237                a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3238 }
3239
3240 static inline int hash_encap_info(struct encap_key *key)
3241 {
3242         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3243                      key->tc_tunnel->tunnel_type);
3244 }
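
/* Encap entries are deduplicated in esw->offloads.encap_tbl: the hash
 * is a jhash of the ip_tunnel_key seeded with the tunnel type, and two
 * keys are considered equal only if the full ip_tunnel_key matches and
 * the tunnel type is the same.  mlx5e_encap_get() below walks a hash
 * bucket using these helpers.
 */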
3245
3247 static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
3248                                   struct net_device *peer_netdev)
3249 {
3250         struct mlx5e_priv *peer_priv;
3251
3252         peer_priv = netdev_priv(peer_netdev);
3253
3254         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3255                 mlx5e_eswitch_rep(priv->netdev) &&
3256                 mlx5e_eswitch_rep(peer_netdev) &&
3257                 same_hw_devs(priv, peer_priv));
3258 }
3259
3262 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3263 {
3264         return refcount_inc_not_zero(&e->refcnt);
3265 }
3266
3267 static struct mlx5e_encap_entry *
3268 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3269                 uintptr_t hash_key)
3270 {
3271         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3272         struct mlx5e_encap_entry *e;
3273         struct encap_key e_key;
3274
3275         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3276                                    encap_hlist, hash_key) {
3277                 e_key.ip_tun_key = &e->tun_info->key;
3278                 e_key.tc_tunnel = e->tunnel;
3279                 if (!cmp_encap_info(&e_key, key) &&
3280                     mlx5e_encap_take(e))
3281                         return e;
3282         }
3283
3284         return NULL;
3285 }
3286
3287 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3288 {
3289         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3290
3291         return kmemdup(tun_info, tun_size, GFP_KERNEL);
3292 }
3293
3294 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3295                                       struct mlx5e_tc_flow *flow,
3296                                       int out_index,
3297                                       struct mlx5e_encap_entry *e,
3298                                       struct netlink_ext_ack *extack)
3299 {
3300         int i;
3301
3302         for (i = 0; i < out_index; i++) {
3303                 if (flow->encaps[i].e != e)
3304                         continue;
3305                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3306                 netdev_err(priv->netdev, "can't duplicate encap action\n");
3307                 return true;
3308         }
3309
3310         return false;
3311 }
3312
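/* Attach a flow to an encap entry, creating the entry if needed.  The
 * entry is published in encap_tbl before its route/neigh resolution
 * completes, so concurrent users wait on e->res_ready, re-take
 * encap_tbl_lock and check e->compl_result before using it.  When the
 * entry is not yet valid (*encap_valid == false), the caller offloads
 * the flow via the slow path until the neighbour is resolved.
 */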
3313 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3314                               struct mlx5e_tc_flow *flow,
3315                               struct net_device *mirred_dev,
3316                               int out_index,
3317                               struct netlink_ext_ack *extack,
3318                               struct net_device **encap_dev,
3319                               bool *encap_valid)
3320 {
3321         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3322         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3323         struct mlx5e_tc_flow_parse_attr *parse_attr;
3324         const struct ip_tunnel_info *tun_info;
3325         struct encap_key key;
3326         struct mlx5e_encap_entry *e;
3327         unsigned short family;
3328         uintptr_t hash_key;
3329         int err = 0;
3330
3331         parse_attr = attr->parse_attr;
3332         tun_info = parse_attr->tun_info[out_index];
3333         family = ip_tunnel_info_af(tun_info);
3334         key.ip_tun_key = &tun_info->key;
3335         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3336         if (!key.tc_tunnel) {
3337                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3338                 return -EOPNOTSUPP;
3339         }
3340
3341         hash_key = hash_encap_info(&key);
3342
3343         mutex_lock(&esw->offloads.encap_tbl_lock);
3344         e = mlx5e_encap_get(priv, &key, hash_key);
3345
3346         /* must verify whether the encap entry is valid */
3347         if (e) {
3348                 /* Check that entry was not already attached to this flow */
3349                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3350                         err = -EOPNOTSUPP;
3351                         goto out_err;
3352                 }
3353
3354                 mutex_unlock(&esw->offloads.encap_tbl_lock);
3355                 wait_for_completion(&e->res_ready);
3356
3357                 /* Protect against concurrent neigh update. */
3358                 mutex_lock(&esw->offloads.encap_tbl_lock);
3359                 if (e->compl_result < 0) {
3360                         err = -EREMOTEIO;
3361                         goto out_err;
3362                 }
3363                 goto attach_flow;
3364         }
3365
3366         e = kzalloc(sizeof(*e), GFP_KERNEL);
3367         if (!e) {
3368                 err = -ENOMEM;
3369                 goto out_err;
3370         }
3371
3372         refcount_set(&e->refcnt, 1);
3373         init_completion(&e->res_ready);
3374
3375         tun_info = dup_tun_info(tun_info);
3376         if (!tun_info) {
3377                 err = -ENOMEM;
3378                 goto out_err_init;
3379         }
3380         e->tun_info = tun_info;
3381         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3382         if (err)
3383                 goto out_err_init;
3384
3385         INIT_LIST_HEAD(&e->flows);
3386         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3387         mutex_unlock(&esw->offloads.encap_tbl_lock);
3388
3389         if (family == AF_INET)
3390                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3391         else if (family == AF_INET6)
3392                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3393
3394         /* Protect against concurrent neigh update. */
3395         mutex_lock(&esw->offloads.encap_tbl_lock);
3396         complete_all(&e->res_ready);
3397         if (err) {
3398                 e->compl_result = err;
3399                 goto out_err;
3400         }
3401         e->compl_result = 1;
3402
3403 attach_flow:
3404         flow->encaps[out_index].e = e;
3405         list_add(&flow->encaps[out_index].list, &e->flows);
3406         flow->encaps[out_index].index = out_index;
3407         *encap_dev = e->out_dev;
3408         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3409                 attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3410                 attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3411                 *encap_valid = true;
3412         } else {
3413                 *encap_valid = false;
3414         }
3415         mutex_unlock(&esw->offloads.encap_tbl_lock);
3416
3417         return err;
3418
3419 out_err:
3420         mutex_unlock(&esw->offloads.encap_tbl_lock);
3421         if (e)
3422                 mlx5e_encap_put(priv, e);
3423         return err;
3424
3425 out_err_init:
3426         mutex_unlock(&esw->offloads.encap_tbl_lock);
3427         kfree(tun_info);
3428         kfree(e);
3429         return err;
3430 }
3431
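/* Map TC vlan push/pop actions onto the eswitch vlan actions.  Up to
 * MLX5_FS_VLAN_DEPTH (two) tags are handled when the HW supports it;
 * the second push/pop maps to the *_2 action bits.  Illustrative QinQ
 * push (device names are placeholders):
 *
 *   tc filter add dev eth0_rep ingress protocol ip flower skip_sw \
 *           action vlan push protocol 802.1ad id 100 pipe \
 *           action vlan push id 200 pipe \
 *           action mirred egress redirect dev eth1_rep
 */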
3432 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3433                                 const struct flow_action_entry *act,
3434                                 struct mlx5_esw_flow_attr *attr,
3435                                 u32 *action)
3436 {
3437         u8 vlan_idx = attr->total_vlan;
3438
3439         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3440                 return -EOPNOTSUPP;
3441
3442         switch (act->id) {
3443         case FLOW_ACTION_VLAN_POP:
3444                 if (vlan_idx) {
3445                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3446                                                                  MLX5_FS_VLAN_DEPTH))
3447                                 return -EOPNOTSUPP;
3448
3449                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3450                 } else {
3451                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3452                 }
3453                 break;
3454         case FLOW_ACTION_VLAN_PUSH:
3455                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
3456                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
3457                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
3458                 if (!attr->vlan_proto[vlan_idx])
3459                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3460
3461                 if (vlan_idx) {
3462                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3463                                                                  MLX5_FS_VLAN_DEPTH))
3464                                 return -EOPNOTSUPP;
3465
3466                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3467                 } else {
3468                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3469                             (act->vlan.proto != htons(ETH_P_8021Q) ||
3470                              act->vlan.prio))
3471                                 return -EOPNOTSUPP;
3472
3473                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3474                 }
3475                 break;
3476         default:
3477                 return -EINVAL;
3478         }
3479
3480         attr->total_vlan = vlan_idx + 1;
3481
3482         return 0;
3483 }
3484
3485 static int add_vlan_push_action(struct mlx5e_priv *priv,
3486                                 struct mlx5_esw_flow_attr *attr,
3487                                 struct net_device **out_dev,
3488                                 u32 *action)
3489 {
3490         struct net_device *vlan_dev = *out_dev;
3491         struct flow_action_entry vlan_act = {
3492                 .id = FLOW_ACTION_VLAN_PUSH,
3493                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3494                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3495                 .vlan.prio = 0,
3496         };
3497         int err;
3498
3499         err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3500         if (err)
3501                 return err;
3502
3503         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3504                                         dev_get_iflink(vlan_dev));
3505         if (is_vlan_dev(*out_dev))
3506                 err = add_vlan_push_action(priv, attr, out_dev, action);
3507
3508         return err;
3509 }
3510
3511 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3512                                struct mlx5_esw_flow_attr *attr,
3513                                u32 *action)
3514 {
3515         int nest_level = attr->parse_attr->filter_dev->lower_level;
3516         struct flow_action_entry vlan_act = {
3517                 .id = FLOW_ACTION_VLAN_POP,
3518         };
3519         int err = 0;
3520
3521         while (nest_level--) {
3522                 err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3523                 if (err)
3524                         return err;
3525         }
3526
3527         return err;
3528 }
3529
3530 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3531                                     struct net_device *out_dev)
3532 {
3533         if (is_merged_eswitch_dev(priv, out_dev))
3534                 return true;
3535
3536         return mlx5e_eswitch_rep(out_dev) &&
3537                same_hw_devs(priv, netdev_priv(out_dev));
3538 }
3539
3540 static bool is_duplicated_output_device(struct net_device *dev,
3541                                         struct net_device *out_dev,
3542                                         int *ifindexes, int if_count,
3543                                         struct netlink_ext_ack *extack)
3544 {
3545         int i;
3546
3547         for (i = 0; i < if_count; i++) {
3548                 if (ifindexes[i] == out_dev->ifindex) {
3549                         NL_SET_ERR_MSG_MOD(extack,
3550                                            "can't duplicate output to same device");
3551                         netdev_err(dev, "can't duplicate output to same device: %s\n",
3552                                    out_dev->name);
3553                         return true;
3554                 }
3555         }
3556
3557         return false;
3558 }
3559
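/* Validate a TC "goto chain" action: FT flows may not use it, the
 * destination chain must be within the range the eswitch chains code
 * supports, jumping to the same or a lower-numbered chain requires HW
 * support for backward jumps, and goto can't be combined with
 * reformat/decap unless the device can forward to a table after
 * reformat.
 */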
3560 static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
3561                                     struct mlx5e_tc_flow *flow,
3562                                     const struct flow_action_entry *act,
3563                                     u32 actions,
3564                                     struct netlink_ext_ack *extack)
3565 {
3566         u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
3567         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3568         bool ft_flow = mlx5e_is_ft_flow(flow);
3569         u32 dest_chain = act->chain_index;
3570
3571         if (ft_flow) {
3572                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3573                 return -EOPNOTSUPP;
3574         }
3575
3576         if (!mlx5_esw_chains_backwards_supported(esw) &&
3577             dest_chain <= attr->chain) {
3578                 NL_SET_ERR_MSG_MOD(extack,
3579                                    "Goto lower numbered chain isn't supported");
3580                 return -EOPNOTSUPP;
3581         }
3582         if (dest_chain > max_chain) {
3583                 NL_SET_ERR_MSG_MOD(extack,
3584                                    "Requested destination chain is out of supported range");
3585                 return -EOPNOTSUPP;
3586         }
3587
3588         if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3589                        MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3590             !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) {
3591                 NL_SET_ERR_MSG_MOD(extack,
3592                                    "Goto chain is not allowed if action has reformat or decap");
3593                 return -EOPNOTSUPP;
3594         }
3595
3596         return 0;
3597 }
3598
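/* Translate the TC action list of an eswitch (FDB) flow.  A typical
 * encap rule combines tunnel_key set with a redirect to the tunnel
 * netdevice; the tunnel info is duplicated per output here and
 * resolved later in mlx5e_attach_encap().  Illustrative rule (device
 * names and addresses are placeholders):
 *
 *   tc filter add dev eth0_rep ingress protocol ip flower skip_sw \
 *           action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 \
 *                  id 42 dst_port 4789 \
 *           action mirred egress redirect dev vxlan0
 */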
3599 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3600                                 struct flow_action *flow_action,
3601                                 struct mlx5e_tc_flow *flow,
3602                                 struct netlink_ext_ack *extack)
3603 {
3604         struct pedit_headers_action hdrs[2] = {};
3605         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3606         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3607         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3608         struct mlx5e_rep_priv *rpriv = priv->ppriv;
3609         const struct ip_tunnel_info *info = NULL;
3610         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
3611         bool ft_flow = mlx5e_is_ft_flow(flow);
3612         const struct flow_action_entry *act;
3613         bool encap = false, decap = false;
3614         u32 action = attr->action;
3615         int err, i, if_count = 0;
3616
3617         if (!flow_action_has_entries(flow_action))
3618                 return -EINVAL;
3619
3620         if (!flow_action_hw_stats_types_check(flow_action, extack,
3621                                               FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT))
3622                 return -EOPNOTSUPP;
3623
3624         flow_action_for_each(i, act, flow_action) {
3625                 switch (act->id) {
3626                 case FLOW_ACTION_DROP:
3627                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3628                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3629                         break;
3630                 case FLOW_ACTION_MANGLE:
3631                 case FLOW_ACTION_ADD:
3632                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
3633                                                     hdrs, extack);
3634                         if (err)
3635                                 return err;
3636
3637                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3638                         attr->split_count = attr->out_count;
3639                         break;
3640                 case FLOW_ACTION_CSUM:
3641                         if (csum_offload_supported(priv, action,
3642                                                    act->csum_flags, extack))
3643                                 break;
3644
3645                         return -EOPNOTSUPP;
3646                 case FLOW_ACTION_REDIRECT:
3647                 case FLOW_ACTION_MIRRED: {
3648                         struct mlx5e_priv *out_priv;
3649                         struct net_device *out_dev;
3650
3651                         out_dev = act->dev;
3652                         if (!out_dev) {
3653                                 /* out_dev is NULL when filters with
3654                                  * non-existing mirred device are replayed to
3655                                  * the driver.
3656                                  */
3657                                 return -EINVAL;
3658                         }
3659
3660                         if (ft_flow && out_dev == priv->netdev) {
3661                                 /* Ignore forward to self rules generated
3662                                  * by adding both mlx5 devs to the flow table
3663                                  * block on a normal nft offload setup.
3664                                  */
3665                                 return -EOPNOTSUPP;
3666                         }
3667
3668                         if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
3669                                 NL_SET_ERR_MSG_MOD(extack,
3670                                                    "can't support more output ports, can't offload forwarding");
3671                                 netdev_warn(priv->netdev,
3672                                             "can't support more than %d output ports, can't offload forwarding\n",
3673                                             attr->out_count);
3674                                 return -EOPNOTSUPP;
3675                         }
3676
3677                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3678                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3679                         if (encap) {
3680                                 parse_attr->mirred_ifindex[attr->out_count] =
3681                                         out_dev->ifindex;
3682                                 parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
3683                                 if (!parse_attr->tun_info[attr->out_count])
3684                                         return -ENOMEM;
3685                                 encap = false;
3686                                 attr->dests[attr->out_count].flags |=
3687                                         MLX5_ESW_DEST_ENCAP;
3688                                 attr->out_count++;
3689                                 /* attr->dests[].rep is resolved when we
3690                                  * handle encap
3691                                  */
3692                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
3693                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3694                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
3695                                 struct net_device *uplink_upper;
3696                                 struct mlx5e_rep_priv *rep_priv;
3697
3698                                 if (is_duplicated_output_device(priv->netdev,
3699                                                                 out_dev,
3700                                                                 ifindexes,
3701                                                                 if_count,
3702                                                                 extack))
3703                                         return -EOPNOTSUPP;
3704
3705                                 ifindexes[if_count] = out_dev->ifindex;
3706                                 if_count++;
3707
3708                                 rcu_read_lock();
3709                                 uplink_upper =
3710                                         netdev_master_upper_dev_get_rcu(uplink_dev);
3711                                 if (uplink_upper &&
3712                                     netif_is_lag_master(uplink_upper) &&
3713                                     uplink_upper == out_dev)
3714                                         out_dev = uplink_dev;
3715                                 rcu_read_unlock();
3716
3717                                 if (is_vlan_dev(out_dev)) {
3718                                         err = add_vlan_push_action(priv, attr,
3719                                                                    &out_dev,
3720                                                                    &action);
3721                                         if (err)
3722                                                 return err;
3723                                 }
3724
3725                                 if (is_vlan_dev(parse_attr->filter_dev)) {
3726                                         err = add_vlan_pop_action(priv, attr,
3727                                                                   &action);
3728                                         if (err)
3729                                                 return err;
3730                                 }
3731
3732                                 /* Don't allow forwarding between uplinks.
3733                                  *
3734                                  * The input vport was stored in esw_attr->in_rep.
3735                                  * In the LAG case, *priv* is the private data of
3736                                  * the uplink, which may not be the input vport.
3737                                  */
3738                                 rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
3739                                 if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
3740                                     mlx5e_eswitch_uplink_rep(out_dev)) {
3741                                         NL_SET_ERR_MSG_MOD(extack,
3742                                                            "devices are both uplink, can't offload forwarding");
3743                                         pr_err("devices %s %s are both uplink, can't offload forwarding\n",
3744                                                priv->netdev->name, out_dev->name);
3745                                         return -EOPNOTSUPP;
3746                                 }
3747
3748                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
3749                                         NL_SET_ERR_MSG_MOD(extack,
3750                                                            "devices are not on same switch HW, can't offload forwarding");
3751                                         netdev_warn(priv->netdev,
3752                                                     "devices %s %s not on same switch HW, can't offload forwarding\n",
3753                                                     priv->netdev->name,
3754                                                     out_dev->name);
3755                                         return -EOPNOTSUPP;
3756                                 }
3757
3758                                 out_priv = netdev_priv(out_dev);
3759                                 rpriv = out_priv->ppriv;
3760                                 attr->dests[attr->out_count].rep = rpriv->rep;
3761                                 attr->dests[attr->out_count].mdev = out_priv->mdev;
3762                                 attr->out_count++;
3763                         } else if (parse_attr->filter_dev != priv->netdev) {
3764                                 /* All mlx5 devices are called to configure
3765                                  * high level device filters. Therefore, an
3766                                  * attempt to install a filter on an invalid
3767                                  * eswitch should not trigger an explicit error
3768                                  */
3769                                 return -EINVAL;
3770                         } else {
3771                                 NL_SET_ERR_MSG_MOD(extack,
3772                                                    "devices are not on same switch HW, can't offload forwarding");
3773                                 netdev_warn(priv->netdev,
3774                                             "devices %s %s not on same switch HW, can't offload forwarding\n",
3775                                             priv->netdev->name,
3776                                             out_dev->name);
3777                                 return -EINVAL;
3778                         }
3779                         }
3780                         break;
3781                 case FLOW_ACTION_TUNNEL_ENCAP:
3782                         info = act->tunnel;
3783                         if (info)
3784                                 encap = true;
3785                         else
3786                                 return -EOPNOTSUPP;
3787
3788                         break;
3789                 case FLOW_ACTION_VLAN_PUSH:
3790                 case FLOW_ACTION_VLAN_POP:
3791                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
3792                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
3793                                 /* Replace vlan pop+push with vlan modify */
3794                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3795                                 err = add_vlan_rewrite_action(priv,
3796                                                               MLX5_FLOW_NAMESPACE_FDB,
3797                                                               act, parse_attr, hdrs,
3798                                                               &action, extack);
3799                         } else {
3800                                 err = parse_tc_vlan_action(priv, act, attr, &action);
3801                         }
3802                         if (err)
3803                                 return err;
3804
3805                         attr->split_count = attr->out_count;
3806                         break;
3807                 case FLOW_ACTION_VLAN_MANGLE:
3808                         err = add_vlan_rewrite_action(priv,
3809                                                       MLX5_FLOW_NAMESPACE_FDB,
3810                                                       act, parse_attr, hdrs,
3811                                                       &action, extack);
3812                         if (err)
3813                                 return err;
3814
3815                         attr->split_count = attr->out_count;
3816                         break;
3817                 case FLOW_ACTION_TUNNEL_DECAP:
3818                         decap = true;
3819                         break;
3820                 case FLOW_ACTION_GOTO:
3821                         err = mlx5_validate_goto_chain(esw, flow, act, action,
3822                                                        extack);
3823                         if (err)
3824                                 return err;
3825
3826                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3827                         attr->dest_chain = act->chain_index;
3828                         break;
3829                 default:
3830                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3831                         return -EOPNOTSUPP;
3832                 }
3833         }
3834
3835         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
3836             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
3837                 /* For prio tag mode, replace vlan pop with a vlan prio
3838                  * tag rewrite.
3839                  */
3840                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3841                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
3842                                                        &action, extack);
3843                 if (err)
3844                         return err;
3845         }
3846
3847         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3848             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3849                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3850                                             parse_attr, hdrs, &action, extack);
3851                 if (err)
3852                         return err;
3853                 /* in case all pedit actions are skipped, remove the MOD_HDR
3854                  * flag. we might have set split_count either by pedit or
3855                  * pop/push. if there is no pop/push either, reset it too.
3856                  */
3857                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3858                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3859                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3860                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3861                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3862                                 attr->split_count = 0;
3863                 }
3864         }
3865
3866         attr->action = action;
3867         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3868                 return -EOPNOTSUPP;
3869
3870         if (attr->dest_chain) {
3871                 if (decap) {
3872                         /* This could be supported by creating a mapping for
3873                          * the tunnel device only (without the tunnel headers),
3874                          * and setting that tunnel id on this decap flow.
3875                          *
3876                          * On restore (miss), we'd then just set the saved
3877                          * tunnel device.
3878                          */
3879
3880                         NL_SET_ERR_MSG(extack,
3881                                        "Decap with goto isn't supported");
3882                         netdev_warn(priv->netdev,
3883                                     "Decap with goto isn't supported\n");
3884                         return -EOPNOTSUPP;
3885                 }
3886
3887                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3888                         NL_SET_ERR_MSG_MOD(extack,
3889                                            "Mirroring goto chain rules isn't supported");
3890                         return -EOPNOTSUPP;
3891                 }
3892                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3893         }
3894
3895         if (!(attr->action &
3896               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3897                 NL_SET_ERR_MSG_MOD(extack,
3898                                    "Rule must have at least one forward/drop action");
3899                 return -EOPNOTSUPP;
3900         }
3901
3902         if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3903                 NL_SET_ERR_MSG_MOD(extack,
3904                                    "current firmware doesn't support split rule for port mirroring");
3905                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
3906                 return -EOPNOTSUPP;
3907         }
3908
3909         return 0;
3910 }
3911
3912 static void get_flags(int flags, unsigned long *flow_flags)
3913 {
3914         unsigned long __flow_flags = 0;
3915
3916         if (flags & MLX5_TC_FLAG(INGRESS))
3917                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
3918         if (flags & MLX5_TC_FLAG(EGRESS))
3919                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
3920
3921         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
3922                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3923         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
3924                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3925         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
3926                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
3927
3928         *flow_flags = __flow_flags;
3929 }
3930
3931 static const struct rhashtable_params tc_ht_params = {
3932         .head_offset = offsetof(struct mlx5e_tc_flow, node),
3933         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
3934         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
3935         .automatic_shrinking = true,
3936 };
3937
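     /* Eswitch-offloaded flows are kept in the uplink representor's
      * table; NIC-offloaded flows live in the per-netdev table.
      */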
3938 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
3939                                     unsigned long flags)
3940 {
3941         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3942         struct mlx5e_rep_priv *uplink_rpriv;
3943
3944         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
3945                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
3946                 return &uplink_rpriv->uplink_priv.tc_ht;
3947         } else /* NIC offload */
3948                 return &priv->fs.tc.ht;
3949 }
3950
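     /* Under VF LAG (SR-IOV bond) or multipath, a rule installed on one
      * eswitch may also need a duplicate on the paired eswitch, so that
      * traffic arriving through the other port is treated the same way.
      */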
3951 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
3952 {
3953         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3954         bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
3955                 flow_flag_test(flow, INGRESS);
3956         bool act_is_encap = !!(attr->action &
3957                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
3958         bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
3959                                                 MLX5_DEVCOM_ESW_OFFLOADS);
3960
3961         if (!esw_paired)
3962                 return false;
3963
3964         if ((mlx5_lag_is_sriov(attr->in_mdev) ||
3965              mlx5_lag_is_multipath(attr->in_mdev)) &&
3966             (is_rep_ingress || act_is_encap))
3967                 return true;
3968
3969         return false;
3970 }
3971
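     /* Allocate a flow with its attribute appended: attr_size is the size
      * of the NIC or eswitch attribute, which lands in the zero-length
      * attribute array at the end of struct mlx5e_tc_flow.
      */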
3972 static int
3973 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
3974                  struct flow_cls_offload *f, unsigned long flow_flags,
3975                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
3976                  struct mlx5e_tc_flow **__flow)
3977 {
3978         struct mlx5e_tc_flow_parse_attr *parse_attr;
3979         struct mlx5e_tc_flow *flow;
3980         int out_index, err;
3981
3982         flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
3983         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3984         if (!parse_attr || !flow) {
3985                 err = -ENOMEM;
3986                 goto err_free;
3987         }
3988
3989         flow->cookie = f->cookie;
3990         flow->flags = flow_flags;
3991         flow->priv = priv;
3992         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
3993                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
3994         INIT_LIST_HEAD(&flow->mod_hdr);
3995         INIT_LIST_HEAD(&flow->hairpin);
3996         refcount_set(&flow->refcnt, 1);
3997         init_completion(&flow->init_done);
3998
3999         *__flow = flow;
4000         *__parse_attr = parse_attr;
4001
4002         return 0;
4003
4004 err_free:
4005         kfree(flow);
4006         kvfree(parse_attr);
4007         return err;
4008 }
4009
4010 static void
4011 mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
4012                          struct mlx5e_priv *priv,
4013                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4014                          struct flow_cls_offload *f,
4015                          struct mlx5_eswitch_rep *in_rep,
4016                          struct mlx5_core_dev *in_mdev)
4017 {
4018         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4019
4020         esw_attr->parse_attr = parse_attr;
4021         esw_attr->chain = f->common.chain_index;
4022         esw_attr->prio = f->common.prio;
4023
4024         esw_attr->in_rep = in_rep;
4025         esw_attr->in_mdev = in_mdev;
4026
4027         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4028             MLX5_COUNTER_SOURCE_ESWITCH)
4029                 esw_attr->counter_dev = in_mdev;
4030         else
4031                 esw_attr->counter_dev = priv->mdev;
4032 }
4033
4034 static struct mlx5e_tc_flow *
4035 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4036                      struct flow_cls_offload *f,
4037                      unsigned long flow_flags,
4038                      struct net_device *filter_dev,
4039                      struct mlx5_eswitch_rep *in_rep,
4040                      struct mlx5_core_dev *in_mdev)
4041 {
4042         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4043         struct netlink_ext_ack *extack = f->common.extack;
4044         struct mlx5e_tc_flow_parse_attr *parse_attr;
4045         struct mlx5e_tc_flow *flow;
4046         int attr_size, err;
4047
4048         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4049         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4050         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4051                                &parse_attr, &flow);
4052         if (err)
4053                 goto out;
4054
4055         parse_attr->filter_dev = filter_dev;
4056         mlx5e_flow_esw_attr_init(flow->esw_attr,
4057                                  priv, parse_attr,
4058                                  f, in_rep, in_mdev);
4059
4060         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4061                                f, filter_dev);
4062         if (err)
4063                 goto err_free;
4064
4065         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4066         if (err)
4067                 goto err_free;
4068
4069         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4070         complete_all(&flow->init_done);
4071         if (err) {
4072                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4073                         goto err_free;
4074
4075                 add_unready_flow(flow);
4076         }
4077
4078         return flow;
4079
4080 err_free:
4081         mlx5e_flow_put(priv, flow);
4082 out:
4083         return ERR_PTR(err);
4084 }
4085
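     /* Install a duplicate of @flow on the peer eswitch and link the two,
      * so teardown and stats queries can cover both sides of the pair.
      */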
4086 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4087                                       struct mlx5e_tc_flow *flow,
4088                                       unsigned long flow_flags)
4089 {
4090         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4091         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4092         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4093         struct mlx5e_tc_flow_parse_attr *parse_attr;
4094         struct mlx5e_rep_priv *peer_urpriv;
4095         struct mlx5e_tc_flow *peer_flow;
4096         struct mlx5_core_dev *in_mdev;
4097         int err = 0;
4098
4099         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4100         if (!peer_esw)
4101                 return -ENODEV;
4102
4103         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4104         peer_priv = netdev_priv(peer_urpriv->netdev);
4105
4106         /* in_mdev is the mdev the packet originated from.
4107          * So packets redirected to the uplink use the same mdev as the
4108          * original flow, and packets redirected from the uplink use the
4109          * peer mdev.
4110          */
4111         if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
4112                 in_mdev = peer_priv->mdev;
4113         else
4114                 in_mdev = priv->mdev;
4115
4116         parse_attr = flow->esw_attr->parse_attr;
4117         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4118                                          parse_attr->filter_dev,
4119                                          flow->esw_attr->in_rep, in_mdev);
4120         if (IS_ERR(peer_flow)) {
4121                 err = PTR_ERR(peer_flow);
4122                 goto out;
4123         }
4124
4125         flow->peer_flow = peer_flow;
4126         flow_flag_set(flow, DUP);
4127         mutex_lock(&esw->offloads.peer_mutex);
4128         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4129         mutex_unlock(&esw->offloads.peer_mutex);
4130
4131 out:
4132         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4133         return err;
4134 }
4135
4136 static int
4137 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4138                    struct flow_cls_offload *f,
4139                    unsigned long flow_flags,
4140                    struct net_device *filter_dev,
4141                    struct mlx5e_tc_flow **__flow)
4142 {
4143         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4144         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4145         struct mlx5_core_dev *in_mdev = priv->mdev;
4146         struct mlx5e_tc_flow *flow;
4147         int err;
4148
4149         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4150                                     in_mdev);
4151         if (IS_ERR(flow))
4152                 return PTR_ERR(flow);
4153
4154         if (is_peer_flow_needed(flow)) {
4155                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4156                 if (err) {
4157                         mlx5e_tc_del_fdb_flow(priv, flow);
4158                         goto out;
4159                 }
4160         }
4161
4162         *__flow = flow;
4163
4164         return 0;
4165
4166 out:
4167         return err;
4168 }
4169
4170 static int
4171 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4172                    struct flow_cls_offload *f,
4173                    unsigned long flow_flags,
4174                    struct net_device *filter_dev,
4175                    struct mlx5e_tc_flow **__flow)
4176 {
4177         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4178         struct netlink_ext_ack *extack = f->common.extack;
4179         struct mlx5e_tc_flow_parse_attr *parse_attr;
4180         struct mlx5e_tc_flow *flow;
4181         int attr_size, err;
4182
4183         /* multi-chain not supported for NIC rules */
4184         if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4185                 return -EOPNOTSUPP;
4186
4187         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4188         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4189         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4190                                &parse_attr, &flow);
4191         if (err)
4192                 goto out;
4193
4194         parse_attr->filter_dev = filter_dev;
4195         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4196                                f, filter_dev);
4197         if (err)
4198                 goto err_free;
4199
4200         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4201         if (err)
4202                 goto err_free;
4203
4204         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4205         if (err)
4206                 goto err_free;
4207
4208         flow_flag_set(flow, OFFLOADED);
4209         kvfree(parse_attr);
4210         *__flow = flow;
4211
4212         return 0;
4213
4214 err_free:
4215         mlx5e_flow_put(priv, flow);
4216         kvfree(parse_attr);
4217 out:
4218         return err;
4219 }
4220
4221 static int
4222 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4223                   struct flow_cls_offload *f,
4224                   unsigned long flags,
4225                   struct net_device *filter_dev,
4226                   struct mlx5e_tc_flow **flow)
4227 {
4228         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4229         unsigned long flow_flags;
4230         int err;
4231
4232         get_flags(flags, &flow_flags);
4233
4234         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4235                 return -EOPNOTSUPP;
4236
4237         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4238                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4239                                          filter_dev, flow);
4240         else
4241                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4242                                          filter_dev, flow);
4243
4244         return err;
4245 }
4246
4247 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4248                            struct flow_cls_offload *f, unsigned long flags)
4249 {
4250         struct netlink_ext_ack *extack = f->common.extack;
4251         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4252         struct mlx5e_tc_flow *flow;
4253         int err = 0;
4254
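             /* A lockless lookup is enough here: it only rejects a
              * duplicate cookie early; the authoritative duplicate check is
              * the atomic rhashtable_lookup_insert_fast() below.
              */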
4255         rcu_read_lock();
4256         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4257         rcu_read_unlock();
4258         if (flow) {
4259                 NL_SET_ERR_MSG_MOD(extack,
4260                                    "flow cookie already exists, ignoring");
4261                 netdev_warn_once(priv->netdev,
4262                                  "flow cookie %lx already exists, ignoring\n",
4263                                  f->cookie);
4264                 err = -EEXIST;
4265                 goto out;
4266         }
4267
4268         trace_mlx5e_configure_flower(f);
4269         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4270         if (err)
4271                 goto out;
4272
4273         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4274         if (err)
4275                 goto err_free;
4276
4277         return 0;
4278
4279 err_free:
4280         mlx5e_flow_put(priv, flow);
4281 out:
4282         return err;
4283 }
4284
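     /* The same cookie may be offered for both directions, so delete and
      * stats requests must also match the ingress/egress flags the flow
      * was added with.
      */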
4285 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4286 {
4287         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4288         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4289
4290         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4291                 flow_flag_test(flow, EGRESS) == dir_egress;
4292 }
4293
4294 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4295                         struct flow_cls_offload *f, unsigned long flags)
4296 {
4297         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4298         struct mlx5e_tc_flow *flow;
4299         int err;
4300
4301         rcu_read_lock();
4302         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4303         if (!flow || !same_flow_direction(flow, flags)) {
4304                 err = -EINVAL;
4305                 goto errout;
4306         }
4307
4308         /* Only delete the flow if the MLX5E_TC_FLOW_DELETED flag wasn't
4309          * already set.
4310          */
4311         if (flow_flag_test_and_set(flow, DELETED)) {
4312                 err = -EINVAL;
4313                 goto errout;
4314         }
4315         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4316         rcu_read_unlock();
4317
4318         trace_mlx5e_delete_flower(f);
4319         mlx5e_flow_put(priv, flow);
4320
4321         return 0;
4322
4323 errout:
4324         rcu_read_unlock();
4325         return err;
4326 }
4327
4328 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4329                        struct flow_cls_offload *f, unsigned long flags)
4330 {
4331         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4332         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4333         struct mlx5_eswitch *peer_esw;
4334         struct mlx5e_tc_flow *flow;
4335         struct mlx5_fc *counter;
4336         u64 lastuse = 0;
4337         u64 packets = 0;
4338         u64 bytes = 0;
4339         int err = 0;
4340
4341         rcu_read_lock();
4342         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4343                                                 tc_ht_params));
4344         rcu_read_unlock();
4345         if (IS_ERR(flow))
4346                 return PTR_ERR(flow);
4347
4348         if (!same_flow_direction(flow, flags)) {
4349                 err = -EINVAL;
4350                 goto errout;
4351         }
4352
4353         if (mlx5e_is_offloaded_flow(flow)) {
4354                 counter = mlx5e_tc_get_counter(flow);
4355                 if (!counter)
4356                         goto errout;
4357
4358                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4359         }
4360
4361         /* Under multipath it's possible for one rule to be currently
4362          * un-offloaded while the other rule is offloaded.
4363          */
4364         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4365         if (!peer_esw)
4366                 goto out;
4367
4368         if (flow_flag_test(flow, DUP) &&
4369             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4370                 u64 bytes2;
4371                 u64 packets2;
4372                 u64 lastuse2;
4373
4374                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4375                 if (!counter)
4376                         goto no_peer_counter;
4377                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4378
4379                 bytes += bytes2;
4380                 packets += packets2;
4381                 lastuse = max_t(u64, lastuse, lastuse2);
4382         }
4383
4384 no_peer_counter:
4385         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4386 out:
4387         flow_stats_update(&f->stats, bytes, packets, lastuse);
4388         trace_mlx5e_stats_flower(f);
4389 errout:
4390         mlx5e_flow_put(priv, flow);
4391         return err;
4392 }
4393
4394 static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
4395                                struct netlink_ext_ack *extack)
4396 {
4397         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4398         struct mlx5_eswitch *esw;
4399         u16 vport_num;
4400         u32 rate_mbps;
4401         int err;
4402
4403         vport_num = rpriv->rep->vport;
4404         if (vport_num >= MLX5_VPORT_ECPF) {
4405                 NL_SET_ERR_MSG_MOD(extack,
4406                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4407                 return -EOPNOTSUPP;
4408         }
4409
4410         esw = priv->mdev->priv.eswitch;
4411         /* rate is given in bytes/sec.
4412          * First convert to bits/sec and then round to the nearest mbit/sec,
4413          * where mbit means a million bits.
4414          * Moreover, if rate is non-zero we choose to configure a minimum of
4415          * 1 mbit/sec.
4416          */
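             /* Illustrative numbers: rate = 375000 bytes/sec is 3000000
              * bits/sec, and (3000000 + 500000) / 1000000 rounds to 3
              * mbit/sec. Note the math here is 32-bit, which assumes rates
              * below roughly 512 MB/sec.
              */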
4417         rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
4418         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
4419         if (err)
4420                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4421
4422         return err;
4423 }
4424
4425 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4426                                         struct flow_action *flow_action,
4427                                         struct netlink_ext_ack *extack)
4428 {
4429         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4430         const struct flow_action_entry *act;
4431         int err;
4432         int i;
4433
4434         if (!flow_action_has_entries(flow_action)) {
4435                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4436                 return -EINVAL;
4437         }
4438
4439         if (!flow_offload_has_one_action(flow_action)) {
4440                 NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4441                 return -EOPNOTSUPP;
4442         }
4443
4444         if (!flow_action_basic_hw_stats_types_check(flow_action, extack))
4445                 return -EOPNOTSUPP;
4446
4447         flow_action_for_each(i, act, flow_action) {
4448                 switch (act->id) {
4449                 case FLOW_ACTION_POLICE:
4450                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4451                         if (err)
4452                                 return err;
4453
4454                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4455                         break;
4456                 default:
4457                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4458                         return -EOPNOTSUPP;
4459                 }
4460         }
4461
4462         return 0;
4463 }
4464
4465 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4466                                 struct tc_cls_matchall_offload *ma)
4467 {
4468         struct netlink_ext_ack *extack = ma->common.extack;
4469
4470         if (ma->common.prio != 1) {
4471                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4472                 return -EINVAL;
4473         }
4474
4475         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4476 }
4477
4478 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4479                              struct tc_cls_matchall_offload *ma)
4480 {
4481         struct netlink_ext_ack *extack = ma->common.extack;
4482
4483         return apply_police_params(priv, 0, extack);
4484 }
4485
4486 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4487                              struct tc_cls_matchall_offload *ma)
4488 {
4489         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4490         struct rtnl_link_stats64 cur_stats;
4491         u64 dbytes;
4492         u64 dpkts;
4493
4494         cur_stats = priv->stats.vf_vport;
4495         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4496         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4497         rpriv->prev_vf_vport_stats = cur_stats;
4498         flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
4499 }
4500
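     /* A netdev on the same physical device went away: mark every hairpin
      * pair whose peer is that device's vhca as gone, so it is not used
      * any further.
      */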
4501 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4502                                               struct mlx5e_priv *peer_priv)
4503 {
4504         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4505         struct mlx5e_hairpin_entry *hpe, *tmp;
4506         LIST_HEAD(init_wait_list);
4507         u16 peer_vhca_id;
4508         int bkt;
4509
4510         if (!same_hw_devs(priv, peer_priv))
4511                 return;
4512
4513         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4514
4515         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4516         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4517                 if (refcount_inc_not_zero(&hpe->refcnt))
4518                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4519         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4520
4521         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4522                 wait_for_completion(&hpe->res_ready);
4523                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4524                         hpe->hp->pair->peer_gone = true;
4525
4526                 mlx5e_hairpin_put(priv, hpe);
4527         }
4528 }
4529
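     /* Notifier watching for other mlx5e netdevs being unregistered, to
      * invalidate hairpin entries that point at them.
      */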
4530 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4531                                  unsigned long event, void *ptr)
4532 {
4533         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4534         struct mlx5e_flow_steering *fs;
4535         struct mlx5e_priv *peer_priv;
4536         struct mlx5e_tc_table *tc;
4537         struct mlx5e_priv *priv;
4538
4539         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4540             event != NETDEV_UNREGISTER ||
4541             ndev->reg_state == NETREG_REGISTERED)
4542                 return NOTIFY_DONE;
4543
4544         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4545         fs = container_of(tc, struct mlx5e_flow_steering, tc);
4546         priv = container_of(fs, struct mlx5e_priv, fs);
4547         peer_priv = netdev_priv(ndev);
4548         if (priv == peer_priv ||
4549             !(priv->netdev->features & NETIF_F_HW_TC))
4550                 return NOTIFY_DONE;
4551
4552         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4553
4554         return NOTIFY_DONE;
4555 }
4556
4557 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4558 {
4559         struct mlx5e_tc_table *tc = &priv->fs.tc;
4560         int err;
4561
4562         mutex_init(&tc->t_lock);
4563         mutex_init(&tc->mod_hdr.lock);
4564         hash_init(tc->mod_hdr.hlist);
4565         mutex_init(&tc->hairpin_tbl_lock);
4566         hash_init(tc->hairpin_tbl);
4567
4568         err = rhashtable_init(&tc->ht, &tc_ht_params);
4569         if (err)
4570                 return err;
4571
4572         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4573         err = register_netdevice_notifier_dev_net(priv->netdev,
4574                                                   &tc->netdevice_nb,
4575                                                   &tc->netdevice_nn);
4576         if (err) {
4577                 tc->netdevice_nb.notifier_call = NULL;
4578                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4579         }
4580
4581         return err;
4582 }
4583
4584 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4585 {
4586         struct mlx5e_tc_flow *flow = ptr;
4587         struct mlx5e_priv *priv = flow->priv;
4588
4589         mlx5e_tc_del_flow(priv, flow);
4590         kfree(flow);
4591 }
4592
4593 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4594 {
4595         struct mlx5e_tc_table *tc = &priv->fs.tc;
4596
4597         if (tc->netdevice_nb.notifier_call)
4598                 unregister_netdevice_notifier_dev_net(priv->netdev,
4599                                                       &tc->netdevice_nb,
4600                                                       &tc->netdevice_nn);
4601
4602         mutex_destroy(&tc->mod_hdr.lock);
4603         mutex_destroy(&tc->hairpin_tbl_lock);
4604
4605         rhashtable_destroy(&tc->ht);
4606
4607         if (!IS_ERR_OR_NULL(tc->t)) {
4608                 mlx5_destroy_flow_table(tc->t);
4609                 tc->t = NULL;
4610         }
4611         mutex_destroy(&tc->t_lock);
4612 }
4613
4614 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
4615 {
4616         const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts);
4617         struct mlx5_rep_uplink_priv *uplink_priv;
4618         struct mlx5e_rep_priv *priv;
4619         struct mapping_ctx *mapping;
4620         int err;
4621
4622         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
4623         priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
4624
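     /* A full tunnel match key (and its enc_opts) is too wide to carry
      * across a chain miss, so each one is mapped to a compact id that
      * fits in the TUNNEL_INFO_BITS_MASK / ENC_OPTS_BITS_MASK ranges and
      * can be restored later by mlx5e_restore_tunnel().
      */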
4625         mapping = mapping_create(sizeof(struct tunnel_match_key),
4626                                  TUNNEL_INFO_BITS_MASK, true);
4627         if (IS_ERR(mapping)) {
4628                 err = PTR_ERR(mapping);
4629                 goto err_tun_mapping;
4630         }
4631         uplink_priv->tunnel_mapping = mapping;
4632
4633         mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
4634         if (IS_ERR(mapping)) {
4635                 err = PTR_ERR(mapping);
4636                 goto err_enc_opts_mapping;
4637         }
4638         uplink_priv->tunnel_enc_opts_mapping = mapping;
4639
4640         err = rhashtable_init(tc_ht, &tc_ht_params);
4641         if (err)
4642                 goto err_ht_init;
4643
4644         return err;
4645
4646 err_ht_init:
4647         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
4648 err_enc_opts_mapping:
4649         mapping_destroy(uplink_priv->tunnel_mapping);
4650 err_tun_mapping:
4651         netdev_warn(priv->netdev,
4652                     "Failed to initialize tc (eswitch), err: %d\n", err);
4653         return err;
4654 }
4655
4656 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
4657 {
4658         struct mlx5_rep_uplink_priv *uplink_priv;
4659
4660         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
4661
4662         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
4663         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
4664         mapping_destroy(uplink_priv->tunnel_mapping);
4665 }
4666
4667 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
4668 {
4669         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4670
4671         return atomic_read(&tc_ht->nelems);
4672 }
4673
4674 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
4675 {
4676         struct mlx5e_tc_flow *flow, *tmp;
4677
4678         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
4679                 __mlx5e_tc_del_fdb_peer_flow(flow);
4680 }
4681
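     /* Work item that retries flows which couldn't be offloaded when they
      * were added (e.g. a multipath route was unreachable); flows that
      * now offload successfully are taken off the unready list.
      */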
4682 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
4683 {
4684         struct mlx5_rep_uplink_priv *rpriv =
4685                 container_of(work, struct mlx5_rep_uplink_priv,
4686                              reoffload_flows_work);
4687         struct mlx5e_tc_flow *flow, *tmp;
4688
4689         mutex_lock(&rpriv->unready_flows_lock);
4690         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
4691                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
4692                         unready_flow_del(flow);
4693         }
4694         mutex_unlock(&rpriv->unready_flows_lock);
4695 }
4696
4697 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
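     /* On a miss after decap, rebuild the tunnel metadata from the ids
      * saved in the tunnel mappings and attach it to the skb, so the
      * software TC datapath sees the packet as if it had been received on
      * the tunnel device.
      */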
4698 static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
4699                                  struct mlx5e_tc_update_priv *tc_priv,
4700                                  u32 tunnel_id)
4701 {
4702         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4703         struct flow_dissector_key_enc_opts enc_opts = {};
4704         struct mlx5_rep_uplink_priv *uplink_priv;
4705         struct mlx5e_rep_priv *uplink_rpriv;
4706         struct metadata_dst *tun_dst;
4707         struct tunnel_match_key key;
4708         u32 tun_id, enc_opts_id;
4709         struct net_device *dev;
4710         int err;
4711
4712         enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
4713         tun_id = tunnel_id >> ENC_OPTS_BITS;
4714
4715         if (!tun_id)
4716                 return true;
4717
4718         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4719         uplink_priv = &uplink_rpriv->uplink_priv;
4720
4721         err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
4722         if (err) {
4723                 WARN_ON_ONCE(true);
4724                 netdev_dbg(priv->netdev,
4725                            "Couldn't find tunnel for tun_id: %d, err: %d\n",
4726                            tun_id, err);
4727                 return false;
4728         }
4729
4730         if (enc_opts_id) {
4731                 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
4732                                    enc_opts_id, &enc_opts);
4733                 if (err) {
4734                         netdev_dbg(priv->netdev,
4735                                    "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
4736                                    enc_opts_id, err);
4737                         return false;
4738                 }
4739         }
4740
4741         tun_dst = tun_rx_dst(enc_opts.len);
4742         if (!tun_dst) {
4743                 WARN_ON_ONCE(true);
4744                 return false;
4745         }
4746
4747         ip_tunnel_key_init(&tun_dst->u.tun_info.key,
4748                            key.enc_ipv4.src, key.enc_ipv4.dst,
4749                            key.enc_ip.tos, key.enc_ip.ttl,
4750                            0, /* label */
4751                            key.enc_tp.src, key.enc_tp.dst,
4752                            key32_to_tunnel_id(key.enc_key_id.keyid),
4753                            TUNNEL_KEY);
4754
4755         if (enc_opts.len)
4756                 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data,
4757                                         enc_opts.len, enc_opts.dst_opt_type);
4758
4759         skb_dst_set(skb, (struct dst_entry *)tun_dst);
4760         dev = dev_get_by_index(&init_net, key.filter_ifindex);
4761         if (!dev) {
4762                 netdev_dbg(priv->netdev,
4763                            "Couldn't find tunnel device with ifindex: %d\n",
4764                            key.filter_ifindex);
4765                 return false;
4766         }
4767
4768         /* Set tun_dev so we do dev_put() after the datapath is done with it */
4769         tc_priv->tun_dev = dev;
4770
4771         skb->dev = dev;
4772
4773         return true;
4774 }
4775 #endif /* CONFIG_NET_TC_SKB_EXT */
4776
4777 bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
4778                              struct sk_buff *skb,
4779                              struct mlx5e_tc_update_priv *tc_priv)
4780 {
4781 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4782         u32 chain = 0, reg_c0, reg_c1, tunnel_id;
4783         struct tc_skb_ext *tc_skb_ext;
4784         struct mlx5_eswitch *esw;
4785         struct mlx5e_priv *priv;
4786         int tunnel_moffset;
4787         int err;
4788
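             /* reg_c0 carries the chain tag and reg_c1 the packed tunnel id
              * written for the missed rule; restore the TC chain via the skb
              * extension and the tunnel via mlx5e_restore_tunnel().
              */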
4789         reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
4790         if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
4791                 reg_c0 = 0;
4792         reg_c1 = be32_to_cpu(cqe->imm_inval_pkey);
4793
4794         if (!reg_c0)
4795                 return true;
4796
4797         priv = netdev_priv(skb->dev);
4798         esw = priv->mdev->priv.eswitch;
4799
4800         err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
4801         if (err) {
4802                 netdev_dbg(priv->netdev,
4803                            "Couldn't find chain for chain tag: %d, err: %d\n",
4804                            reg_c0, err);
4805                 return false;
4806         }
4807
4808         if (chain) {
4809                 tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
4810                 if (!tc_skb_ext) {
4811                         WARN_ON(1);
4812                         return false;
4813                 }
4814
4815                 tc_skb_ext->chain = chain;
4816         }
4817
4818         tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
4819         tunnel_id = reg_c1 >> (8 * tunnel_moffset);
4820         return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
4821 #endif /* CONFIG_NET_TC_SKB_EXT */
4822
4823         return true;
4824 }
4825
4826 void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
4827 {
4828         if (tc_priv->tun_dev)
4829                 dev_put(tc_priv->tun_dev);
4830 }