net/mlx5e: Allow re-allocating mod header actions
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c [linux-2.6-block.git]
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/flow_dissector.h>
34 #include <net/sch_generic.h>
35 #include <net/pkt_cls.h>
36 #include <net/tc_act/tc_gact.h>
37 #include <net/tc_act/tc_skbedit.h>
38 #include <linux/mlx5/fs.h>
39 #include <linux/mlx5/device.h>
40 #include <linux/rhashtable.h>
41 #include <linux/refcount.h>
42 #include <linux/completion.h>
43 #include <net/tc_act/tc_mirred.h>
44 #include <net/tc_act/tc_vlan.h>
45 #include <net/tc_act/tc_tunnel_key.h>
46 #include <net/tc_act/tc_pedit.h>
47 #include <net/tc_act/tc_csum.h>
48 #include <net/arp.h>
49 #include <net/ipv6_stubs.h>
50 #include "en.h"
51 #include "en_rep.h"
52 #include "en_tc.h"
53 #include "eswitch.h"
54 #include "eswitch_offloads_chains.h"
55 #include "fs_core.h"
56 #include "en/port.h"
57 #include "en/tc_tun.h"
58 #include "lib/devcom.h"
59 #include "lib/geneve.h"
60 #include "diag/en_tc_tracepoint.h"
61
62 struct mlx5_nic_flow_attr {
63         u32 action;
64         u32 flow_tag;
65         struct mlx5_modify_hdr *modify_hdr;
66         u32 hairpin_tirn;
67         u8 match_level;
68         struct mlx5_flow_table  *hairpin_ft;
69         struct mlx5_fc          *counter;
70 };
71
72 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
73
74 enum {
75         MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
76         MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
77         MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
78         MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
79         MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
80         MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
81         MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
82         MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
83         MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
84         MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
85         MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
86         MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
87 };
88
89 #define MLX5E_TC_MAX_SPLITS 1
90
91 /* Helper struct for accessing a struct containing a list_head array.
92  * Containing struct
93  *   |- Helper array
94  *      [0] Helper item 0
95  *          |- list_head item 0
96  *          |- index (0)
97  *      [1] Helper item 1
98  *          |- list_head item 1
99  *          |- index (1)
100  * To access the containing struct from one of the list_head items:
101  * 1. Get the helper item from the list_head item using
102  *    helper item =
103  *        container_of(list_head item, helper struct type, list_head field)
104  * 2. Get the containing struct from the helper item and its index in the array:
105  *    containing struct =
106  *        container_of(helper item, containing struct type, helper field[index])
107  */
108 struct encap_flow_item {
109         struct mlx5e_encap_entry *e; /* attached encap instance */
110         struct list_head list;
111         int index;
112 };
113
114 struct mlx5e_tc_flow {
115         struct rhash_head       node;
116         struct mlx5e_priv       *priv;
117         u64                     cookie;
118         unsigned long           flags;
119         struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
120         /* Flow can be associated with multiple encap IDs.
121          * The number of encaps is bounded by the number of supported
122          * destinations.
123          */
124         struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
125         struct mlx5e_tc_flow    *peer_flow;
126         struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
127         struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
128         struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
129         struct list_head        hairpin; /* flows sharing the same hairpin */
130         struct list_head        peer;    /* flows with peer flow */
131         struct list_head        unready; /* flows not ready to be offloaded (e.g. due to a missing route) */
132         int                     tmp_efi_index;
133         struct list_head        tmp_list; /* temporary flow list used by neigh update */
134         refcount_t              refcnt;
135         struct rcu_head         rcu_head;
136         struct completion       init_done;
137         union {
138                 struct mlx5_esw_flow_attr esw_attr[0];
139                 struct mlx5_nic_flow_attr nic_attr[0];
140         };
141 };
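
/* Illustrative helper (a sketch only, not used or referenced by the driver):
 * it resolves the flow that owns a given encap list_head item with the two
 * container_of() steps described in the comment above struct encap_flow_item.
 * The helper name is ours, for illustration.
 */
static inline struct mlx5e_tc_flow *
encap_list_item_to_flow(struct list_head *item)
{
        struct encap_flow_item *efi;

        /* 1. list_head item -> helper item */
        efi = container_of(item, struct encap_flow_item, list);
        /* 2. helper item + its index -> containing struct */
        return container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
}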
142
143 struct mlx5e_tc_flow_parse_attr {
144         const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
145         struct net_device *filter_dev;
146         struct mlx5_flow_spec spec;
147         struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
148         int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
149 };
150
151 #define MLX5E_TC_TABLE_NUM_GROUPS 4
152 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
153
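/* Mapping of SW attributes to FTE metadata registers. As configured below,
 * CHAIN_TO_REG places the chain id in a 2-byte window at offset 0 of
 * metadata register C0.
 */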
154 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
155         [CHAIN_TO_REG] = {
156                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
157                 .moffset = 0,
158                 .mlen = 2,
159         },
160 };
161
162 struct mlx5e_hairpin {
163         struct mlx5_hairpin *pair;
164
165         struct mlx5_core_dev *func_mdev;
166         struct mlx5e_priv *func_priv;
167         u32 tdn;
168         u32 tirn;
169
170         int num_channels;
171         struct mlx5e_rqt indir_rqt;
172         u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
173         struct mlx5e_ttc_table ttc;
174 };
175
176 struct mlx5e_hairpin_entry {
177         /* a node of a hash table which keeps all the hairpin entries */
178         struct hlist_node hairpin_hlist;
179
180         /* protects flows list */
181         spinlock_t flows_lock;
182         /* flows sharing the same hairpin */
183         struct list_head flows;
184         /* hpe's that were not fully initialized when the dead peer update
185          * event function traversed them.
186          */
187         struct list_head dead_peer_wait_list;
188
189         u16 peer_vhca_id;
190         u8 prio;
191         struct mlx5e_hairpin *hp;
192         refcount_t refcnt;
193         struct completion res_ready;
194 };
195
196 struct mod_hdr_key {
197         int num_actions;
198         void *actions;
199 };
200
201 struct mlx5e_mod_hdr_entry {
202         /* a node of a hash table which keeps all the mod_hdr entries */
203         struct hlist_node mod_hdr_hlist;
204
205         /* protects flows list */
206         spinlock_t flows_lock;
207         /* flows sharing the same mod_hdr entry */
208         struct list_head flows;
209
210         struct mod_hdr_key key;
211
212         struct mlx5_modify_hdr *modify_hdr;
213
214         refcount_t refcnt;
215         struct completion res_ready;
216         int compl_result;
217 };
218
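/* Size in bytes of a single modify header action; used below when hashing,
 * comparing and copying mod_hdr_key action arrays.
 */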
219 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
220
221 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
222                               struct mlx5e_tc_flow *flow);
223
224 static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
225 {
226         if (!flow || !refcount_inc_not_zero(&flow->refcnt))
227                 return ERR_PTR(-EINVAL);
228         return flow;
229 }
230
231 static void mlx5e_flow_put(struct mlx5e_priv *priv,
232                            struct mlx5e_tc_flow *flow)
233 {
234         if (refcount_dec_and_test(&flow->refcnt)) {
235                 mlx5e_tc_del_flow(priv, flow);
236                 kfree_rcu(flow, rcu_head);
237         }
238 }
239
240 static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
241 {
242         /* Complete all memory stores before setting bit. */
243         smp_mb__before_atomic();
244         set_bit(flag, &flow->flags);
245 }
246
247 #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
248
249 static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
250                                      unsigned long flag)
251 {
252         /* test_and_set_bit() provides all necessary barriers */
253         return test_and_set_bit(flag, &flow->flags);
254 }
255
256 #define flow_flag_test_and_set(flow, flag)                      \
257         __flow_flag_test_and_set(flow,                          \
258                                  MLX5E_TC_FLOW_FLAG_##flag)
259
260 static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
261 {
262         /* Complete all memory stores before clearing bit. */
263         smp_mb__before_atomic();
264         clear_bit(flag, &flow->flags);
265 }
266
267 #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
268                                                       MLX5E_TC_FLOW_FLAG_##flag)
269
270 static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
271 {
272         bool ret = test_bit(flag, &flow->flags);
273
274         /* Read fields of flow structure only after checking flags. */
275         smp_mb__after_atomic();
276         return ret;
277 }
278
279 #define flow_flag_test(flow, flag) __flow_flag_test(flow, \
280                                                     MLX5E_TC_FLOW_FLAG_##flag)
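
/* Illustrative pairing for the flag helpers above (a sketch of the intended
 * usage, not additional driver code): a writer publishes flow state and then
 * sets a flag, e.g.
 *
 *	flow->rule[0] = rule;
 *	flow_flag_set(flow, OFFLOADED);
 *
 * while a reader tests the flag before touching those fields, e.g.
 *
 *	if (flow_flag_test(flow, OFFLOADED))
 *		mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
 *
 * The barriers in __flow_flag_set() and __flow_flag_test() order these
 * accesses against each other.
 */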
281
282 static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
283 {
284         return flow_flag_test(flow, ESWITCH);
285 }
286
287 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
288 {
289         return flow_flag_test(flow, FT);
290 }
291
292 static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
293 {
294         return flow_flag_test(flow, OFFLOADED);
295 }
296
297 static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
298 {
299         return jhash(key->actions,
300                      key->num_actions * MLX5_MH_ACT_SZ, 0);
301 }
302
303 static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
304                                    struct mod_hdr_key *b)
305 {
306         if (a->num_actions != b->num_actions)
307                 return 1;
308
309         return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
310 }
311
312 static struct mod_hdr_tbl *
313 get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
314 {
315         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
316
317         return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
318                 &priv->fs.tc.mod_hdr;
319 }
320
321 static struct mlx5e_mod_hdr_entry *
322 mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
323 {
324         struct mlx5e_mod_hdr_entry *mh, *found = NULL;
325
326         hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
327                 if (!cmp_mod_hdr_info(&mh->key, key)) {
328                         refcount_inc(&mh->refcnt);
329                         found = mh;
330                         break;
331                 }
332         }
333
334         return found;
335 }
336
337 static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
338                               struct mlx5e_mod_hdr_entry *mh,
339                               int namespace)
340 {
341         struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
342
343         if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
344                 return;
345         hash_del(&mh->mod_hdr_hlist);
346         mutex_unlock(&tbl->lock);
347
348         WARN_ON(!list_empty(&mh->flows));
349         if (mh->compl_result > 0)
350                 mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
351
352         kfree(mh);
353 }
354
355 static int get_flow_name_space(struct mlx5e_tc_flow *flow)
356 {
357         return mlx5e_is_eswitch_flow(flow) ?
358                 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
359 }
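
/* Attach a flow to a (possibly shared) mod header entry. The lookup and the
 * device allocation are decoupled: the entry is inserted into the hash table
 * under tbl->lock, the lock is dropped, and mlx5_modify_header_alloc() runs
 * outside it; concurrent users that find the entry wait on mh->res_ready and
 * then check mh->compl_result to learn whether the allocation succeeded.
 */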
360 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
361                                 struct mlx5e_tc_flow *flow,
362                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
363 {
364         int num_actions, actions_size, namespace, err;
365         struct mlx5e_mod_hdr_entry *mh;
366         struct mod_hdr_tbl *tbl;
367         struct mod_hdr_key key;
368         u32 hash_key;
369
370         num_actions  = parse_attr->mod_hdr_acts.num_actions;
371         actions_size = MLX5_MH_ACT_SZ * num_actions;
372
373         key.actions = parse_attr->mod_hdr_acts.actions;
374         key.num_actions = num_actions;
375
376         hash_key = hash_mod_hdr_info(&key);
377
378         namespace = get_flow_name_space(flow);
379         tbl = get_mod_hdr_table(priv, namespace);
380
381         mutex_lock(&tbl->lock);
382         mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
383         if (mh) {
384                 mutex_unlock(&tbl->lock);
385                 wait_for_completion(&mh->res_ready);
386
387                 if (mh->compl_result < 0) {
388                         err = -EREMOTEIO;
389                         goto attach_header_err;
390                 }
391                 goto attach_flow;
392         }
393
394         mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
395         if (!mh) {
396                 mutex_unlock(&tbl->lock);
397                 return -ENOMEM;
398         }
399
400         mh->key.actions = (void *)mh + sizeof(*mh);
401         memcpy(mh->key.actions, key.actions, actions_size);
402         mh->key.num_actions = num_actions;
403         spin_lock_init(&mh->flows_lock);
404         INIT_LIST_HEAD(&mh->flows);
405         refcount_set(&mh->refcnt, 1);
406         init_completion(&mh->res_ready);
407
408         hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
409         mutex_unlock(&tbl->lock);
410
411         mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
412                                                   mh->key.num_actions,
413                                                   mh->key.actions);
414         if (IS_ERR(mh->modify_hdr)) {
415                 err = PTR_ERR(mh->modify_hdr);
416                 mh->compl_result = err;
417                 goto alloc_header_err;
418         }
419         mh->compl_result = 1;
420         complete_all(&mh->res_ready);
421
422 attach_flow:
423         flow->mh = mh;
424         spin_lock(&mh->flows_lock);
425         list_add(&flow->mod_hdr, &mh->flows);
426         spin_unlock(&mh->flows_lock);
427         if (mlx5e_is_eswitch_flow(flow))
428                 flow->esw_attr->modify_hdr = mh->modify_hdr;
429         else
430                 flow->nic_attr->modify_hdr = mh->modify_hdr;
431
432         return 0;
433
434 alloc_header_err:
435         complete_all(&mh->res_ready);
436 attach_header_err:
437         mlx5e_mod_hdr_put(priv, mh, namespace);
438         return err;
439 }
440
441 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
442                                  struct mlx5e_tc_flow *flow)
443 {
444         /* flow wasn't fully initialized */
445         if (!flow->mh)
446                 return;
447
448         spin_lock(&flow->mh->flows_lock);
449         list_del(&flow->mod_hdr);
450         spin_unlock(&flow->mh->flows_lock);
451
452         mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
453         flow->mh = NULL;
454 }
455
456 static
457 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
458 {
459         struct net_device *netdev;
460         struct mlx5e_priv *priv;
461
462         netdev = __dev_get_by_index(net, ifindex);
463         priv = netdev_priv(netdev);
464         return priv->mdev;
465 }
466
467 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
468 {
469         u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
470         void *tirc;
471         int err;
472
473         err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
474         if (err)
475                 goto alloc_tdn_err;
476
477         tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
478
479         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
480         MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
481         MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
482
483         err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
484         if (err)
485                 goto create_tir_err;
486
487         return 0;
488
489 create_tir_err:
490         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
491 alloc_tdn_err:
492         return err;
493 }
494
495 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
496 {
497         mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
498         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
499 }
500
501 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
502 {
503         u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
504         struct mlx5e_priv *priv = hp->func_priv;
505         int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
506
507         mlx5e_build_default_indir_rqt(indirection_rqt, sz,
508                                       hp->num_channels);
509
510         for (i = 0; i < sz; i++) {
511                 ix = i;
512                 if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
513                         ix = mlx5e_bits_invert(i, ilog2(sz));
514                 ix = indirection_rqt[ix];
515                 rqn = hp->pair->rqn[ix];
516                 MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
517         }
518 }
519
520 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
521 {
522         int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
523         struct mlx5e_priv *priv = hp->func_priv;
524         struct mlx5_core_dev *mdev = priv->mdev;
525         void *rqtc;
526         u32 *in;
527
528         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
529         in = kvzalloc(inlen, GFP_KERNEL);
530         if (!in)
531                 return -ENOMEM;
532
533         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
534
535         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
536         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
537
538         mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
539
540         err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
541         if (!err)
542                 hp->indir_rqt.enabled = true;
543
544         kvfree(in);
545         return err;
546 }
547
548 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
549 {
550         struct mlx5e_priv *priv = hp->func_priv;
551         u32 in[MLX5_ST_SZ_DW(create_tir_in)];
552         int tt, i, err;
553         void *tirc;
554
555         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
556                 struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
557
558                 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
559                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
560
561                 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
562                 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
563                 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
564                 mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
565
566                 err = mlx5_core_create_tir(hp->func_mdev, in,
567                                            MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
568                 if (err) {
569                         mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
570                         goto err_destroy_tirs;
571                 }
572         }
573         return 0;
574
575 err_destroy_tirs:
576         for (i = 0; i < tt; i++)
577                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
578         return err;
579 }
580
581 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
582 {
583         int tt;
584
585         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
586                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
587 }
588
589 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
590                                          struct ttc_params *ttc_params)
591 {
592         struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
593         int tt;
594
595         memset(ttc_params, 0, sizeof(*ttc_params));
596
597         ttc_params->any_tt_tirn = hp->tirn;
598
599         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
600                 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
601
602         ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
603         ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
604         ft_attr->prio = MLX5E_TC_PRIO;
605 }
606
607 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
608 {
609         struct mlx5e_priv *priv = hp->func_priv;
610         struct ttc_params ttc_params;
611         int err;
612
613         err = mlx5e_hairpin_create_indirect_rqt(hp);
614         if (err)
615                 return err;
616
617         err = mlx5e_hairpin_create_indirect_tirs(hp);
618         if (err)
619                 goto err_create_indirect_tirs;
620
621         mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
622         err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
623         if (err)
624                 goto err_create_ttc_table;
625
626         netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
627                    hp->num_channels, hp->ttc.ft.t->id);
628
629         return 0;
630
631 err_create_ttc_table:
632         mlx5e_hairpin_destroy_indirect_tirs(hp);
633 err_create_indirect_tirs:
634         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
635
636         return err;
637 }
638
639 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
640 {
641         struct mlx5e_priv *priv = hp->func_priv;
642
643         mlx5e_destroy_ttc_table(priv, &hp->ttc);
644         mlx5e_hairpin_destroy_indirect_tirs(hp);
645         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
646 }
647
648 static struct mlx5e_hairpin *
649 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
650                      int peer_ifindex)
651 {
652         struct mlx5_core_dev *func_mdev, *peer_mdev;
653         struct mlx5e_hairpin *hp;
654         struct mlx5_hairpin *pair;
655         int err;
656
657         hp = kzalloc(sizeof(*hp), GFP_KERNEL);
658         if (!hp)
659                 return ERR_PTR(-ENOMEM);
660
661         func_mdev = priv->mdev;
662         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
663
664         pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
665         if (IS_ERR(pair)) {
666                 err = PTR_ERR(pair);
667                 goto create_pair_err;
668         }
669         hp->pair = pair;
670         hp->func_mdev = func_mdev;
671         hp->func_priv = priv;
672         hp->num_channels = params->num_channels;
673
674         err = mlx5e_hairpin_create_transport(hp);
675         if (err)
676                 goto create_transport_err;
677
678         if (hp->num_channels > 1) {
679                 err = mlx5e_hairpin_rss_init(hp);
680                 if (err)
681                         goto rss_init_err;
682         }
683
684         return hp;
685
686 rss_init_err:
687         mlx5e_hairpin_destroy_transport(hp);
688 create_transport_err:
689         mlx5_core_hairpin_destroy(hp->pair);
690 create_pair_err:
691         kfree(hp);
692         return ERR_PTR(err);
693 }
694
695 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
696 {
697         if (hp->num_channels > 1)
698                 mlx5e_hairpin_rss_cleanup(hp);
699         mlx5e_hairpin_destroy_transport(hp);
700         mlx5_core_hairpin_destroy(hp->pair);
701         kvfree(hp);
702 }
703
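/* Hash key layout: peer vhca id in the upper 16 bits, match priority in the
 * low bits, e.g. peer_vhca_id 0x12 and prio 3 give the key 0x120003.
 */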
704 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
705 {
706         return (peer_vhca_id << 16 | prio);
707 }
708
709 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
710                                                      u16 peer_vhca_id, u8 prio)
711 {
712         struct mlx5e_hairpin_entry *hpe;
713         u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
714
715         hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
716                                hairpin_hlist, hash_key) {
717                 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
718                         refcount_inc(&hpe->refcnt);
719                         return hpe;
720                 }
721         }
722
723         return NULL;
724 }
725
726 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
727                               struct mlx5e_hairpin_entry *hpe)
728 {
729         /* no more hairpin flows for us, release the hairpin pair */
730         if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
731                 return;
732         hash_del(&hpe->hairpin_hlist);
733         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
734
735         if (!IS_ERR_OR_NULL(hpe->hp)) {
736                 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
737                            dev_name(hpe->hp->pair->peer_mdev->device));
738
739                 mlx5e_hairpin_destroy(hpe->hp);
740         }
741
742         WARN_ON(!list_empty(&hpe->flows));
743         kfree(hpe);
744 }
745
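/* VLAN PCP priorities use 3 bits (values 0..7), so 8 is used to mark flows
 * that do not match on a specific priority.
 */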
746 #define UNKNOWN_MATCH_PRIO 8
747
748 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
749                                   struct mlx5_flow_spec *spec, u8 *match_prio,
750                                   struct netlink_ext_ack *extack)
751 {
752         void *headers_c, *headers_v;
753         u8 prio_val, prio_mask = 0;
754         bool vlan_present;
755
756 #ifdef CONFIG_MLX5_CORE_EN_DCB
757         if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
758                 NL_SET_ERR_MSG_MOD(extack,
759                                    "only PCP trust state supported for hairpin");
760                 return -EOPNOTSUPP;
761         }
762 #endif
763         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
764         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
765
766         vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
767         if (vlan_present) {
768                 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
769                 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
770         }
771
772         if (!vlan_present || !prio_mask) {
773                 prio_val = UNKNOWN_MATCH_PRIO;
774         } else if (prio_mask != 0x7) {
775                 NL_SET_ERR_MSG_MOD(extack,
776                                    "masked priority match not supported for hairpin");
777                 return -EOPNOTSUPP;
778         }
779
780         *match_prio = prio_val;
781         return 0;
782 }
783
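/* Attach a flow to a (possibly shared) hairpin entry. This mirrors the mod
 * header attach path above: lookup and hash insertion happen under
 * hairpin_tbl_lock, the hairpin pair itself is created outside the lock, and
 * waiters synchronize on hpe->res_ready before checking whether hpe->hp is an
 * error pointer.
 */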
784 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
785                                   struct mlx5e_tc_flow *flow,
786                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
787                                   struct netlink_ext_ack *extack)
788 {
789         int peer_ifindex = parse_attr->mirred_ifindex[0];
790         struct mlx5_hairpin_params params;
791         struct mlx5_core_dev *peer_mdev;
792         struct mlx5e_hairpin_entry *hpe;
793         struct mlx5e_hairpin *hp;
794         u64 link_speed64;
795         u32 link_speed;
796         u8 match_prio;
797         u16 peer_id;
798         int err;
799
800         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
801         if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
802                 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
803                 return -EOPNOTSUPP;
804         }
805
806         peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
807         err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
808                                      extack);
809         if (err)
810                 return err;
811
812         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
813         hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
814         if (hpe) {
815                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
816                 wait_for_completion(&hpe->res_ready);
817
818                 if (IS_ERR(hpe->hp)) {
819                         err = -EREMOTEIO;
820                         goto out_err;
821                 }
822                 goto attach_flow;
823         }
824
825         hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
826         if (!hpe) {
827                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
828                 return -ENOMEM;
829         }
830
831         spin_lock_init(&hpe->flows_lock);
832         INIT_LIST_HEAD(&hpe->flows);
833         INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
834         hpe->peer_vhca_id = peer_id;
835         hpe->prio = match_prio;
836         refcount_set(&hpe->refcnt, 1);
837         init_completion(&hpe->res_ready);
838
839         hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
840                  hash_hairpin_info(peer_id, match_prio));
841         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
842
843         params.log_data_size = 15;
844         params.log_data_size = min_t(u8, params.log_data_size,
845                                      MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
846         params.log_data_size = max_t(u8, params.log_data_size,
847                                      MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
848
849         params.log_num_packets = params.log_data_size -
850                                  MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
851         params.log_num_packets = min_t(u8, params.log_num_packets,
852                                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
853
854         params.q_counter = priv->q_counter;
855         /* set up one hairpin channel pair per each 50 Gbps share of the link speed */
856         mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
857         link_speed = max_t(u32, link_speed, 50000);
858         link_speed64 = link_speed;
859         do_div(link_speed64, 50000);
860         params.num_channels = link_speed64;
861
862         hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
863         hpe->hp = hp;
864         complete_all(&hpe->res_ready);
865         if (IS_ERR(hp)) {
866                 err = PTR_ERR(hp);
867                 goto out_err;
868         }
869
870         netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
871                    hp->tirn, hp->pair->rqn[0],
872                    dev_name(hp->pair->peer_mdev->device),
873                    hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
874
875 attach_flow:
876         if (hpe->hp->num_channels > 1) {
877                 flow_flag_set(flow, HAIRPIN_RSS);
878                 flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
879         } else {
880                 flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
881         }
882
883         flow->hpe = hpe;
884         spin_lock(&hpe->flows_lock);
885         list_add(&flow->hairpin, &hpe->flows);
886         spin_unlock(&hpe->flows_lock);
887
888         return 0;
889
890 out_err:
891         mlx5e_hairpin_put(priv, hpe);
892         return err;
893 }
894
895 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
896                                    struct mlx5e_tc_flow *flow)
897 {
898         /* flow wasn't fully initialized */
899         if (!flow->hpe)
900                 return;
901
902         spin_lock(&flow->hpe->flows_lock);
903         list_del(&flow->hairpin);
904         spin_unlock(&flow->hpe->flows_lock);
905
906         mlx5e_hairpin_put(priv, flow->hpe);
907         flow->hpe = NULL;
908 }
909
910 static int
911 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
912                       struct mlx5e_tc_flow_parse_attr *parse_attr,
913                       struct mlx5e_tc_flow *flow,
914                       struct netlink_ext_ack *extack)
915 {
916         struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
917         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
918         struct mlx5_core_dev *dev = priv->mdev;
919         struct mlx5_flow_destination dest[2] = {};
920         struct mlx5_flow_act flow_act = {
921                 .action = attr->action,
922                 .flags    = FLOW_ACT_NO_APPEND,
923         };
924         struct mlx5_fc *counter = NULL;
925         int err, dest_ix = 0;
926
927         flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
928         flow_context->flow_tag = attr->flow_tag;
929
930         if (flow_flag_test(flow, HAIRPIN)) {
931                 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
932                 if (err)
933                         return err;
934
935                 if (flow_flag_test(flow, HAIRPIN_RSS)) {
936                         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
937                         dest[dest_ix].ft = attr->hairpin_ft;
938                 } else {
939                         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
940                         dest[dest_ix].tir_num = attr->hairpin_tirn;
941                 }
942                 dest_ix++;
943         } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
944                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
945                 dest[dest_ix].ft = priv->fs.vlan.ft.t;
946                 dest_ix++;
947         }
948
949         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
950                 counter = mlx5_fc_create(dev, true);
951                 if (IS_ERR(counter))
952                         return PTR_ERR(counter);
953
954                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
955                 dest[dest_ix].counter_id = mlx5_fc_id(counter);
956                 dest_ix++;
957                 attr->counter = counter;
958         }
959
960         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
961                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
962                 flow_act.modify_hdr = attr->modify_hdr;
963                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
964                 if (err)
965                         return err;
966         }
967
968         mutex_lock(&priv->fs.tc.t_lock);
969         if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
970                 struct mlx5_flow_table_attr ft_attr = {};
971                 int tc_grp_size, tc_tbl_size, tc_num_grps;
972                 u32 max_flow_counter;
973
974                 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
975                                     MLX5_CAP_GEN(dev, max_flow_counter_15_0);
976
977                 tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
978
979                 tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
980                                     BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
981                 tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;
982
983                 ft_attr.prio = MLX5E_TC_PRIO;
984                 ft_attr.max_fte = tc_tbl_size;
985                 ft_attr.level = MLX5E_TC_FT_LEVEL;
986                 ft_attr.autogroup.max_num_groups = tc_num_grps;
987                 priv->fs.tc.t =
988                         mlx5_create_auto_grouped_flow_table(priv->fs.ns,
989                                                             &ft_attr);
990                 if (IS_ERR(priv->fs.tc.t)) {
991                         mutex_unlock(&priv->fs.tc.t_lock);
992                         NL_SET_ERR_MSG_MOD(extack,
993                                            "Failed to create tc offload table");
994                         netdev_err(priv->netdev,
995                                    "Failed to create tc offload table\n");
996                         return PTR_ERR(priv->fs.tc.t);
997                 }
998         }
999
1000         if (attr->match_level != MLX5_MATCH_NONE)
1001                 parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1002
1003         flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
1004                                             &flow_act, dest, dest_ix);
1005         mutex_unlock(&priv->fs.tc.t_lock);
1006
1007         return PTR_ERR_OR_ZERO(flow->rule[0]);
1008 }
1009
1010 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1011                                   struct mlx5e_tc_flow *flow)
1012 {
1013         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
1014         struct mlx5_fc *counter = NULL;
1015
1016         counter = attr->counter;
1017         if (!IS_ERR_OR_NULL(flow->rule[0]))
1018                 mlx5_del_flow_rules(flow->rule[0]);
1019         mlx5_fc_destroy(priv->mdev, counter);
1020
1021         mutex_lock(&priv->fs.tc.t_lock);
1022         if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
1023                 mlx5_destroy_flow_table(priv->fs.tc.t);
1024                 priv->fs.tc.t = NULL;
1025         }
1026         mutex_unlock(&priv->fs.tc.t_lock);
1027
1028         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1029                 mlx5e_detach_mod_hdr(priv, flow);
1030
1031         if (flow_flag_test(flow, HAIRPIN))
1032                 mlx5e_hairpin_flow_del(priv, flow);
1033 }
1034
1035 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1036                                struct mlx5e_tc_flow *flow, int out_index);
1037
1038 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1039                               struct mlx5e_tc_flow *flow,
1040                               struct net_device *mirred_dev,
1041                               int out_index,
1042                               struct netlink_ext_ack *extack,
1043                               struct net_device **encap_dev,
1044                               bool *encap_valid);
1045
1046 static struct mlx5_flow_handle *
1047 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1048                            struct mlx5e_tc_flow *flow,
1049                            struct mlx5_flow_spec *spec,
1050                            struct mlx5_esw_flow_attr *attr)
1051 {
1052         struct mlx5_flow_handle *rule;
1053
1054         rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1055         if (IS_ERR(rule))
1056                 return rule;
1057
1058         if (attr->split_count) {
1059                 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1060                 if (IS_ERR(flow->rule[1])) {
1061                         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1062                         return flow->rule[1];
1063                 }
1064         }
1065
1066         return rule;
1067 }
1068
1069 static void
1070 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1071                              struct mlx5e_tc_flow *flow,
1072                              struct mlx5_esw_flow_attr *attr)
1073 {
1074         flow_flag_clear(flow, OFFLOADED);
1075
1076         if (attr->split_count)
1077                 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1078
1079         mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1080 }
1081
1082 static struct mlx5_flow_handle *
1083 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1084                               struct mlx5e_tc_flow *flow,
1085                               struct mlx5_flow_spec *spec,
1086                               struct mlx5_esw_flow_attr *slow_attr)
1087 {
1088         struct mlx5_flow_handle *rule;
1089
1090         memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
1091         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1092         slow_attr->split_count = 0;
1093         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1094
1095         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1096         if (!IS_ERR(rule))
1097                 flow_flag_set(flow, SLOW);
1098
1099         return rule;
1100 }
1101
1102 static void
1103 mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1104                                   struct mlx5e_tc_flow *flow,
1105                                   struct mlx5_esw_flow_attr *slow_attr)
1106 {
1107         memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
1108         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1109         slow_attr->split_count = 0;
1110         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1111         mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1112         flow_flag_clear(flow, SLOW);
1113 }
1114
1115 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1116  * function.
1117  */
1118 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1119                              struct list_head *unready_flows)
1120 {
1121         flow_flag_set(flow, NOT_READY);
1122         list_add_tail(&flow->unready, unready_flows);
1123 }
1124
1125 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1126  * function.
1127  */
1128 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1129 {
1130         list_del(&flow->unready);
1131         flow_flag_clear(flow, NOT_READY);
1132 }
1133
1134 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1135 {
1136         struct mlx5_rep_uplink_priv *uplink_priv;
1137         struct mlx5e_rep_priv *rpriv;
1138         struct mlx5_eswitch *esw;
1139
1140         esw = flow->priv->mdev->priv.eswitch;
1141         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1142         uplink_priv = &rpriv->uplink_priv;
1143
1144         mutex_lock(&uplink_priv->unready_flows_lock);
1145         unready_flow_add(flow, &uplink_priv->unready_flows);
1146         mutex_unlock(&uplink_priv->unready_flows_lock);
1147 }
1148
1149 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1150 {
1151         struct mlx5_rep_uplink_priv *uplink_priv;
1152         struct mlx5e_rep_priv *rpriv;
1153         struct mlx5_eswitch *esw;
1154
1155         esw = flow->priv->mdev->priv.eswitch;
1156         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1157         uplink_priv = &rpriv->uplink_priv;
1158
1159         mutex_lock(&uplink_priv->unready_flows_lock);
1160         unready_flow_del(flow);
1161         mutex_unlock(&uplink_priv->unready_flows_lock);
1162 }
1163
1164 static int
1165 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1166                       struct mlx5e_tc_flow *flow,
1167                       struct netlink_ext_ack *extack)
1168 {
1169         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1170         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1171         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
1172         struct net_device *out_dev, *encap_dev = NULL;
1173         struct mlx5_fc *counter = NULL;
1174         struct mlx5e_rep_priv *rpriv;
1175         struct mlx5e_priv *out_priv;
1176         bool encap_valid = true;
1177         u32 max_prio, max_chain;
1178         int err = 0;
1179         int out_index;
1180
1181         if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
1182                 NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
1183                 return -EOPNOTSUPP;
1184         }
1185
1186         /* We check chain range only for tc flows.
1187          * For ft flows, we checked attr->chain was originally 0 and set it to
1188          * FDB_FT_CHAIN which is outside tc range.
1189          * See mlx5e_rep_setup_ft_cb().
1190          */
1191         max_chain = mlx5_esw_chains_get_chain_range(esw);
1192         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1193                 NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
1194                 return -EOPNOTSUPP;
1195         }
1196
1197         max_prio = mlx5_esw_chains_get_prio_range(esw);
1198         if (attr->prio > max_prio) {
1199                 NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
1200                 return -EOPNOTSUPP;
1201         }
1202
1203         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1204                 int mirred_ifindex;
1205
1206                 if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1207                         continue;
1208
1209                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1210                 out_dev = __dev_get_by_index(dev_net(priv->netdev),
1211                                              mirred_ifindex);
1212                 err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1213                                          extack, &encap_dev, &encap_valid);
1214                 if (err)
1215                         return err;
1216
1217                 out_priv = netdev_priv(encap_dev);
1218                 rpriv = out_priv->ppriv;
1219                 attr->dests[out_index].rep = rpriv->rep;
1220                 attr->dests[out_index].mdev = out_priv->mdev;
1221         }
1222
1223         err = mlx5_eswitch_add_vlan_action(esw, attr);
1224         if (err)
1225                 return err;
1226
1227         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1228                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1229                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1230                 if (err)
1231                         return err;
1232         }
1233
1234         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1235                 counter = mlx5_fc_create(attr->counter_dev, true);
1236                 if (IS_ERR(counter))
1237                         return PTR_ERR(counter);
1238
1239                 attr->counter = counter;
1240         }
1241
1242         /* we get here if one of the following takes place:
1243          * (1) there's no error
1244          * (2) there's an encap action and we don't have a valid neigh
1245          */
1246         if (!encap_valid) {
1247                 /* continue with goto slow path rule instead */
1248                 struct mlx5_esw_flow_attr slow_attr;
1249
1250                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
1251         } else {
1252                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1253         }
1254
1255         if (IS_ERR(flow->rule[0]))
1256                 return PTR_ERR(flow->rule[0]);
1257         else
1258                 flow_flag_set(flow, OFFLOADED);
1259
1260         return 0;
1261 }
1262
1263 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1264 {
1265         struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
1266         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1267                                        spec->match_value,
1268                                        misc_parameters_3);
1269         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1270                                              headers_v,
1271                                              geneve_tlv_option_0_data);
1272
1273         return !!geneve_tlv_opt_0_data;
1274 }
1275
1276 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1277                                   struct mlx5e_tc_flow *flow)
1278 {
1279         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1280         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1281         struct mlx5_esw_flow_attr slow_attr;
1282         int out_index;
1283
1284         if (flow_flag_test(flow, NOT_READY)) {
1285                 remove_unready_flow(flow);
1286                 kvfree(attr->parse_attr);
1287                 return;
1288         }
1289
1290         if (mlx5e_is_offloaded_flow(flow)) {
1291                 if (flow_flag_test(flow, SLOW))
1292                         mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
1293                 else
1294                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1295         }
1296
1297         if (mlx5_flow_has_geneve_opt(flow))
1298                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1299
1300         mlx5_eswitch_del_vlan_action(esw, attr);
1301
1302         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1303                 if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1304                         mlx5e_detach_encap(priv, flow, out_index);
1305                         kfree(attr->parse_attr->tun_info[out_index]);
1306                 }
1307         kvfree(attr->parse_attr);
1308
1309         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1310                 mlx5e_detach_mod_hdr(priv, flow);
1311
1312         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1313                 mlx5_fc_destroy(attr->counter_dev, attr->counter);
1314 }
1315
1316 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1317                               struct mlx5e_encap_entry *e,
1318                               struct list_head *flow_list)
1319 {
1320         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1321         struct mlx5_esw_flow_attr slow_attr, *esw_attr;
1322         struct mlx5_flow_handle *rule;
1323         struct mlx5_flow_spec *spec;
1324         struct mlx5e_tc_flow *flow;
1325         int err;
1326
1327         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1328                                                      e->reformat_type,
1329                                                      e->encap_size, e->encap_header,
1330                                                      MLX5_FLOW_NAMESPACE_FDB);
1331         if (IS_ERR(e->pkt_reformat)) {
1332                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1333                                PTR_ERR(e->pkt_reformat));
1334                 return;
1335         }
1336         e->flags |= MLX5_ENCAP_ENTRY_VALID;
1337         mlx5e_rep_queue_neigh_stats_work(priv);
1338
1339         list_for_each_entry(flow, flow_list, tmp_list) {
1340                 bool all_flow_encaps_valid = true;
1341                 int i;
1342
1343                 if (!mlx5e_is_offloaded_flow(flow))
1344                         continue;
1345                 esw_attr = flow->esw_attr;
1346                 spec = &esw_attr->parse_attr->spec;
1347
1348                 esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1349                 esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1350                 /* Flow can be associated with multiple encap entries.
1351                  * Before offloading the flow verify that all of them have
1352                  * a valid neighbour.
1353                  */
1354                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1355                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1356                                 continue;
1357                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1358                                 all_flow_encaps_valid = false;
1359                                 break;
1360                         }
1361                 }
1362                 /* Do not offload flows with unresolved neighbors */
1363                 if (!all_flow_encaps_valid)
1364                         continue;
1365                 /* update from slow path rule to encap rule */
1366                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
1367                 if (IS_ERR(rule)) {
1368                         err = PTR_ERR(rule);
1369                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1370                                        err);
1371                         continue;
1372                 }
1373
1374                 mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
1375                 flow->rule[0] = rule;
1376                 /* was unset when slow path rule removed */
1377                 flow_flag_set(flow, OFFLOADED);
1378         }
1379 }
1380
1381 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1382                               struct mlx5e_encap_entry *e,
1383                               struct list_head *flow_list)
1384 {
1385         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1386         struct mlx5_esw_flow_attr slow_attr;
1387         struct mlx5_flow_handle *rule;
1388         struct mlx5_flow_spec *spec;
1389         struct mlx5e_tc_flow *flow;
1390         int err;
1391
1392         list_for_each_entry(flow, flow_list, tmp_list) {
1393                 if (!mlx5e_is_offloaded_flow(flow))
1394                         continue;
1395                 spec = &flow->esw_attr->parse_attr->spec;
1396
1397                 /* update from encap rule to slow path rule */
1398                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
1399                 /* mark the flow's encap dest as non-valid */
1400                 flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1401
1402                 if (IS_ERR(rule)) {
1403                         err = PTR_ERR(rule);
1404                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1405                                        err);
1406                         continue;
1407                 }
1408
1409                 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
1410                 flow->rule[0] = rule;
1411                 /* was unset when fast path rule removed */
1412                 flow_flag_set(flow, OFFLOADED);
1413         }
1414
1415         /* the encap was valid; mark it invalid and release the HW reformat */
1416         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1417         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1418 }
1419
1420 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1421 {
1422         if (mlx5e_is_eswitch_flow(flow))
1423                 return flow->esw_attr->counter;
1424         else
1425                 return flow->nic_attr->counter;
1426 }
1427
1428 /* Takes a reference to all flows attached to encap and adds the flows to
1429  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1430  */
1431 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1432 {
1433         struct encap_flow_item *efi;
1434         struct mlx5e_tc_flow *flow;
1435
1436         list_for_each_entry(efi, &e->flows, list) {
1437                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1438                 if (IS_ERR(mlx5e_flow_get(flow)))
1439                         continue;
1440                 wait_for_completion(&flow->init_done);
1441
1442                 flow->tmp_efi_index = efi->index;
1443                 list_add(&flow->tmp_list, flow_list);
1444         }
1445 }
1446
1447 /* Drop the reference held on each flow linked on flow_list via tmp_list. */
1448 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1449 {
1450         struct mlx5e_tc_flow *flow, *tmp;
1451
1452         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1453                 mlx5e_flow_put(priv, flow);
1454 }
1455
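/* Walk nhe->encap_list under RCU, starting after 'e' (or from the list head
 * when 'e' is NULL), and take a reference on the first entry that is still
 * referenced. Wait for that entry to finish initialization and return it only
 * if it reached the valid state; otherwise keep searching. The reference on
 * the starting entry 'e' is always released.
 */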
1456 static struct mlx5e_encap_entry *
1457 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1458                            struct mlx5e_encap_entry *e)
1459 {
1460         struct mlx5e_encap_entry *next = NULL;
1461
1462 retry:
1463         rcu_read_lock();
1464
1465         /* find encap with non-zero reference counter value */
1466         for (next = e ?
1467                      list_next_or_null_rcu(&nhe->encap_list,
1468                                            &e->encap_list,
1469                                            struct mlx5e_encap_entry,
1470                                            encap_list) :
1471                      list_first_or_null_rcu(&nhe->encap_list,
1472                                             struct mlx5e_encap_entry,
1473                                             encap_list);
1474              next;
1475              next = list_next_or_null_rcu(&nhe->encap_list,
1476                                           &next->encap_list,
1477                                           struct mlx5e_encap_entry,
1478                                           encap_list))
1479                 if (mlx5e_encap_take(next))
1480                         break;
1481
1482         rcu_read_unlock();
1483
1484         /* release starting encap */
1485         if (e)
1486                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
1487         if (!next)
1488                 return next;
1489
1490         /* wait for encap to be fully initialized */
1491         wait_for_completion(&next->res_ready);
1492         /* continue searching if encap entry is not in valid state after completion */
1493         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1494                 e = next;
1495                 goto retry;
1496         }
1497
1498         return next;
1499 }
1500
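/* Check whether any flow depending on this neighbour was used since the last
 * report by comparing the flow counters' lastuse timestamps with
 * nhe->reported_lastuse. If so, update reported_lastuse and send a neigh
 * event so the neighbour entry is treated as still in use.
 */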
1501 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1502 {
1503         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1504         struct mlx5e_encap_entry *e = NULL;
1505         struct mlx5e_tc_flow *flow;
1506         struct mlx5_fc *counter;
1507         struct neigh_table *tbl;
1508         bool neigh_used = false;
1509         struct neighbour *n;
1510         u64 lastuse;
1511
1512         if (m_neigh->family == AF_INET)
1513                 tbl = &arp_tbl;
1514 #if IS_ENABLED(CONFIG_IPV6)
1515         else if (m_neigh->family == AF_INET6)
1516                 tbl = ipv6_stub->nd_tbl;
1517 #endif
1518         else
1519                 return;
1520
1521         /* mlx5e_get_next_valid_encap() releases previous encap before returning
1522          * next one.
1523          */
1524         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1525                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1526                 struct encap_flow_item *efi, *tmp;
1527                 struct mlx5_eswitch *esw;
1528                 LIST_HEAD(flow_list);
1529
1530                 esw = priv->mdev->priv.eswitch;
1531                 mutex_lock(&esw->offloads.encap_tbl_lock);
1532                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1533                         flow = container_of(efi, struct mlx5e_tc_flow,
1534                                             encaps[efi->index]);
1535                         if (IS_ERR(mlx5e_flow_get(flow)))
1536                                 continue;
1537                         list_add(&flow->tmp_list, &flow_list);
1538
1539                         if (mlx5e_is_offloaded_flow(flow)) {
1540                                 counter = mlx5e_tc_get_counter(flow);
1541                                 lastuse = mlx5_fc_query_lastuse(counter);
1542                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1543                                         neigh_used = true;
1544                                         break;
1545                                 }
1546                         }
1547                 }
1548                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1549
1550                 mlx5e_put_encap_flow_list(priv, &flow_list);
1551                 if (neigh_used) {
1552                         /* release current encap before breaking the loop */
1553                         mlx5e_encap_put(priv, e);
1554                         break;
1555                 }
1556         }
1557
1558         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1559
1560         if (neigh_used) {
1561                 nhe->reported_lastuse = jiffies;
1562
1563                 /* find the relevant neigh according to the cached device and
1564                  * dst ip pair
1565                  */
1566                 n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1567                 if (!n)
1568                         return;
1569
1570                 neigh_event_send(n, NULL);
1571                 neigh_release(n);
1572         }
1573 }
1574
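/* Final teardown of an encap entry once the last reference is gone: detach it
 * from the representor (if it was successfully attached), release the HW
 * packet reformat if one was allocated, and free the entry after an RCU grace
 * period.
 */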
1575 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1576 {
1577         WARN_ON(!list_empty(&e->flows));
1578
1579         if (e->compl_result > 0) {
1580                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1581
1582                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1583                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1584         }
1585
1586         kfree(e->tun_info);
1587         kfree(e->encap_header);
1588         kfree_rcu(e, rcu);
1589 }
1590
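/* Drop a reference on the encap entry; on the last reference the entry is
 * removed from the encap table and deallocated. Takes encap_tbl_lock, so it
 * must not be called with that lock held.
 */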
1591 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1592 {
1593         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1594
1595         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1596                 return;
1597         hash_del_rcu(&e->encap_hlist);
1598         mutex_unlock(&esw->offloads.encap_tbl_lock);
1599
1600         mlx5e_encap_dealloc(priv, e);
1601 }
1602
1603 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1604                                struct mlx5e_tc_flow *flow, int out_index)
1605 {
1606         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1607         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1608
1609         /* flow wasn't fully initialized */
1610         if (!e)
1611                 return;
1612
1613         mutex_lock(&esw->offloads.encap_tbl_lock);
1614         list_del(&flow->encaps[out_index].list);
1615         flow->encaps[out_index].e = NULL;
1616         if (!refcount_dec_and_test(&e->refcnt)) {
1617                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1618                 return;
1619         }
1620         hash_del_rcu(&e->encap_hlist);
1621         mutex_unlock(&esw->offloads.encap_tbl_lock);
1622
1623         mlx5e_encap_dealloc(priv, e);
1624 }
1625
1626 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1627 {
1628         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1629
1630         if (!flow_flag_test(flow, ESWITCH) ||
1631             !flow_flag_test(flow, DUP))
1632                 return;
1633
1634         mutex_lock(&esw->offloads.peer_mutex);
1635         list_del(&flow->peer);
1636         mutex_unlock(&esw->offloads.peer_mutex);
1637
1638         flow_flag_clear(flow, DUP);
1639
1640         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1641                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1642                 kfree(flow->peer_flow);
1643         }
1644
1645         flow->peer_flow = NULL;
1646 }
1647
1648 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1649 {
1650         struct mlx5_core_dev *dev = flow->priv->mdev;
1651         struct mlx5_devcom *devcom = dev->priv.devcom;
1652         struct mlx5_eswitch *peer_esw;
1653
1654         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1655         if (!peer_esw)
1656                 return;
1657
1658         __mlx5e_tc_del_fdb_peer_flow(flow);
1659         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1660 }
1661
1662 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1663                               struct mlx5e_tc_flow *flow)
1664 {
1665         if (mlx5e_is_eswitch_flow(flow)) {
1666                 mlx5e_tc_del_fdb_peer_flow(flow);
1667                 mlx5e_tc_del_fdb_flow(priv, flow);
1668         } else {
1669                 mlx5e_tc_del_nic_flow(priv, flow);
1670         }
1671 }
1672
1673
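/* Translate the tunnel (encap) part of a flower match into outer-header match
 * criteria/values of the flow spec. Also enforces a match on the DMAC and
 * leaves IP fragments to software.
 */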
1674 static int parse_tunnel_attr(struct mlx5e_priv *priv,
1675                              struct mlx5_flow_spec *spec,
1676                              struct flow_cls_offload *f,
1677                              struct net_device *filter_dev, u8 *match_level)
1678 {
1679         struct netlink_ext_ack *extack = f->common.extack;
1680         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1681                                        outer_headers);
1682         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1683                                        outer_headers);
1684         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1685         int err;
1686
1687         err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
1688                                  headers_c, headers_v, match_level);
1689         if (err) {
1690                 NL_SET_ERR_MSG_MOD(extack,
1691                                    "failed to parse tunnel attributes");
1692                 return err;
1693         }
1694
1695         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
1696                 struct flow_match_control match;
1697                 u16 addr_type;
1698
1699                 flow_rule_match_enc_control(rule, &match);
1700                 addr_type = match.key->addr_type;
1701
1702                 /* Tunnel addr_type uses the same key IDs as the non-tunnel case */
1703                 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1704                         struct flow_match_ipv4_addrs match;
1705
1706                         flow_rule_match_enc_ipv4_addrs(rule, &match);
1707                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1708                                  src_ipv4_src_ipv6.ipv4_layout.ipv4,
1709                                  ntohl(match.mask->src));
1710                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1711                                  src_ipv4_src_ipv6.ipv4_layout.ipv4,
1712                                  ntohl(match.key->src));
1713
1714                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1715                                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1716                                  ntohl(match.mask->dst));
1717                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1718                                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1719                                  ntohl(match.key->dst));
1720
1721                         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
1722                                          ethertype);
1723                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1724                                  ETH_P_IP);
1725                 } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1726                         struct flow_match_ipv6_addrs match;
1727
1728                         flow_rule_match_enc_ipv6_addrs(rule, &match);
1729                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1730                                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
1731                                &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
1732                                                                    ipv6));
1733                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1734                                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
1735                                &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
1736                                                                   ipv6));
1737
1738                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1739                                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1740                                &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
1741                                                                    ipv6));
1742                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1743                                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1744                                &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
1745                                                                   ipv6));
1746
1747                         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
1748                                          ethertype);
1749                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1750                                  ETH_P_IPV6);
1751                 }
1752         }
1753
1754         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
1755                 struct flow_match_ip match;
1756
1757                 flow_rule_match_enc_ip(rule, &match);
1758                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
1759                          match.mask->tos & 0x3);
1760                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
1761                          match.key->tos & 0x3);
1762
1763                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
1764                          match.mask->tos >> 2);
1765                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
1766                          match.key->tos  >> 2);
1767
1768                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
1769                          match.mask->ttl);
1770                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
1771                          match.key->ttl);
1772
1773                 if (match.mask->ttl &&
1774                     !MLX5_CAP_ESW_FLOWTABLE_FDB
1775                         (priv->mdev,
1776                          ft_field_support.outer_ipv4_ttl)) {
1777                         NL_SET_ERR_MSG_MOD(extack,
1778                                            "Matching on TTL is not supported");
1779                         return -EOPNOTSUPP;
1780                 }
1781
1782         }
1783
1784         /* Enforce DMAC when offloading incoming tunneled flows.
1785          * Flow counters require a match on the DMAC.
1786          */
1787         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
1788         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
1789         ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1790                                      dmac_47_16), priv->netdev->dev_addr);
1791
1792         /* let software handle IP fragments */
1793         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1794         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
1795
1796         return 0;
1797 }
1798
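/* For flows that decapsulate, header matching applies to the inner headers;
 * otherwise the outer headers are used.
 */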
1799 static void *get_match_headers_criteria(u32 flags,
1800                                         struct mlx5_flow_spec *spec)
1801 {
1802         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1803                 MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1804                              inner_headers) :
1805                 MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1806                              outer_headers);
1807 }
1808
1809 static void *get_match_headers_value(u32 flags,
1810                                      struct mlx5_flow_spec *spec)
1811 {
1812         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1813                 MLX5_ADDR_OF(fte_match_param, spec->match_value,
1814                              inner_headers) :
1815                 MLX5_ADDR_OF(fte_match_param, spec->match_value,
1816                              outer_headers);
1817 }
1818
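/* Matching on the meta ingress ifindex is only offloaded when it is an exact
 * match on the device the filter was installed on.
 */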
1819 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
1820                                    struct flow_cls_offload *f)
1821 {
1822         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1823         struct netlink_ext_ack *extack = f->common.extack;
1824         struct net_device *ingress_dev;
1825         struct flow_match_meta match;
1826
1827         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
1828                 return 0;
1829
1830         flow_rule_match_meta(rule, &match);
1831         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
1832                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
1833                 return -EINVAL;
1834         }
1835
1836         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
1837                                          match.key->ingress_ifindex);
1838         if (!ingress_dev) {
1839                 NL_SET_ERR_MSG_MOD(extack,
1840                                    "Can't find the ingress port to match on");
1841                 return -EINVAL;
1842         }
1843
1844         if (ingress_dev != filter_dev) {
1845                 NL_SET_ERR_MSG_MOD(extack,
1846                                    "Can't match on the ingress filter port");
1847                 return -EINVAL;
1848         }
1849
1850         return 0;
1851 }
1852
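/* Translate a flower classifier match into the mlx5 flow spec. When the
 * filter device is a tunnel device, the tunnel keys are parsed into the outer
 * headers and the remaining keys into the inner headers.
 * *inner_match_level / *outer_match_level report the deepest header layer
 * matched in each.
 */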
1853 static int __parse_cls_flower(struct mlx5e_priv *priv,
1854                               struct mlx5_flow_spec *spec,
1855                               struct flow_cls_offload *f,
1856                               struct net_device *filter_dev,
1857                               u8 *inner_match_level, u8 *outer_match_level)
1858 {
1859         struct netlink_ext_ack *extack = f->common.extack;
1860         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1861                                        outer_headers);
1862         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1863                                        outer_headers);
1864         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1865                                     misc_parameters);
1866         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1867                                     misc_parameters);
1868         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1869         struct flow_dissector *dissector = rule->match.dissector;
1870         u16 addr_type = 0;
1871         u8 ip_proto = 0;
1872         u8 *match_level;
1873         int err;
1874
1875         match_level = outer_match_level;
1876
1877         if (dissector->used_keys &
1878             ~(BIT(FLOW_DISSECTOR_KEY_META) |
1879               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
1880               BIT(FLOW_DISSECTOR_KEY_BASIC) |
1881               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
1882               BIT(FLOW_DISSECTOR_KEY_VLAN) |
1883               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
1884               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
1885               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
1886               BIT(FLOW_DISSECTOR_KEY_PORTS) |
1887               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
1888               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
1889               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
1890               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
1891               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
1892               BIT(FLOW_DISSECTOR_KEY_TCP) |
1893               BIT(FLOW_DISSECTOR_KEY_IP)  |
1894               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
1895               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
1896                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
1897                 netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
1898                             dissector->used_keys);
1899                 return -EOPNOTSUPP;
1900         }
1901
1902         if (mlx5e_get_tc_tun(filter_dev)) {
1903                 if (parse_tunnel_attr(priv, spec, f, filter_dev,
1904                                       outer_match_level))
1905                         return -EOPNOTSUPP;
1906
1907                 /* At this point, the header pointers should point to the inner
1908                  * headers; the outer headers were already set by parse_tunnel_attr
1909                  */
1910                 match_level = inner_match_level;
1911                 headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1912                                                        spec);
1913                 headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1914                                                     spec);
1915         }
1916
1917         err = mlx5e_flower_parse_meta(filter_dev, f);
1918         if (err)
1919                 return err;
1920
1921         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1922                 struct flow_match_basic match;
1923
1924                 flow_rule_match_basic(rule, &match);
1925                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1926                          ntohs(match.mask->n_proto));
1927                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1928                          ntohs(match.key->n_proto));
1929
1930                 if (match.mask->n_proto)
1931                         *match_level = MLX5_MATCH_L2;
1932         }
1933         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
1934             is_vlan_dev(filter_dev)) {
1935                 struct flow_dissector_key_vlan filter_dev_mask;
1936                 struct flow_dissector_key_vlan filter_dev_key;
1937                 struct flow_match_vlan match;
1938
1939                 if (is_vlan_dev(filter_dev)) {
1940                         match.key = &filter_dev_key;
1941                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
1942                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
1943                         match.key->vlan_priority = 0;
1944                         match.mask = &filter_dev_mask;
1945                         memset(match.mask, 0xff, sizeof(*match.mask));
1946                         match.mask->vlan_priority = 0;
1947                 } else {
1948                         flow_rule_match_vlan(rule, &match);
1949                 }
1950                 if (match.mask->vlan_id ||
1951                     match.mask->vlan_priority ||
1952                     match.mask->vlan_tpid) {
1953                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1954                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1955                                          svlan_tag, 1);
1956                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1957                                          svlan_tag, 1);
1958                         } else {
1959                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1960                                          cvlan_tag, 1);
1961                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1962                                          cvlan_tag, 1);
1963                         }
1964
1965                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
1966                                  match.mask->vlan_id);
1967                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
1968                                  match.key->vlan_id);
1969
1970                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
1971                                  match.mask->vlan_priority);
1972                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
1973                                  match.key->vlan_priority);
1974
1975                         *match_level = MLX5_MATCH_L2;
1976                 }
1977         } else if (*match_level != MLX5_MATCH_NONE) {
1978                 /* cvlan_tag enabled in the match criteria and
1979                  * disabled in the match value means both S & C tags
1980                  * don't exist (untagged in both)
1981                  */
1982                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1983                 *match_level = MLX5_MATCH_L2;
1984         }
1985
1986         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
1987                 struct flow_match_vlan match;
1988
1989                 flow_rule_match_cvlan(rule, &match);
1990                 if (match.mask->vlan_id ||
1991                     match.mask->vlan_priority ||
1992                     match.mask->vlan_tpid) {
1993                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1994                                 MLX5_SET(fte_match_set_misc, misc_c,
1995                                          outer_second_svlan_tag, 1);
1996                                 MLX5_SET(fte_match_set_misc, misc_v,
1997                                          outer_second_svlan_tag, 1);
1998                         } else {
1999                                 MLX5_SET(fte_match_set_misc, misc_c,
2000                                          outer_second_cvlan_tag, 1);
2001                                 MLX5_SET(fte_match_set_misc, misc_v,
2002                                          outer_second_cvlan_tag, 1);
2003                         }
2004
2005                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2006                                  match.mask->vlan_id);
2007                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2008                                  match.key->vlan_id);
2009                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2010                                  match.mask->vlan_priority);
2011                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2012                                  match.key->vlan_priority);
2013
2014                         *match_level = MLX5_MATCH_L2;
2015                 }
2016         }
2017
2018         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2019                 struct flow_match_eth_addrs match;
2020
2021                 flow_rule_match_eth_addrs(rule, &match);
2022                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2023                                              dmac_47_16),
2024                                 match.mask->dst);
2025                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2026                                              dmac_47_16),
2027                                 match.key->dst);
2028
2029                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2030                                              smac_47_16),
2031                                 match.mask->src);
2032                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2033                                              smac_47_16),
2034                                 match.key->src);
2035
2036                 if (!is_zero_ether_addr(match.mask->src) ||
2037                     !is_zero_ether_addr(match.mask->dst))
2038                         *match_level = MLX5_MATCH_L2;
2039         }
2040
2041         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2042                 struct flow_match_control match;
2043
2044                 flow_rule_match_control(rule, &match);
2045                 addr_type = match.key->addr_type;
2046
2047                 /* the HW doesn't support matching on frag first/later */
2048                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2049                         return -EOPNOTSUPP;
2050
2051                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2052                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2053                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2054                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2055
2056                         /* the HW doesn't need L3 inline to match on frag=no */
2057                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2058                                 *match_level = MLX5_MATCH_L2;
2059                         else
2060                                 *match_level = MLX5_MATCH_L3;
2061                 }
2062         }
2063         /* ***  L2 attributes parsing up to here *** */
2064
2065         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2066                 struct flow_match_basic match;
2067
2068                 flow_rule_match_basic(rule, &match);
2069                 ip_proto = match.key->ip_proto;
2070
2071                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2072                          match.mask->ip_proto);
2073                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2074                          match.key->ip_proto);
2075
2076                 if (match.mask->ip_proto)
2077                         *match_level = MLX5_MATCH_L3;
2078         }
2079
2080         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2081                 struct flow_match_ipv4_addrs match;
2082
2083                 flow_rule_match_ipv4_addrs(rule, &match);
2084                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2085                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2086                        &match.mask->src, sizeof(match.mask->src));
2087                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2088                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2089                        &match.key->src, sizeof(match.key->src));
2090                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2091                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2092                        &match.mask->dst, sizeof(match.mask->dst));
2093                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2094                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2095                        &match.key->dst, sizeof(match.key->dst));
2096
2097                 if (match.mask->src || match.mask->dst)
2098                         *match_level = MLX5_MATCH_L3;
2099         }
2100
2101         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2102                 struct flow_match_ipv6_addrs match;
2103
2104                 flow_rule_match_ipv6_addrs(rule, &match);
2105                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2106                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2107                        &match.mask->src, sizeof(match.mask->src));
2108                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2109                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2110                        &match.key->src, sizeof(match.key->src));
2111
2112                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2113                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2114                        &match.mask->dst, sizeof(match.mask->dst));
2115                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2116                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2117                        &match.key->dst, sizeof(match.key->dst));
2118
2119                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2120                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2121                         *match_level = MLX5_MATCH_L3;
2122         }
2123
2124         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2125                 struct flow_match_ip match;
2126
2127                 flow_rule_match_ip(rule, &match);
2128                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2129                          match.mask->tos & 0x3);
2130                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2131                          match.key->tos & 0x3);
2132
2133                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2134                          match.mask->tos >> 2);
2135                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2136                          match.key->tos  >> 2);
2137
2138                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2139                          match.mask->ttl);
2140                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2141                          match.key->ttl);
2142
2143                 if (match.mask->ttl &&
2144                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2145                                                 ft_field_support.outer_ipv4_ttl)) {
2146                         NL_SET_ERR_MSG_MOD(extack,
2147                                            "Matching on TTL is not supported");
2148                         return -EOPNOTSUPP;
2149                 }
2150
2151                 if (match.mask->tos || match.mask->ttl)
2152                         *match_level = MLX5_MATCH_L3;
2153         }
2154
2155         /* ***  L3 attributes parsing up to here *** */
2156
2157         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2158                 struct flow_match_ports match;
2159
2160                 flow_rule_match_ports(rule, &match);
2161                 switch (ip_proto) {
2162                 case IPPROTO_TCP:
2163                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2164                                  tcp_sport, ntohs(match.mask->src));
2165                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2166                                  tcp_sport, ntohs(match.key->src));
2167
2168                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2169                                  tcp_dport, ntohs(match.mask->dst));
2170                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2171                                  tcp_dport, ntohs(match.key->dst));
2172                         break;
2173
2174                 case IPPROTO_UDP:
2175                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2176                                  udp_sport, ntohs(match.mask->src));
2177                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2178                                  udp_sport, ntohs(match.key->src));
2179
2180                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2181                                  udp_dport, ntohs(match.mask->dst));
2182                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2183                                  udp_dport, ntohs(match.key->dst));
2184                         break;
2185                 default:
2186                         NL_SET_ERR_MSG_MOD(extack,
2187                                            "Only UDP and TCP transports are supported for L4 matching");
2188                         netdev_err(priv->netdev,
2189                                    "Only UDP and TCP transports are supported\n");
2190                         return -EINVAL;
2191                 }
2192
2193                 if (match.mask->src || match.mask->dst)
2194                         *match_level = MLX5_MATCH_L4;
2195         }
2196
2197         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2198                 struct flow_match_tcp match;
2199
2200                 flow_rule_match_tcp(rule, &match);
2201                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2202                          ntohs(match.mask->flags));
2203                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2204                          ntohs(match.key->flags));
2205
2206                 if (match.mask->flags)
2207                         *match_level = MLX5_MATCH_L4;
2208         }
2209
2210         return 0;
2211 }
2212
2213 static int parse_cls_flower(struct mlx5e_priv *priv,
2214                             struct mlx5e_tc_flow *flow,
2215                             struct mlx5_flow_spec *spec,
2216                             struct flow_cls_offload *f,
2217                             struct net_device *filter_dev)
2218 {
2219         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2220         struct netlink_ext_ack *extack = f->common.extack;
2221         struct mlx5_core_dev *dev = priv->mdev;
2222         struct mlx5_eswitch *esw = dev->priv.eswitch;
2223         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2224         struct mlx5_eswitch_rep *rep;
2225         bool is_eswitch_flow;
2226         int err;
2227
2228         inner_match_level = MLX5_MATCH_NONE;
2229         outer_match_level = MLX5_MATCH_NONE;
2230
2231         err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
2232                                  &outer_match_level);
2233         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2234                                  outer_match_level : inner_match_level;
2235
2236         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2237         if (!err && is_eswitch_flow) {
2238                 rep = rpriv->rep;
2239                 if (rep->vport != MLX5_VPORT_UPLINK &&
2240                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2241                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2242                         NL_SET_ERR_MSG_MOD(extack,
2243                                            "Flow is not offloaded due to min inline setting");
2244                         netdev_warn(priv->netdev,
2245                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2246                                     non_tunnel_match_level, esw->offloads.inline_mode);
2247                         return -EOPNOTSUPP;
2248                 }
2249         }
2250
2251         if (is_eswitch_flow) {
2252                 flow->esw_attr->inner_match_level = inner_match_level;
2253                 flow->esw_attr->outer_match_level = outer_match_level;
2254         } else {
2255                 flow->nic_attr->match_level = non_tunnel_match_level;
2256         }
2257
2258         return err;
2259 }
2260
2261 struct pedit_headers {
2262         struct ethhdr  eth;
2263         struct vlan_hdr vlan;
2264         struct iphdr   ip4;
2265         struct ipv6hdr ip6;
2266         struct tcphdr  tcp;
2267         struct udphdr  udp;
2268 };
2269
2270 struct pedit_headers_action {
2271         struct pedit_headers    vals;
2272         struct pedit_headers    masks;
2273         u32                     pedits;
2274 };
2275
2276 static int pedit_header_offsets[] = {
2277         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2278         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2279         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2280         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2281         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2282 };
2283
2284 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2285
2286 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2287                          struct pedit_headers_action *hdrs)
2288 {
2289         u32 *curr_pmask, *curr_pval;
2290
2291         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2292         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2293
2294         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2295                 goto out_err;
2296
2297         *curr_pmask |= mask;
2298         *curr_pval  |= (val & mask);
2299
2300         return 0;
2301
2302 out_err:
2303         return -EOPNOTSUPP;
2304 }
2305
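/* Describes one rewritable HW field: the modify-header field id, its size in
 * bits, the mask of bits covered by this entry, the offset of the software
 * value inside struct pedit_headers and the byte offset of the corresponding
 * field in the FTE match parameters.
 */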
2306 struct mlx5_fields {
2307         u8  field;
2308         u8  field_bsize;
2309         u32 field_mask;
2310         u32 offset;
2311         u32 match_offset;
2312 };
2313
2314 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2315                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2316                  offsetof(struct pedit_headers, field) + (off), \
2317                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2318
2319 /* the masked rewrite value equals the masked match value and every rewritten
2320  * bit is also covered by the match mask (no rewrite without a match).
2321  */
2322 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2323         type matchmaskx = *(type *)(matchmaskp); \
2324         type matchvalx = *(type *)(matchvalp); \
2325         type maskx = *(type *)(maskp); \
2326         type valx = *(type *)(valp); \
2327         \
2328         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2329                                                                  matchmaskx)); \
2330 })
2331
2332 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2333                          void *matchmaskp, u8 bsize)
2334 {
2335         bool same = false;
2336
2337         switch (bsize) {
2338         case 8:
2339                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2340                 break;
2341         case 16:
2342                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2343                 break;
2344         case 32:
2345                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2346                 break;
2347         }
2348
2349         return same;
2350 }
2351
2352 static struct mlx5_fields fields[] = {
2353         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2354         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2355         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2356         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2357         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2358         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2359
2360         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2361         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2362         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2363         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2364
2365         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2366                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2367         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2368                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2369         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2370                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2371         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2372                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2373         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2374                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2375         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2376                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2377         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2378                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2379         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2380                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2381         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2382
2383         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2384         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2385         /* in the Linux tcphdr the TCP flags are 8 bits long (5 bytes past ack_seq) */
2386         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2387
2388         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2389         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2390 };
2391
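/* Convert the accumulated pedit set/add masks and values into HW
 * modify-header actions. For each known field: skip no-op rewrites (value
 * already matched on, or add of zero), reject rewrites of disjoint sub-fields
 * of the same HW field, and otherwise append one set/add action, growing the
 * action array on demand with alloc_mod_hdr_actions().
 */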
2392 static int offload_pedit_fields(struct mlx5e_priv *priv,
2393                                 int namespace,
2394                                 struct pedit_headers_action *hdrs,
2395                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2396                                 u32 *action_flags,
2397                                 struct netlink_ext_ack *extack)
2398 {
2399         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2400         int i, action_size, first, last, next_z;
2401         void *headers_c, *headers_v, *action, *vals_p;
2402         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2403         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2404         struct mlx5_fields *f;
2405         unsigned long mask;
2406         __be32 mask_be32;
2407         __be16 mask_be16;
2408         int err;
2409         u8 cmd;
2410
2411         mod_acts = &parse_attr->mod_hdr_acts;
2412         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2413         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2414
2415         set_masks = &hdrs[0].masks;
2416         add_masks = &hdrs[1].masks;
2417         set_vals = &hdrs[0].vals;
2418         add_vals = &hdrs[1].vals;
2419
2420         action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2421
2422         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2423                 bool skip;
2424
2425                 f = &fields[i];
2426                 /* avoid seeing bits set from previous iterations */
2427                 s_mask = 0;
2428                 a_mask = 0;
2429
2430                 s_masks_p = (void *)set_masks + f->offset;
2431                 a_masks_p = (void *)add_masks + f->offset;
2432
2433                 s_mask = *s_masks_p & f->field_mask;
2434                 a_mask = *a_masks_p & f->field_mask;
2435
2436                 if (!s_mask && !a_mask) /* nothing to offload here */
2437                         continue;
2438
2439                 if (s_mask && a_mask) {
2440                         NL_SET_ERR_MSG_MOD(extack,
2441                                            "can't set and add to the same HW field");
2442                         printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2443                         return -EOPNOTSUPP;
2444                 }
2445
2446                 skip = false;
2447                 if (s_mask) {
2448                         void *match_mask = headers_c + f->match_offset;
2449                         void *match_val = headers_v + f->match_offset;
2450
2451                         cmd  = MLX5_ACTION_TYPE_SET;
2452                         mask = s_mask;
2453                         vals_p = (void *)set_vals + f->offset;
2454                         /* don't rewrite if we have a match on the same value */
2455                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2456                                          match_mask, f->field_bsize))
2457                                 skip = true;
2458                         /* clear to denote we consumed this field */
2459                         *s_masks_p &= ~f->field_mask;
2460                 } else {
2461                         cmd  = MLX5_ACTION_TYPE_ADD;
2462                         mask = a_mask;
2463                         vals_p = (void *)add_vals + f->offset;
2464                         /* adding 0 is a no-op */
2465                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2466                                 skip = true;
2467                         /* clear to denote we consumed this field */
2468                         *a_masks_p &= ~f->field_mask;
2469                 }
2470                 if (skip)
2471                         continue;
2472
2473                 if (f->field_bsize == 32) {
2474                         mask_be32 = *(__be32 *)&mask;
2475                         mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2476                 } else if (f->field_bsize == 16) {
2477                         mask_be16 = *(__be16 *)&mask;
2478                         mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2479                 }
2480
2481                 first = find_first_bit(&mask, f->field_bsize);
2482                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2483                 last  = find_last_bit(&mask, f->field_bsize);
2484                 if (first < next_z && next_z < last) {
2485                         NL_SET_ERR_MSG_MOD(extack,
2486                                            "rewrite of several disjoint sub-fields isn't supported");
2487                         printk(KERN_WARNING "mlx5: rewrite of several disjoint sub-fields (mask %lx) isn't offloaded\n",
2488                                mask);
2489                         return -EOPNOTSUPP;
2490                 }
2491
2492                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2493                 if (err) {
2494                         NL_SET_ERR_MSG_MOD(extack,
2495                                            "too many pedit actions, can't offload");
2496                         mlx5_core_warn(priv->mdev,
2497                                        "mlx5: parsed %d pedit actions, can't do more\n",
2498                                        mod_acts->num_actions);
2499                         return err;
2500                 }
2501
2502                 action = mod_acts->actions +
2503                          (mod_acts->num_actions * action_size);
2504                 MLX5_SET(set_action_in, action, action_type, cmd);
2505                 MLX5_SET(set_action_in, action, field, f->field);
2506
2507                 if (cmd == MLX5_ACTION_TYPE_SET) {
2508                         int start;
2509
2510                         /* a bit-sized field may start at a bit other than the first */
2511                         start = find_first_bit((unsigned long *)&f->field_mask,
2512                                                f->field_bsize);
2513
2514                         MLX5_SET(set_action_in, action, offset, first - start);
2515                         /* length is num of bits to be written, zero means length of 32 */
2516                         MLX5_SET(set_action_in, action, length, (last - first + 1));
2517                 }
2518
2519                 if (f->field_bsize == 32)
2520                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2521                 else if (f->field_bsize == 16)
2522                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2523                 else if (f->field_bsize == 8)
2524                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2525
2526                 ++mod_acts->num_actions;
2527         }
2528
2529         return 0;
2530 }
2531
2532 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2533                                                   int namespace)
2534 {
2535         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2536                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2537         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2538                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2539 }
2540
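/* Make sure there is room for at least one more modify-header action. The
 * action array starts with a single entry and is doubled with krealloc()
 * whenever it fills up, up to the device's max_modify_header_actions limit;
 * -ENOSPC is returned once that limit is reached.
 */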
2541 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2542                           int namespace,
2543                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2544 {
2545         int action_size, new_num_actions, max_hw_actions;
2546         size_t new_sz, old_sz;
2547         void *ret;
2548
2549         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2550                 return 0;
2551
2552         action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2553
2554         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2555                                                                 namespace);
2556         new_num_actions = min(max_hw_actions,
2557                               mod_hdr_acts->actions ?
2558                               mod_hdr_acts->max_actions * 2 : 1);
2559         if (mod_hdr_acts->max_actions == new_num_actions)
2560                 return -ENOSPC;
2561
2562         new_sz = action_size * new_num_actions;
2563         old_sz = mod_hdr_acts->max_actions * action_size;
2564         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2565         if (!ret)
2566                 return -ENOMEM;
2567
2568         memset(ret + old_sz, 0, new_sz - old_sz);
2569         mod_hdr_acts->actions = ret;
2570         mod_hdr_acts->max_actions = new_num_actions;
2571
2572         return 0;
2573 }
2574
2575 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2576 {
2577         kfree(mod_hdr_acts->actions);
2578         mod_hdr_acts->actions = NULL;
2579         mod_hdr_acts->num_actions = 0;
2580         mod_hdr_acts->max_actions = 0;
2581 }
2582
2583 static const struct pedit_headers zero_masks = {};
2584
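/* Record a single pedit set (mangle) or add action into the per-command
 * masks/values in hdrs[]. Legacy (FLOW_ACT_MANGLE_UNSPEC) pedit and devices
 * without modify-header support are rejected; the HW actions themselves are
 * built later from the accumulated values.
 */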
2585 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2586                                  const struct flow_action_entry *act, int namespace,
2587                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2588                                  struct pedit_headers_action *hdrs,
2589                                  struct netlink_ext_ack *extack)
2590 {
2591         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2592         int err = -EOPNOTSUPP;
2593         u32 mask, val, offset;
2594         u8 htype;
2595
2596         htype = act->mangle.htype;
2597         err = -EOPNOTSUPP; /* can't be all optimistic */
2598
2599         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2600                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2601                 goto out_err;
2602         }
2603
2604         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2605                 NL_SET_ERR_MSG_MOD(extack,
2606                                    "The pedit offload action is not supported");
2607                 goto out_err;
2608         }
2609
2610         mask = act->mangle.mask;
2611         val = act->mangle.val;
2612         offset = act->mangle.offset;
2613
2614         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2615         if (err)
2616                 goto out_err;
2617
2618         hdrs[cmd].pedits++;
2619
2620         return 0;
2621 out_err:
2622         return err;
2623 }
2624
2625 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
2626                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2627                                  struct pedit_headers_action *hdrs,
2628                                  u32 *action_flags,
2629                                  struct netlink_ext_ack *extack)
2630 {
2631         struct pedit_headers *cmd_masks;
2632         int err;
2633         u8 cmd;
2634
2635         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
2636                                    action_flags, extack);
2637         if (err < 0)
2638                 goto out_dealloc_parsed_actions;
2639
2640         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
2641                 cmd_masks = &hdrs[cmd].masks;
2642                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
2643                         NL_SET_ERR_MSG_MOD(extack,
2644                                            "attempt to offload an unsupported field");
2645                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
2646                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
2647                                        16, 1, cmd_masks, sizeof(zero_masks), true);
2648                         err = -EOPNOTSUPP;
2649                         goto out_dealloc_parsed_actions;
2650                 }
2651         }
2652
2653         return 0;
2654
2655 out_dealloc_parsed_actions:
2656         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
2657         return err;
2658 }
2659
2660 static bool csum_offload_supported(struct mlx5e_priv *priv,
2661                                    u32 action,
2662                                    u32 update_flags,
2663                                    struct netlink_ext_ack *extack)
2664 {
2665         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
2666                          TCA_CSUM_UPDATE_FLAG_UDP;
2667
2668         /* The HW recalculates checksums only when it is also rewriting headers */
2669         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
2670                 NL_SET_ERR_MSG_MOD(extack,
2671                                    "TC csum action is only offloaded with pedit");
2672                 netdev_warn(priv->netdev,
2673                             "TC csum action is only offloaded with pedit\n");
2674                 return false;
2675         }
2676
2677         if (update_flags & ~prot_flags) {
2678                 NL_SET_ERR_MSG_MOD(extack,
2679                                    "can't offload TC csum action for some header/s");
2680                 netdev_warn(priv->netdev,
2681                             "can't offload TC csum action for some header/s - flags %#x\n",
2682                             update_flags);
2683                 return false;
2684         }
2685
2686         return true;
2687 }
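/*
 * Illustrative sketch (not part of the driver): csum_offload_supported()
 * only accepts a checksum action that rides on a header rewrite and that
 * touches the IPv4/TCP/UDP checksums, e.g.
 *
 *	action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;    (pedit already parsed)
 *	csum_offload_supported(priv, action,
 *			       TCA_CSUM_UPDATE_FLAG_IPV4HDR |
 *			       TCA_CSUM_UPDATE_FLAG_TCP, extack);  -> true
 *
 * The same call without MOD_HDR in 'action', or with an extra flag such as
 * TCA_CSUM_UPDATE_FLAG_ICMP, returns false and the rule is not offloaded.
 */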
2688
2689 struct ip_ttl_word {
2690         __u8    ttl;
2691         __u8    protocol;
2692         __sum16 check;
2693 };
2694
2695 struct ipv6_hoplimit_word {
2696         __be16  payload_len;
2697         __u8    nexthdr;
2698         __u8    hop_limit;
2699 };
2700
2701 static bool is_action_keys_supported(const struct flow_action_entry *act)
2702 {
2703         u32 mask, offset;
2704         u8 htype;
2705
2706         htype = act->mangle.htype;
2707         offset = act->mangle.offset;
2708         mask = ~act->mangle.mask;
2709         /* For the IPv4 and IPv6 headers, check the 4-byte word that
2710          * holds ttl/hop_limit to determine whether any field other
2711          * than ttl/hop_limit is being modified.
2712          */
2713         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
2714                 struct ip_ttl_word *ttl_word =
2715                         (struct ip_ttl_word *)&mask;
2716
2717                 if (offset != offsetof(struct iphdr, ttl) ||
2718                     ttl_word->protocol ||
2719                     ttl_word->check) {
2720                         return true;
2721                 }
2722         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
2723                 struct ipv6_hoplimit_word *hoplimit_word =
2724                         (struct ipv6_hoplimit_word *)&mask;
2725
2726                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
2727                     hoplimit_word->payload_len ||
2728                     hoplimit_word->nexthdr) {
2729                         return true;
2730                 }
2731         }
2732         return false;
2733 }
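/*
 * Illustrative sketch (not part of the driver): is_action_keys_supported()
 * overlays the inverted mask on the 4-byte word holding ttl (IPv4) or
 * hop_limit (IPv6).  If, say, ~mask sets only the ttl byte of struct
 * ip_ttl_word while protocol and check stay zero, and the offset points at
 * the ttl word, the function returns false: a ttl-only rewrite needs no
 * extra restriction.  If ~mask also covers protocol/check, or the offset is
 * elsewhere in the IP header, it returns true and the caller only allows
 * the rewrite for TCP/UDP/ICMP flows.  The concrete mask layout is an
 * assumption used for illustration.
 */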
2734
2735 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
2736                                           struct flow_action *flow_action,
2737                                           u32 actions,
2738                                           struct netlink_ext_ack *extack)
2739 {
2740         const struct flow_action_entry *act;
2741         bool modify_ip_header;
2742         void *headers_v;
2743         u16 ethertype;
2744         u8 ip_proto;
2745         int i;
2746
2747         headers_v = get_match_headers_value(actions, spec);
2748         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2749
2750         /* for non-IP traffic we only rewrite MACs, so we're okay */
2751         if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
2752                 goto out_ok;
2753
2754         modify_ip_header = false;
2755         flow_action_for_each(i, act, flow_action) {
2756                 if (act->id != FLOW_ACTION_MANGLE &&
2757                     act->id != FLOW_ACTION_ADD)
2758                         continue;
2759
2760                 if (is_action_keys_supported(act)) {
2761                         modify_ip_header = true;
2762                         break;
2763                 }
2764         }
2765
2766         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
2767         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
2768             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2769                 NL_SET_ERR_MSG_MOD(extack,
2770                                    "can't offload re-write of non TCP/UDP");
2771                 pr_info("can't offload re-write of ip proto %d\n", ip_proto);
2772                 return false;
2773         }
2774
2775 out_ok:
2776         return true;
2777 }
2778
2779 static bool actions_match_supported(struct mlx5e_priv *priv,
2780                                     struct flow_action *flow_action,
2781                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
2782                                     struct mlx5e_tc_flow *flow,
2783                                     struct netlink_ext_ack *extack)
2784 {
2785         u32 actions;
2786
2787         if (mlx5e_is_eswitch_flow(flow))
2788                 actions = flow->esw_attr->action;
2789         else
2790                 actions = flow->nic_attr->action;
2791
2792         if (flow_flag_test(flow, EGRESS) &&
2793             !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
2794               (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
2795               (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
2796                 return false;
2797
2798         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2799                 return modify_header_match_supported(&parse_attr->spec,
2800                                                      flow_action, actions,
2801                                                      extack);
2802
2803         return true;
2804 }
2805
2806 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
2807 {
2808         struct mlx5_core_dev *fmdev, *pmdev;
2809         u64 fsystem_guid, psystem_guid;
2810
2811         fmdev = priv->mdev;
2812         pmdev = peer_priv->mdev;
2813
2814         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
2815         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
2816
2817         return (fsystem_guid == psystem_guid);
2818 }
2819
2820 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
2821                                    const struct flow_action_entry *act,
2822                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
2823                                    struct pedit_headers_action *hdrs,
2824                                    u32 *action, struct netlink_ext_ack *extack)
2825 {
2826         u16 mask16 = VLAN_VID_MASK;
2827         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
2828         const struct flow_action_entry pedit_act = {
2829                 .id = FLOW_ACTION_MANGLE,
2830                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
2831                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
2832                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
2833                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
2834         };
2835         u8 match_prio_mask, match_prio_val;
2836         void *headers_c, *headers_v;
2837         int err;
2838
2839         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
2840         headers_v = get_match_headers_value(*action, &parse_attr->spec);
2841
2842         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
2843               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
2844                 NL_SET_ERR_MSG_MOD(extack,
2845                                    "VLAN rewrite action must have VLAN protocol match");
2846                 return -EOPNOTSUPP;
2847         }
2848
2849         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
2850         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
2851         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
2852                 NL_SET_ERR_MSG_MOD(extack,
2853                                    "Changing VLAN prio is not supported");
2854                 return -EOPNOTSUPP;
2855         }
2856
2857         err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
2858                                     hdrs, NULL);
2859         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2860
2861         return err;
2862 }
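/*
 * Illustrative note (not part of the driver): a TC "vlan modify" (and the
 * pop+push pair that parse_tc_fdb_actions() collapses into one) is offloaded
 * as an Ethernet-header pedit on h_vlan_TCI, reusing the generic mod-header
 * machinery rather than a dedicated VLAN-rewrite HW action.  For a rule that
 * already matches cvlan_tag with a matching priority, a request such as
 *
 *	act->vlan.vid  == 100
 *	act->vlan.prio == 0
 *
 * becomes a FLOW_ACTION_MANGLE on the 12 VID bits of the TCI (the pedit_act
 * built above) and sets MLX5_FLOW_CONTEXT_ACTION_MOD_HDR.
 */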
2863
2864 static int
2865 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
2866                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2867                                  struct pedit_headers_action *hdrs,
2868                                  u32 *action, struct netlink_ext_ack *extack)
2869 {
2870         const struct flow_action_entry prio_tag_act = {
2871                 .vlan.vid = 0,
2872                 .vlan.prio =
2873                         MLX5_GET(fte_match_set_lyr_2_4,
2874                                  get_match_headers_value(*action,
2875                                                          &parse_attr->spec),
2876                                  first_prio) &
2877                         MLX5_GET(fte_match_set_lyr_2_4,
2878                                  get_match_headers_criteria(*action,
2879                                                             &parse_attr->spec),
2880                                  first_prio),
2881         };
2882
2883         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
2884                                        &prio_tag_act, parse_attr, hdrs, action,
2885                                        extack);
2886 }
2887
2888 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
2889                                 struct flow_action *flow_action,
2890                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2891                                 struct mlx5e_tc_flow *flow,
2892                                 struct netlink_ext_ack *extack)
2893 {
2894         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
2895         struct pedit_headers_action hdrs[2] = {};
2896         const struct flow_action_entry *act;
2897         u32 action = 0;
2898         int err, i;
2899
2900         if (!flow_action_has_entries(flow_action))
2901                 return -EINVAL;
2902
2903         attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
2904
2905         flow_action_for_each(i, act, flow_action) {
2906                 switch (act->id) {
2907                 case FLOW_ACTION_ACCEPT:
2908                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2909                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
2910                         break;
2911                 case FLOW_ACTION_DROP:
2912                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2913                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
2914                                                flow_table_properties_nic_receive.flow_counter))
2915                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2916                         break;
2917                 case FLOW_ACTION_MANGLE:
2918                 case FLOW_ACTION_ADD:
2919                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
2920                                                     parse_attr, hdrs, extack);
2921                         if (err)
2922                                 return err;
2923
2924                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
2925                                   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2926                         break;
2927                 case FLOW_ACTION_VLAN_MANGLE:
2928                         err = add_vlan_rewrite_action(priv,
2929                                                       MLX5_FLOW_NAMESPACE_KERNEL,
2930                                                       act, parse_attr, hdrs,
2931                                                       &action, extack);
2932                         if (err)
2933                                 return err;
2934
2935                         break;
2936                 case FLOW_ACTION_CSUM:
2937                         if (csum_offload_supported(priv, action,
2938                                                    act->csum_flags,
2939                                                    extack))
2940                                 break;
2941
2942                         return -EOPNOTSUPP;
2943                 case FLOW_ACTION_REDIRECT: {
2944                         struct net_device *peer_dev = act->dev;
2945
2946                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
2947                             same_hw_devs(priv, netdev_priv(peer_dev))) {
2948                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
2949                                 flow_flag_set(flow, HAIRPIN);
2950                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2951                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
2952                         } else {
2953                                 NL_SET_ERR_MSG_MOD(extack,
2954                                                    "device is not on same HW, can't offload");
2955                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
2956                                             peer_dev->name);
2957                                 return -EINVAL;
2958                         }
2959                         }
2960                         break;
2961                 case FLOW_ACTION_MARK: {
2962                         u32 mark = act->mark;
2963
2964                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
2965                                 NL_SET_ERR_MSG_MOD(extack,
2966                                                    "Bad flow mark - only 16 bit is supported");
2967                                 return -EINVAL;
2968                         }
2969
2970                         attr->flow_tag = mark;
2971                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2972                         }
2973                         break;
2974                 default:
2975                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
2976                         return -EOPNOTSUPP;
2977                 }
2978         }
2979
2980         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
2981             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
2982                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
2983                                             parse_attr, hdrs, &action, extack);
2984                 if (err)
2985                         return err;
2986                 /* in case all pedit actions are skipped, remove the MOD_HDR
2987                  * flag.
2988                  */
2989                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
2990                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2991                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
2992                 }
2993         }
2994
2995         attr->action = action;
2996         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
2997                 return -EOPNOTSUPP;
2998
2999         return 0;
3000 }
3001
3002 struct encap_key {
3003         const struct ip_tunnel_key *ip_tun_key;
3004         struct mlx5e_tc_tunnel *tc_tunnel;
3005 };
3006
3007 static inline int cmp_encap_info(struct encap_key *a,
3008                                  struct encap_key *b)
3009 {
3010         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3011                a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3012 }
3013
3014 static inline int hash_encap_info(struct encap_key *key)
3015 {
3016         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3017                      key->tc_tunnel->tunnel_type);
3018 }
3019
3020
3021 static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
3022                                   struct net_device *peer_netdev)
3023 {
3024         struct mlx5e_priv *peer_priv;
3025
3026         peer_priv = netdev_priv(peer_netdev);
3027
3028         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3029                 mlx5e_eswitch_rep(priv->netdev) &&
3030                 mlx5e_eswitch_rep(peer_netdev) &&
3031                 same_hw_devs(priv, peer_priv));
3032 }
3033
3034
3035
3036 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3037 {
3038         return refcount_inc_not_zero(&e->refcnt);
3039 }
3040
3041 static struct mlx5e_encap_entry *
3042 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3043                 uintptr_t hash_key)
3044 {
3045         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3046         struct mlx5e_encap_entry *e;
3047         struct encap_key e_key;
3048
3049         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3050                                    encap_hlist, hash_key) {
3051                 e_key.ip_tun_key = &e->tun_info->key;
3052                 e_key.tc_tunnel = e->tunnel;
3053                 if (!cmp_encap_info(&e_key, key) &&
3054                     mlx5e_encap_take(e))
3055                         return e;
3056         }
3057
3058         return NULL;
3059 }
3060
3061 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3062 {
3063         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3064
3065         return kmemdup(tun_info, tun_size, GFP_KERNEL);
3066 }
3067
3068 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3069                                       struct mlx5e_tc_flow *flow,
3070                                       int out_index,
3071                                       struct mlx5e_encap_entry *e,
3072                                       struct netlink_ext_ack *extack)
3073 {
3074         int i;
3075
3076         for (i = 0; i < out_index; i++) {
3077                 if (flow->encaps[i].e != e)
3078                         continue;
3079                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3080                 netdev_err(priv->netdev, "can't duplicate encap action\n");
3081                 return true;
3082         }
3083
3084         return false;
3085 }
3086
3087 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3088                               struct mlx5e_tc_flow *flow,
3089                               struct net_device *mirred_dev,
3090                               int out_index,
3091                               struct netlink_ext_ack *extack,
3092                               struct net_device **encap_dev,
3093                               bool *encap_valid)
3094 {
3095         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3096         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3097         struct mlx5e_tc_flow_parse_attr *parse_attr;
3098         const struct ip_tunnel_info *tun_info;
3099         struct encap_key key;
3100         struct mlx5e_encap_entry *e;
3101         unsigned short family;
3102         uintptr_t hash_key;
3103         int err = 0;
3104
3105         parse_attr = attr->parse_attr;
3106         tun_info = parse_attr->tun_info[out_index];
3107         family = ip_tunnel_info_af(tun_info);
3108         key.ip_tun_key = &tun_info->key;
3109         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3110         if (!key.tc_tunnel) {
3111                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3112                 return -EOPNOTSUPP;
3113         }
3114
3115         hash_key = hash_encap_info(&key);
3116
3117         mutex_lock(&esw->offloads.encap_tbl_lock);
3118         e = mlx5e_encap_get(priv, &key, hash_key);
3119
3120         /* must verify whether the encap entry is valid */
3121         if (e) {
3122                 /* Check that entry was not already attached to this flow */
3123                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3124                         err = -EOPNOTSUPP;
3125                         goto out_err;
3126                 }
3127
3128                 mutex_unlock(&esw->offloads.encap_tbl_lock);
3129                 wait_for_completion(&e->res_ready);
3130
3131                 /* Protect against concurrent neigh update. */
3132                 mutex_lock(&esw->offloads.encap_tbl_lock);
3133                 if (e->compl_result < 0) {
3134                         err = -EREMOTEIO;
3135                         goto out_err;
3136                 }
3137                 goto attach_flow;
3138         }
3139
3140         e = kzalloc(sizeof(*e), GFP_KERNEL);
3141         if (!e) {
3142                 err = -ENOMEM;
3143                 goto out_err;
3144         }
3145
3146         refcount_set(&e->refcnt, 1);
3147         init_completion(&e->res_ready);
3148
3149         tun_info = dup_tun_info(tun_info);
3150         if (!tun_info) {
3151                 err = -ENOMEM;
3152                 goto out_err_init;
3153         }
3154         e->tun_info = tun_info;
3155         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3156         if (err)
3157                 goto out_err_init;
3158
3159         INIT_LIST_HEAD(&e->flows);
3160         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3161         mutex_unlock(&esw->offloads.encap_tbl_lock);
3162
3163         if (family == AF_INET)
3164                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3165         else if (family == AF_INET6)
3166                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3167
3168         /* Protect against concurrent neigh update. */
3169         mutex_lock(&esw->offloads.encap_tbl_lock);
3170         complete_all(&e->res_ready);
3171         if (err) {
3172                 e->compl_result = err;
3173                 goto out_err;
3174         }
3175         e->compl_result = 1;
3176
3177 attach_flow:
3178         flow->encaps[out_index].e = e;
3179         list_add(&flow->encaps[out_index].list, &e->flows);
3180         flow->encaps[out_index].index = out_index;
3181         *encap_dev = e->out_dev;
3182         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3183                 attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3184                 attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3185                 *encap_valid = true;
3186         } else {
3187                 *encap_valid = false;
3188         }
3189         mutex_unlock(&esw->offloads.encap_tbl_lock);
3190
3191         return err;
3192
3193 out_err:
3194         mutex_unlock(&esw->offloads.encap_tbl_lock);
3195         if (e)
3196                 mlx5e_encap_put(priv, e);
3197         return err;
3198
3199 out_err_init:
3200         mutex_unlock(&esw->offloads.encap_tbl_lock);
3201         kfree(tun_info);
3202         kfree(e);
3203         return err;
3204 }
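/*
 * Illustrative sketch (not part of the driver) of the synchronization used
 * by mlx5e_attach_encap() above, restating only what the function does:
 *
 *	mutex_lock(&esw->offloads.encap_tbl_lock);
 *	e = mlx5e_encap_get(priv, &key, hash_key);    (takes a reference)
 *	if (!e) {
 *		e = kzalloc(...);                     first user publishes the
 *		hash_add_rcu(...);                    entry in the table, then
 *		mutex_unlock(...);                    builds the encap header
 *		mlx5e_tc_tun_create_header_ipv4/6();  outside the lock
 *		mutex_lock(...);
 *		complete_all(&e->res_ready);          wake up any waiters
 *	} else {
 *		mutex_unlock(...);
 *		wait_for_completion(&e->res_ready);   wait for the creator
 *		mutex_lock(...);                      re-take vs. neigh updates
 *	}
 *
 * The flow is attached under the lock, and the reference taken by
 * mlx5e_encap_get() is dropped with mlx5e_encap_put() on the error paths.
 */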
3205
3206 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3207                                 const struct flow_action_entry *act,
3208                                 struct mlx5_esw_flow_attr *attr,
3209                                 u32 *action)
3210 {
3211         u8 vlan_idx = attr->total_vlan;
3212
3213         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3214                 return -EOPNOTSUPP;
3215
3216         switch (act->id) {
3217         case FLOW_ACTION_VLAN_POP:
3218                 if (vlan_idx) {
3219                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3220                                                                  MLX5_FS_VLAN_DEPTH))
3221                                 return -EOPNOTSUPP;
3222
3223                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3224                 } else {
3225                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3226                 }
3227                 break;
3228         case FLOW_ACTION_VLAN_PUSH:
3229                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
3230                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
3231                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
3232                 if (!attr->vlan_proto[vlan_idx])
3233                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3234
3235                 if (vlan_idx) {
3236                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3237                                                                  MLX5_FS_VLAN_DEPTH))
3238                                 return -EOPNOTSUPP;
3239
3240                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3241                 } else {
3242                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3243                             (act->vlan.proto != htons(ETH_P_8021Q) ||
3244                              act->vlan.prio))
3245                                 return -EOPNOTSUPP;
3246
3247                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3248                 }
3249                 break;
3250         default:
3251                 return -EINVAL;
3252         }
3253
3254         attr->total_vlan = vlan_idx + 1;
3255
3256         return 0;
3257 }
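/*
 * Illustrative sketch (not part of the driver): parse_tc_vlan_action() maps
 * up to MLX5_FS_VLAN_DEPTH stacked VLAN operations onto distinct HW action
 * bits.  For a QinQ push of an outer tag followed by an inner tag:
 *
 *	1st FLOW_ACTION_VLAN_PUSH -> vlan_vid[0]/vlan_prio[0]/vlan_proto[0]
 *	                             + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
 *	2nd FLOW_ACTION_VLAN_PUSH -> the same fields at index 1
 *	                             + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2
 *	                             (only if the device supports depth 2)
 *
 * A further push or pop fails with -EOPNOTSUPP once vlan_idx reaches
 * MLX5_FS_VLAN_DEPTH.
 */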
3258
3259 static int add_vlan_push_action(struct mlx5e_priv *priv,
3260                                 struct mlx5_esw_flow_attr *attr,
3261                                 struct net_device **out_dev,
3262                                 u32 *action)
3263 {
3264         struct net_device *vlan_dev = *out_dev;
3265         struct flow_action_entry vlan_act = {
3266                 .id = FLOW_ACTION_VLAN_PUSH,
3267                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3268                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3269                 .vlan.prio = 0,
3270         };
3271         int err;
3272
3273         err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3274         if (err)
3275                 return err;
3276
3277         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3278                                         dev_get_iflink(vlan_dev));
3279         if (is_vlan_dev(*out_dev))
3280                 err = add_vlan_push_action(priv, attr, out_dev, action);
3281
3282         return err;
3283 }
3284
3285 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3286                                struct mlx5_esw_flow_attr *attr,
3287                                u32 *action)
3288 {
3289         int nest_level = attr->parse_attr->filter_dev->lower_level;
3290         struct flow_action_entry vlan_act = {
3291                 .id = FLOW_ACTION_VLAN_POP,
3292         };
3293         int err = 0;
3294
3295         while (nest_level--) {
3296                 err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3297                 if (err)
3298                         return err;
3299         }
3300
3301         return err;
3302 }
3303
3304 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3305                                     struct net_device *out_dev)
3306 {
3307         if (is_merged_eswitch_dev(priv, out_dev))
3308                 return true;
3309
3310         return mlx5e_eswitch_rep(out_dev) &&
3311                same_hw_devs(priv, netdev_priv(out_dev));
3312 }
3313
3314 static bool is_duplicated_output_device(struct net_device *dev,
3315                                         struct net_device *out_dev,
3316                                         int *ifindexes, int if_count,
3317                                         struct netlink_ext_ack *extack)
3318 {
3319         int i;
3320
3321         for (i = 0; i < if_count; i++) {
3322                 if (ifindexes[i] == out_dev->ifindex) {
3323                         NL_SET_ERR_MSG_MOD(extack,
3324                                            "can't duplicate output to same device");
3325                         netdev_err(dev, "can't duplicate output to same device: %s\n",
3326                                    out_dev->name);
3327                         return true;
3328                 }
3329         }
3330
3331         return false;
3332 }
3333
3334 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3335                                 struct flow_action *flow_action,
3336                                 struct mlx5e_tc_flow *flow,
3337                                 struct netlink_ext_ack *extack)
3338 {
3339         struct pedit_headers_action hdrs[2] = {};
3340         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3341         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3342         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3343         struct mlx5e_rep_priv *rpriv = priv->ppriv;
3344         const struct ip_tunnel_info *info = NULL;
3345         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
3346         bool ft_flow = mlx5e_is_ft_flow(flow);
3347         const struct flow_action_entry *act;
3348         int err, i, if_count = 0;
3349         bool encap = false;
3350         u32 action = 0;
3351
3352         if (!flow_action_has_entries(flow_action))
3353                 return -EINVAL;
3354
3355         flow_action_for_each(i, act, flow_action) {
3356                 switch (act->id) {
3357                 case FLOW_ACTION_DROP:
3358                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3359                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3360                         break;
3361                 case FLOW_ACTION_MANGLE:
3362                 case FLOW_ACTION_ADD:
3363                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
3364                                                     parse_attr, hdrs, extack);
3365                         if (err)
3366                                 return err;
3367
3368                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3369                         attr->split_count = attr->out_count;
3370                         break;
3371                 case FLOW_ACTION_CSUM:
3372                         if (csum_offload_supported(priv, action,
3373                                                    act->csum_flags, extack))
3374                                 break;
3375
3376                         return -EOPNOTSUPP;
3377                 case FLOW_ACTION_REDIRECT:
3378                 case FLOW_ACTION_MIRRED: {
3379                         struct mlx5e_priv *out_priv;
3380                         struct net_device *out_dev;
3381
3382                         out_dev = act->dev;
3383                         if (!out_dev) {
3384                                 /* out_dev is NULL when filters with a
3385                                  * non-existent mirred device are replayed
3386                                  * to the driver.
3387                                  */
3388                                 return -EINVAL;
3389                         }
3390
3391                         if (ft_flow && out_dev == priv->netdev) {
3392                                 /* Ignore forward to self rules generated
3393                                  * by adding both mlx5 devs to the flow table
3394                                  * block on a normal nft offload setup.
3395                                  */
3396                                 return -EOPNOTSUPP;
3397                         }
3398
3399                         if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
3400                                 NL_SET_ERR_MSG_MOD(extack,
3401                                                    "can't support more output ports, can't offload forwarding");
3402                                 pr_err("can't support more than %d output ports, can't offload forwarding\n",
3403                                        attr->out_count);
3404                                 return -EOPNOTSUPP;
3405                         }
3406
3407                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3408                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3409                         if (encap) {
3410                                 parse_attr->mirred_ifindex[attr->out_count] =
3411                                         out_dev->ifindex;
3412                                 parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
3413                                 if (!parse_attr->tun_info[attr->out_count])
3414                                         return -ENOMEM;
3415                                 encap = false;
3416                                 attr->dests[attr->out_count].flags |=
3417                                         MLX5_ESW_DEST_ENCAP;
3418                                 attr->out_count++;
3419                                 /* attr->dests[].rep is resolved when we
3420                                  * handle encap
3421                                  */
3422                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
3423                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3424                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
3425                                 struct net_device *uplink_upper;
3426
3427                                 if (is_duplicated_output_device(priv->netdev,
3428                                                                 out_dev,
3429                                                                 ifindexes,
3430                                                                 if_count,
3431                                                                 extack))
3432                                         return -EOPNOTSUPP;
3433
3434                                 ifindexes[if_count] = out_dev->ifindex;
3435                                 if_count++;
3436
3437                                 rcu_read_lock();
3438                                 uplink_upper =
3439                                         netdev_master_upper_dev_get_rcu(uplink_dev);
3440                                 if (uplink_upper &&
3441                                     netif_is_lag_master(uplink_upper) &&
3442                                     uplink_upper == out_dev)
3443                                         out_dev = uplink_dev;
3444                                 rcu_read_unlock();
3445
3446                                 if (is_vlan_dev(out_dev)) {
3447                                         err = add_vlan_push_action(priv, attr,
3448                                                                    &out_dev,
3449                                                                    &action);
3450                                         if (err)
3451                                                 return err;
3452                                 }
3453
3454                                 if (is_vlan_dev(parse_attr->filter_dev)) {
3455                                         err = add_vlan_pop_action(priv, attr,
3456                                                                   &action);
3457                                         if (err)
3458                                                 return err;
3459                                 }
3460
3461                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
3462                                         NL_SET_ERR_MSG_MOD(extack,
3463                                                            "devices are not on same switch HW, can't offload forwarding");
3464                                         pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3465                                                priv->netdev->name, out_dev->name);
3466                                         return -EOPNOTSUPP;
3467                                 }
3468
3469                                 out_priv = netdev_priv(out_dev);
3470                                 rpriv = out_priv->ppriv;
3471                                 attr->dests[attr->out_count].rep = rpriv->rep;
3472                                 attr->dests[attr->out_count].mdev = out_priv->mdev;
3473                                 attr->out_count++;
3474                         } else if (parse_attr->filter_dev != priv->netdev) {
3475                                 /* All mlx5 devices are called to configure
3476                                  * high level device filters. Therefore, the
3477                                  * *attempt* to install a filter on an invalid
3478                                  * eswitch should not trigger an explicit error.
3479                                  */
3480                                 return -EINVAL;
3481                         } else {
3482                                 NL_SET_ERR_MSG_MOD(extack,
3483                                                    "devices are not on same switch HW, can't offload forwarding");
3484                                 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3485                                        priv->netdev->name, out_dev->name);
3486                                 return -EINVAL;
3487                         }
3488                         }
3489                         break;
3490                 case FLOW_ACTION_TUNNEL_ENCAP:
3491                         info = act->tunnel;
3492                         if (info)
3493                                 encap = true;
3494                         else
3495                                 return -EOPNOTSUPP;
3496
3497                         break;
3498                 case FLOW_ACTION_VLAN_PUSH:
3499                 case FLOW_ACTION_VLAN_POP:
3500                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
3501                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
3502                                 /* Replace vlan pop+push with vlan modify */
3503                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3504                                 err = add_vlan_rewrite_action(priv,
3505                                                               MLX5_FLOW_NAMESPACE_FDB,
3506                                                               act, parse_attr, hdrs,
3507                                                               &action, extack);
3508                         } else {
3509                                 err = parse_tc_vlan_action(priv, act, attr, &action);
3510                         }
3511                         if (err)
3512                                 return err;
3513
3514                         attr->split_count = attr->out_count;
3515                         break;
3516                 case FLOW_ACTION_VLAN_MANGLE:
3517                         err = add_vlan_rewrite_action(priv,
3518                                                       MLX5_FLOW_NAMESPACE_FDB,
3519                                                       act, parse_attr, hdrs,
3520                                                       &action, extack);
3521                         if (err)
3522                                 return err;
3523
3524                         attr->split_count = attr->out_count;
3525                         break;
3526                 case FLOW_ACTION_TUNNEL_DECAP:
3527                         action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
3528                         break;
3529                 case FLOW_ACTION_GOTO: {
3530                         u32 dest_chain = act->chain_index;
3531                         u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
3532
3533                         if (ft_flow) {
3534                                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3535                                 return -EOPNOTSUPP;
3536                         }
3537                         if (dest_chain <= attr->chain) {
3538                                 NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
3539                                 return -EOPNOTSUPP;
3540                         }
3541                         if (dest_chain > max_chain) {
3542                                 NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
3543                                 return -EOPNOTSUPP;
3544                         }
3545                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3546                         attr->dest_chain = dest_chain;
3547                         break;
3548                         }
3549                 default:
3550                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3551                         return -EOPNOTSUPP;
3552                 }
3553         }
3554
3555         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
3556             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
3557                 /* For prio tag mode, replace the vlan pop action with a
3558                  * vlan prio tag rewrite.
3559                  */
3560                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3561                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
3562                                                        &action, extack);
3563                 if (err)
3564                         return err;
3565         }
3566
3567         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3568             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3569                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3570                                             parse_attr, hdrs, &action, extack);
3571                 if (err)
3572                         return err;
3573                 /* in case all pedit actions are skipped, remove the MOD_HDR
3574                  * flag. We might have set split_count either by pedit or by
3575                  * vlan pop/push; if there is no pop/push either, reset it too.
3576                  */
3577                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3578                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3579                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3580                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3581                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3582                                 attr->split_count = 0;
3583                 }
3584         }
3585
3586         attr->action = action;
3587         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3588                 return -EOPNOTSUPP;
3589
3590         if (attr->dest_chain) {
3591                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3592                         NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3593                         return -EOPNOTSUPP;
3594                 }
3595                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3596         }
3597
3598         if (!(attr->action &
3599               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3600                 NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action");
3601                 return -EOPNOTSUPP;
3602         }
3603
3604         if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3605                 NL_SET_ERR_MSG_MOD(extack,
3606                                    "current firmware doesn't support split rule for port mirroring");
3607                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
3608                 return -EOPNOTSUPP;
3609         }
3610
3611         return 0;
3612 }
3613
3614 static void get_flags(int flags, unsigned long *flow_flags)
3615 {
3616         unsigned long __flow_flags = 0;
3617
3618         if (flags & MLX5_TC_FLAG(INGRESS))
3619                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
3620         if (flags & MLX5_TC_FLAG(EGRESS))
3621                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
3622
3623         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
3624                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3625         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
3626                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3627         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
3628                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
3629
3630         *flow_flags = __flow_flags;
3631 }
3632
3633 static const struct rhashtable_params tc_ht_params = {
3634         .head_offset = offsetof(struct mlx5e_tc_flow, node),
3635         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
3636         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
3637         .automatic_shrinking = true,
3638 };
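/*
 * Illustrative note (not part of the driver): offloaded flows are kept in an
 * rhashtable keyed by the TC filter cookie, so add/delete/stats requests
 * from the stack resolve to a flow with a plain cookie lookup:
 *
 *	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 *
 * as done in mlx5e_configure_flower() and mlx5e_delete_flower() below.
 */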
3639
3640 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
3641                                     unsigned long flags)
3642 {
3643         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3644         struct mlx5e_rep_priv *uplink_rpriv;
3645
3646         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
3647                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
3648                 return &uplink_rpriv->uplink_priv.tc_ht;
3649         } else /* NIC offload */
3650                 return &priv->fs.tc.ht;
3651 }
3652
3653 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
3654 {
3655         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3656         bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
3657                 flow_flag_test(flow, INGRESS);
3658         bool act_is_encap = !!(attr->action &
3659                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
3660         bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
3661                                                 MLX5_DEVCOM_ESW_OFFLOADS);
3662
3663         if (!esw_paired)
3664                 return false;
3665
3666         if ((mlx5_lag_is_sriov(attr->in_mdev) ||
3667              mlx5_lag_is_multipath(attr->in_mdev)) &&
3668             (is_rep_ingress || act_is_encap))
3669                 return true;
3670
3671         return false;
3672 }
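/*
 * Illustrative note (not part of the driver): when the uplinks of two
 * devices are paired (SR-IOV LAG or multipath), a rule installed on one
 * eswitch may need a twin on the peer, e.g. an ingress rep rule or an encap
 * rule, so traffic arriving through either port hits the same offload.
 * That twin is created by mlx5e_tc_add_fdb_peer_flow() further below.
 */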
3673
3674 static int
3675 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
3676                  struct flow_cls_offload *f, unsigned long flow_flags,
3677                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
3678                  struct mlx5e_tc_flow **__flow)
3679 {
3680         struct mlx5e_tc_flow_parse_attr *parse_attr;
3681         struct mlx5e_tc_flow *flow;
3682         int out_index, err;
3683
3684         flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
3685         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3686         if (!parse_attr || !flow) {
3687                 err = -ENOMEM;
3688                 goto err_free;
3689         }
3690
3691         flow->cookie = f->cookie;
3692         flow->flags = flow_flags;
3693         flow->priv = priv;
3694         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
3695                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
3696         INIT_LIST_HEAD(&flow->mod_hdr);
3697         INIT_LIST_HEAD(&flow->hairpin);
3698         refcount_set(&flow->refcnt, 1);
3699         init_completion(&flow->init_done);
3700
3701         *__flow = flow;
3702         *__parse_attr = parse_attr;
3703
3704         return 0;
3705
3706 err_free:
3707         kfree(flow);
3708         kvfree(parse_attr);
3709         return err;
3710 }
3711
3712 static void
3713 mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
3714                          struct mlx5e_priv *priv,
3715                          struct mlx5e_tc_flow_parse_attr *parse_attr,
3716                          struct flow_cls_offload *f,
3717                          struct mlx5_eswitch_rep *in_rep,
3718                          struct mlx5_core_dev *in_mdev)
3719 {
3720         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3721
3722         esw_attr->parse_attr = parse_attr;
3723         esw_attr->chain = f->common.chain_index;
3724         esw_attr->prio = f->common.prio;
3725
3726         esw_attr->in_rep = in_rep;
3727         esw_attr->in_mdev = in_mdev;
3728
3729         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
3730             MLX5_COUNTER_SOURCE_ESWITCH)
3731                 esw_attr->counter_dev = in_mdev;
3732         else
3733                 esw_attr->counter_dev = priv->mdev;
3734 }
3735
3736 static struct mlx5e_tc_flow *
3737 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
3738                      struct flow_cls_offload *f,
3739                      unsigned long flow_flags,
3740                      struct net_device *filter_dev,
3741                      struct mlx5_eswitch_rep *in_rep,
3742                      struct mlx5_core_dev *in_mdev)
3743 {
3744         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
3745         struct netlink_ext_ack *extack = f->common.extack;
3746         struct mlx5e_tc_flow_parse_attr *parse_attr;
3747         struct mlx5e_tc_flow *flow;
3748         int attr_size, err;
3749
3750         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3751         attr_size  = sizeof(struct mlx5_esw_flow_attr);
3752         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
3753                                &parse_attr, &flow);
3754         if (err)
3755                 goto out;
3756
3757         parse_attr->filter_dev = filter_dev;
3758         mlx5e_flow_esw_attr_init(flow->esw_attr,
3759                                  priv, parse_attr,
3760                                  f, in_rep, in_mdev);
3761
3762         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
3763                                f, filter_dev);
3764         if (err)
3765                 goto err_free;
3766
3767         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
3768         if (err)
3769                 goto err_free;
3770
3771         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
3772         complete_all(&flow->init_done);
3773         if (err) {
3774                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
3775                         goto err_free;
3776
3777                 add_unready_flow(flow);
3778         }
3779
3780         return flow;
3781
3782 err_free:
3783         mlx5e_flow_put(priv, flow);
3784 out:
3785         return ERR_PTR(err);
3786 }
3787
3788 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
3789                                       struct mlx5e_tc_flow *flow,
3790                                       unsigned long flow_flags)
3791 {
3792         struct mlx5e_priv *priv = flow->priv, *peer_priv;
3793         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
3794         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
3795         struct mlx5e_tc_flow_parse_attr *parse_attr;
3796         struct mlx5e_rep_priv *peer_urpriv;
3797         struct mlx5e_tc_flow *peer_flow;
3798         struct mlx5_core_dev *in_mdev;
3799         int err = 0;
3800
3801         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3802         if (!peer_esw)
3803                 return -ENODEV;
3804
3805         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
3806         peer_priv = netdev_priv(peer_urpriv->netdev);
3807
3808         /* in_mdev is assigned the device the packet originated from.
3809          * Packets redirected to the uplink therefore use the mdev of
3810          * the original flow, while packets redirected from the uplink
3811          * use the peer mdev.
3812          */
3813         if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
3814                 in_mdev = peer_priv->mdev;
3815         else
3816                 in_mdev = priv->mdev;
3817
3818         parse_attr = flow->esw_attr->parse_attr;
3819         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
3820                                          parse_attr->filter_dev,
3821                                          flow->esw_attr->in_rep, in_mdev);
3822         if (IS_ERR(peer_flow)) {
3823                 err = PTR_ERR(peer_flow);
3824                 goto out;
3825         }
3826
3827         flow->peer_flow = peer_flow;
3828         flow_flag_set(flow, DUP);
3829         mutex_lock(&esw->offloads.peer_mutex);
3830         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
3831         mutex_unlock(&esw->offloads.peer_mutex);
3832
3833 out:
3834         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3835         return err;
3836 }
3837
3838 static int
3839 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
3840                    struct flow_cls_offload *f,
3841                    unsigned long flow_flags,
3842                    struct net_device *filter_dev,
3843                    struct mlx5e_tc_flow **__flow)
3844 {
3845         struct mlx5e_rep_priv *rpriv = priv->ppriv;
3846         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
3847         struct mlx5_core_dev *in_mdev = priv->mdev;
3848         struct mlx5e_tc_flow *flow;
3849         int err;
3850
3851         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
3852                                     in_mdev);
3853         if (IS_ERR(flow))
3854                 return PTR_ERR(flow);
3855
3856         if (is_peer_flow_needed(flow)) {
3857                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
3858                 if (err) {
3859                         mlx5e_tc_del_fdb_flow(priv, flow);
3860                         goto out;
3861                 }
3862         }
3863
3864         *__flow = flow;
3865
3866         return 0;
3867
3868 out:
3869         return err;
3870 }
3871
3872 static int
3873 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
3874                    struct flow_cls_offload *f,
3875                    unsigned long flow_flags,
3876                    struct net_device *filter_dev,
3877                    struct mlx5e_tc_flow **__flow)
3878 {
3879         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
3880         struct netlink_ext_ack *extack = f->common.extack;
3881         struct mlx5e_tc_flow_parse_attr *parse_attr;
3882         struct mlx5e_tc_flow *flow;
3883         int attr_size, err;
3884
3885         /* multi-chain not supported for NIC rules */
3886         if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
3887                 return -EOPNOTSUPP;
3888
3889         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3890         attr_size  = sizeof(struct mlx5_nic_flow_attr);
3891         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
3892                                &parse_attr, &flow);
3893         if (err)
3894                 goto out;
3895
3896         parse_attr->filter_dev = filter_dev;
3897         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
3898                                f, filter_dev);
3899         if (err)
3900                 goto err_free;
3901
3902         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
3903         if (err)
3904                 goto err_free;
3905
3906         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
3907         if (err)
3908                 goto err_free;
3909
3910         flow_flag_set(flow, OFFLOADED);
3911         kvfree(parse_attr);
3912         *__flow = flow;
3913
3914         return 0;
3915
3916 err_free:
3917         mlx5e_flow_put(priv, flow);
3918         kvfree(parse_attr);
3919 out:
3920         return err;
3921 }
3922
3923 static int
3924 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
3925                   struct flow_cls_offload *f,
3926                   unsigned long flags,
3927                   struct net_device *filter_dev,
3928                   struct mlx5e_tc_flow **flow)
3929 {
3930         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3931         unsigned long flow_flags;
3932         int err;
3933
3934         get_flags(flags, &flow_flags);
3935
3936         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
3937                 return -EOPNOTSUPP;
3938
3939         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
3940                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
3941                                          filter_dev, flow);
3942         else
3943                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
3944                                          filter_dev, flow);
3945
3946         return err;
3947 }
3948
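/* Entry point for installing a TC flower classifier. Flows are keyed by
 * cookie in the rhashtable, so a duplicate cookie is rejected with -EEXIST
 * before any new flow is created.
 */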
3949 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
3950                            struct flow_cls_offload *f, unsigned long flags)
3951 {
3952         struct netlink_ext_ack *extack = f->common.extack;
3953         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
3954         struct mlx5e_tc_flow *flow;
3955         int err = 0;
3956
3957         rcu_read_lock();
3958         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
3959         rcu_read_unlock();
3960         if (flow) {
3961                 NL_SET_ERR_MSG_MOD(extack,
3962                                    "flow cookie already exists, ignoring");
3963                 netdev_warn_once(priv->netdev,
3964                                  "flow cookie %lx already exists, ignoring\n",
3965                                  f->cookie);
3966                 err = -EEXIST;
3967                 goto out;
3968         }
3969
3970         trace_mlx5e_configure_flower(f);
3971         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
3972         if (err)
3973                 goto out;
3974
3975         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
3976         if (err)
3977                 goto err_free;
3978
3979         return 0;
3980
3981 err_free:
3982         mlx5e_flow_put(priv, flow);
3983 out:
3984         return err;
3985 }
3986
3987 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
3988 {
3989         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
3990         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
3991
3992         return flow_flag_test(flow, INGRESS) == dir_ingress &&
3993                 flow_flag_test(flow, EGRESS) == dir_egress;
3994 }
3995
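/* Remove a TC flower classifier. The DELETED flag is tested and set under
 * rcu_read_lock() so that concurrent delete requests for the same cookie
 * release the flow only once.
 */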
3996 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
3997                         struct flow_cls_offload *f, unsigned long flags)
3998 {
3999         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4000         struct mlx5e_tc_flow *flow;
4001         int err;
4002
4003         rcu_read_lock();
4004         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4005         if (!flow || !same_flow_direction(flow, flags)) {
4006                 err = -EINVAL;
4007                 goto errout;
4008         }
4009
4010         /* Only delete the flow if the MLX5E_TC_FLOW_FLAG_DELETED flag is not
4011          * already set.
4012          */
4013         if (flow_flag_test_and_set(flow, DELETED)) {
4014                 err = -EINVAL;
4015                 goto errout;
4016         }
4017         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4018         rcu_read_unlock();
4019
4020         trace_mlx5e_delete_flower(f);
4021         mlx5e_flow_put(priv, flow);
4022
4023         return 0;
4024
4025 errout:
4026         rcu_read_unlock();
4027         return err;
4028 }
4029
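/* Report hardware statistics for a flower classifier. Counters are read from
 * the cached flow counter; when a duplicate peer flow is offloaded
 * (multipath), its bytes/packets are added and the most recent lastuse wins.
 */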
4030 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4031                        struct flow_cls_offload *f, unsigned long flags)
4032 {
4033         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4034         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4035         struct mlx5_eswitch *peer_esw;
4036         struct mlx5e_tc_flow *flow;
4037         struct mlx5_fc *counter;
4038         u64 lastuse = 0;
4039         u64 packets = 0;
4040         u64 bytes = 0;
4041         int err = 0;
4042
4043         rcu_read_lock();
4044         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4045                                                 tc_ht_params));
4046         rcu_read_unlock();
4047         if (IS_ERR(flow))
4048                 return PTR_ERR(flow);
4049
4050         if (!same_flow_direction(flow, flags)) {
4051                 err = -EINVAL;
4052                 goto errout;
4053         }
4054
4055         if (mlx5e_is_offloaded_flow(flow)) {
4056                 counter = mlx5e_tc_get_counter(flow);
4057                 if (!counter)
4058                         goto errout;
4059
4060                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4061         }
4062
4063         /* Under multipath it's possible for one rule to be currently
4064          * un-offloaded while the other rule is offloaded.
4065          */
4066         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4067         if (!peer_esw)
4068                 goto out;
4069
4070         if (flow_flag_test(flow, DUP) &&
4071             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4072                 u64 bytes2;
4073                 u64 packets2;
4074                 u64 lastuse2;
4075
4076                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4077                 if (!counter)
4078                         goto no_peer_counter;
4079                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4080
4081                 bytes += bytes2;
4082                 packets += packets2;
4083                 lastuse = max_t(u64, lastuse, lastuse2);
4084         }
4085
4086 no_peer_counter:
4087         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4088 out:
4089         flow_stats_update(&f->stats, bytes, packets, lastuse);
4090         trace_mlx5e_stats_flower(f);
4091 errout:
4092         mlx5e_flow_put(priv, flow);
4093         return err;
4094 }
4095
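/* Program an ingress rate limit on the vport backing this representor.
 * A rate of 0 is used to clear the limit (see mlx5e_tc_delete_matchall()).
 */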
4096 static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
4097                                struct netlink_ext_ack *extack)
4098 {
4099         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4100         struct mlx5_eswitch *esw;
4101         u16 vport_num;
4102         u32 rate_mbps;
4103         int err;
4104
4105         vport_num = rpriv->rep->vport;
4106         if (vport_num >= MLX5_VPORT_ECPF) {
4107                 NL_SET_ERR_MSG_MOD(extack,
4108                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4109                 return -EOPNOTSUPP;
4110         }
4111
4112         esw = priv->mdev->priv.eswitch;
4113         /* The rate is given in bytes/sec.
4114          * Convert it to bits/sec and round to the nearest Mbit/sec
4115          * (Mbit means one million bits).
4116          * Moreover, if the rate is non-zero, configure a minimum of
4117          * 1 Mbit/sec.
4118          */
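        /* For example, rate = 250000 bytes/sec is 2,000,000 bits/sec, giving rate_mbps = 2. */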
4119         rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
4120         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
4121         if (err)
4122                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4123
4124         return err;
4125 }
4126
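/* Validate and apply matchall actions. Exactly one action is accepted and it
 * must be a police action; its byte rate becomes the vport ingress rate limit
 * and the current vport stats are snapshotted as the new baseline.
 */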
4127 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4128                                         struct flow_action *flow_action,
4129                                         struct netlink_ext_ack *extack)
4130 {
4131         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4132         const struct flow_action_entry *act;
4133         int err;
4134         int i;
4135
4136         if (!flow_action_has_entries(flow_action)) {
4137                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4138                 return -EINVAL;
4139         }
4140
4141         if (!flow_offload_has_one_action(flow_action)) {
4142                 NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4143                 return -EOPNOTSUPP;
4144         }
4145
4146         flow_action_for_each(i, act, flow_action) {
4147                 switch (act->id) {
4148                 case FLOW_ACTION_POLICE:
4149                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4150                         if (err)
4151                                 return err;
4152
4153                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4154                         break;
4155                 default:
4156                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4157                         return -EOPNOTSUPP;
4158                 }
4159         }
4160
4161         return 0;
4162 }
4163
4164 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4165                                 struct tc_cls_matchall_offload *ma)
4166 {
4167         struct netlink_ext_ack *extack = ma->common.extack;
4168
4169         if (ma->common.prio != 1) {
4170                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4171                 return -EINVAL;
4172         }
4173
4174         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4175 }
4176
4177 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4178                              struct tc_cls_matchall_offload *ma)
4179 {
4180         struct netlink_ext_ack *extack = ma->common.extack;
4181
4182         return apply_police_params(priv, 0, extack);
4183 }
4184
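/* Report matchall (policer) stats as the delta of the vport counters since
 * the last snapshot taken when the policer was configured or last queried.
 */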
4185 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4186                              struct tc_cls_matchall_offload *ma)
4187 {
4188         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4189         struct rtnl_link_stats64 cur_stats;
4190         u64 dbytes;
4191         u64 dpkts;
4192
4193         cur_stats = priv->stats.vf_vport;
4194         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4195         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4196         rpriv->prev_vf_vport_stats = cur_stats;
4197         flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
4198 }
4199
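/* A device on the same HW is being unregistered: mark every hairpin entry
 * whose peer vhca_id matches it as peer_gone. References are taken under the
 * hairpin table lock, then each entry is waited on and released outside it.
 */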
4200 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4201                                               struct mlx5e_priv *peer_priv)
4202 {
4203         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4204         struct mlx5e_hairpin_entry *hpe, *tmp;
4205         LIST_HEAD(init_wait_list);
4206         u16 peer_vhca_id;
4207         int bkt;
4208
4209         if (!same_hw_devs(priv, peer_priv))
4210                 return;
4211
4212         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4213
4214         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4215         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4216                 if (refcount_inc_not_zero(&hpe->refcnt))
4217                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4218         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4219
4220         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4221                 wait_for_completion(&hpe->res_ready);
4222                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4223                         hpe->hp->pair->peer_gone = true;
4224
4225                 mlx5e_hairpin_put(priv, hpe);
4226         }
4227 }
4228
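/* Netdev notifier: when another mlx5e netdev is unregistered, let hairpin
 * entries that used it as a peer know the peer is gone.
 */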
4229 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4230                                  unsigned long event, void *ptr)
4231 {
4232         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4233         struct mlx5e_flow_steering *fs;
4234         struct mlx5e_priv *peer_priv;
4235         struct mlx5e_tc_table *tc;
4236         struct mlx5e_priv *priv;
4237
4238         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4239             event != NETDEV_UNREGISTER ||
4240             ndev->reg_state == NETREG_REGISTERED)
4241                 return NOTIFY_DONE;
4242
4243         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4244         fs = container_of(tc, struct mlx5e_flow_steering, tc);
4245         priv = container_of(fs, struct mlx5e_priv, fs);
4246         peer_priv = netdev_priv(ndev);
4247         if (priv == peer_priv ||
4248             !(priv->netdev->features & NETIF_F_HW_TC))
4249                 return NOTIFY_DONE;
4250
4251         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4252
4253         return NOTIFY_DONE;
4254 }
4255
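/* Initialize NIC-mode TC state: the mod_hdr and hairpin tables, the flow
 * rhashtable, and the netdev notifier used to catch dying hairpin peers.
 */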
4256 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4257 {
4258         struct mlx5e_tc_table *tc = &priv->fs.tc;
4259         int err;
4260
4261         mutex_init(&tc->t_lock);
4262         mutex_init(&tc->mod_hdr.lock);
4263         hash_init(tc->mod_hdr.hlist);
4264         mutex_init(&tc->hairpin_tbl_lock);
4265         hash_init(tc->hairpin_tbl);
4266
4267         err = rhashtable_init(&tc->ht, &tc_ht_params);
4268         if (err)
4269                 return err;
4270
4271         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4272         err = register_netdevice_notifier_dev_net(priv->netdev,
4273                                                   &tc->netdevice_nb,
4274                                                   &tc->netdevice_nn);
4275         if (err) {
4276                 tc->netdevice_nb.notifier_call = NULL;
4277                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4278         }
4279
4280         return err;
4281 }
4282
4283 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4284 {
4285         struct mlx5e_tc_flow *flow = ptr;
4286         struct mlx5e_priv *priv = flow->priv;
4287
4288         mlx5e_tc_del_flow(priv, flow);
4289         kfree(flow);
4290 }
4291
4292 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4293 {
4294         struct mlx5e_tc_table *tc = &priv->fs.tc;
4295
4296         if (tc->netdevice_nb.notifier_call)
4297                 unregister_netdevice_notifier_dev_net(priv->netdev,
4298                                                       &tc->netdevice_nb,
4299                                                       &tc->netdevice_nn);
4300
4301         mutex_destroy(&tc->mod_hdr.lock);
4302         mutex_destroy(&tc->hairpin_tbl_lock);
4303
4304         rhashtable_destroy(&tc->ht);
4305
4306         if (!IS_ERR_OR_NULL(tc->t)) {
4307                 mlx5_destroy_flow_table(tc->t);
4308                 tc->t = NULL;
4309         }
4310         mutex_destroy(&tc->t_lock);
4311 }
4312
4313 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
4314 {
4315         return rhashtable_init(tc_ht, &tc_ht_params);
4316 }
4317
4318 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
4319 {
4320         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
4321 }
4322
4323 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
4324 {
4325         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4326
4327         return atomic_read(&tc_ht->nelems);
4328 }
4329
4330 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
4331 {
4332         struct mlx5e_tc_flow *flow, *tmp;
4333
4334         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
4335                 __mlx5e_tc_del_fdb_peer_flow(flow);
4336 }
4337
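/* Retry offloading FDB flows that are still on the unready list; flows that
 * now offload successfully are removed from the list.
 */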
4338 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
4339 {
4340         struct mlx5_rep_uplink_priv *rpriv =
4341                 container_of(work, struct mlx5_rep_uplink_priv,
4342                              reoffload_flows_work);
4343         struct mlx5e_tc_flow *flow, *tmp;
4344
4345         mutex_lock(&rpriv->unready_flows_lock);
4346         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
4347                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
4348                         unready_flow_del(flow);
4349         }
4350         mutex_unlock(&rpriv->unready_flows_lock);
4351 }
4352
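/* On receive, reg_c0 in the CQE carries the chain tag written by the
 * offloaded rule. Map it back to a chain id and store it in the tc_skb_ext
 * so that software TC resumes classification from that chain.
 */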
4353 bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
4354                              struct sk_buff *skb)
4355 {
4356 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4357         struct tc_skb_ext *tc_skb_ext;
4358         struct mlx5_eswitch *esw;
4359         struct mlx5e_priv *priv;
4360         u32 chain = 0, reg_c0;
4361         int err;
4362
4363         reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
4364         if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
4365                 reg_c0 = 0;
4366
4367         if (!reg_c0)
4368                 return true;
4369
4370         priv = netdev_priv(skb->dev);
4371         esw = priv->mdev->priv.eswitch;
4372
4373         err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
4374         if (err) {
4375                 netdev_dbg(priv->netdev,
4376                            "Couldn't find chain for chain tag: %d, err: %d\n",
4377                            reg_c0, err);
4378                 return false;
4379         }
4380
4381         if (!chain)
4382                 return true;
4383
4384         tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
4385         if (!tc_skb_ext) {
4386                 WARN_ON_ONCE(1);
4387                 return false;
4388         }
4389
4390         tc_skb_ext->chain = chain;
4391 #endif /* CONFIG_NET_TC_SKB_EXT */
4392
4393         return true;
4394 }