Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / drivers / net / ethernet / broadcom / bnxt / bnxt_tc.c
1 /* Broadcom NetXtreme-C/E network driver.
2  *
3  * Copyright (c) 2017 Broadcom Limited
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  */
9
10 #include <linux/netdevice.h>
11 #include <linux/inetdevice.h>
12 #include <linux/if_vlan.h>
13 #include <net/flow_dissector.h>
14 #include <net/pkt_cls.h>
15 #include <net/tc_act/tc_gact.h>
16 #include <net/tc_act/tc_skbedit.h>
17 #include <net/tc_act/tc_mirred.h>
18 #include <net/tc_act/tc_vlan.h>
19 #include <net/tc_act/tc_pedit.h>
20 #include <net/tc_act/tc_tunnel_key.h>
21 #include <net/vxlan.h>
22
23 #include "bnxt_hsi.h"
24 #include "bnxt.h"
25 #include "bnxt_sriov.h"
26 #include "bnxt_tc.h"
27 #include "bnxt_vfr.h"
28
/* Sentinel fid value used when no valid function id can be resolved */
#define BNXT_FID_INVALID			0xffff
/* Build a host-order VLAN TCI from a VLAN id and a priority value */
#define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))

/* Predicates on a network-order VLAN TCI (or TCI mask): they inspect the
 * priority (PCP) bits and the VLAN id bits after converting to host order.
 */
#define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
	(!(ntohs(vlan_tci_mask) & VLAN_PRIO_MASK))
#define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
	((VLAN_PRIO_MASK & ntohs(vlan_tci_mask)) == VLAN_PRIO_MASK)
#define is_vlan_pcp_zero(vlan_tci)	\
	(!(ntohs(vlan_tci) & VLAN_PRIO_MASK))
#define is_vid_exactmatch(vlan_tci_mask)	\
	((VLAN_VID_MASK & ntohs(vlan_tci_mask)) == VLAN_VID_MASK)
40
41 static bool is_wildcard(void *mask, int len);
42 static bool is_exactmatch(void *mask, int len);
43 /* Return the dst fid of the func for flow forwarding
44  * For PFs: src_fid is the fid of the PF
45  * For VF-reps: src_fid the fid of the VF
46  */
47 static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
48 {
49         struct bnxt *bp;
50
51         /* check if dev belongs to the same switch */
52         if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
53                 netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
54                             dev->ifindex);
55                 return BNXT_FID_INVALID;
56         }
57
58         /* Is dev a VF-rep? */
59         if (bnxt_dev_is_vf_rep(dev))
60                 return bnxt_vf_rep_get_fid(dev);
61
62         bp = netdev_priv(dev);
63         return bp->pf.fw_fid;
64 }
65
66 static int bnxt_tc_parse_redir(struct bnxt *bp,
67                                struct bnxt_tc_actions *actions,
68                                const struct flow_action_entry *act)
69 {
70         struct net_device *dev = act->dev;
71
72         if (!dev) {
73                 netdev_info(bp->dev, "no dev in mirred action");
74                 return -EINVAL;
75         }
76
77         actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
78         actions->dst_dev = dev;
79         return 0;
80 }
81
82 static int bnxt_tc_parse_vlan(struct bnxt *bp,
83                               struct bnxt_tc_actions *actions,
84                               const struct flow_action_entry *act)
85 {
86         switch (act->id) {
87         case FLOW_ACTION_VLAN_POP:
88                 actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
89                 break;
90         case FLOW_ACTION_VLAN_PUSH:
91                 actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
92                 actions->push_vlan_tci = htons(act->vlan.vid);
93                 actions->push_vlan_tpid = act->vlan.proto;
94                 break;
95         default:
96                 return -EOPNOTSUPP;
97         }
98         return 0;
99 }
100
101 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
102                                     struct bnxt_tc_actions *actions,
103                                     const struct flow_action_entry *act)
104 {
105         const struct ip_tunnel_info *tun_info = act->tunnel;
106         const struct ip_tunnel_key *tun_key = &tun_info->key;
107
108         if (ip_tunnel_info_af(tun_info) != AF_INET) {
109                 netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
110                 return -EOPNOTSUPP;
111         }
112
113         actions->tun_encap_key = *tun_key;
114         actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
115         return 0;
116 }
117
118 /* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
119  * each(u32).
120  * This routine consolidates such multiple unaligned values into one
121  * field each for Key & Mask (for src and dst macs separately)
122  * For example,
123  *                      Mask/Key        Offset  Iteration
124  *                      ==========      ======  =========
125  *      dst mac         0xffffffff      0       1
126  *      dst mac         0x0000ffff      4       2
127  *
128  *      src mac         0xffff0000      4       1
129  *      src mac         0xffffffff      8       2
130  *
131  * The above combination coming from the stack will be consolidated as
132  *                      Mask/Key
133  *                      ==============
134  *      src mac:        0xffffffffffff
135  *      dst mac:        0xffffffffffff
136  */
137 static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
138                                  u8 *actual_key, u8 *actual_mask)
139 {
140         u32 key = get_unaligned((u32 *)actual_key);
141         u32 mask = get_unaligned((u32 *)actual_mask);
142
143         part_key &= part_mask;
144         part_key |= key & ~part_mask;
145
146         put_unaligned(mask | part_mask, (u32 *)actual_mask);
147         put_unaligned(part_key, (u32 *)actual_key);
148 }
149
150 static int
151 bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
152                             u16 *eth_addr, u16 *eth_addr_mask)
153 {
154         u16 *p;
155         int j;
156
157         if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
158                 return -EINVAL;
159
160         if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
161                 if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
162                         return -EINVAL;
163                 /* FW expects dmac to be in u16 array format */
164                 p = eth_addr;
165                 for (j = 0; j < 3; j++)
166                         actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
167         }
168
169         if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
170                 if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
171                         return -EINVAL;
172                 /* FW expects smac to be in u16 array format */
173                 p = &eth_addr[ETH_ALEN / 2];
174                 for (j = 0; j < 3; j++)
175                         actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
176         }
177
178         return 0;
179 }
180
181 static int
182 bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
183                     struct flow_action_entry *act, int act_idx, u8 *eth_addr,
184                     u8 *eth_addr_mask)
185 {
186         size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
187         size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
188         u32 mask, val, offset, idx;
189         u8 htype;
190
191         offset = act->mangle.offset;
192         htype = act->mangle.htype;
193         mask = ~act->mangle.mask;
194         val = act->mangle.val;
195
196         switch (htype) {
197         case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
198                 if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
199                         netdev_err(bp->dev,
200                                    "%s: eth_hdr: Invalid pedit field\n",
201                                    __func__);
202                         return -EINVAL;
203                 }
204                 actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
205
206                 bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
207                                      &eth_addr_mask[offset]);
208                 break;
209         case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
210                 actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
211                 actions->nat.l3_is_ipv4 = true;
212                 if (offset ==  offsetof(struct iphdr, saddr)) {
213                         actions->nat.src_xlate = true;
214                         actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
215                 } else if (offset ==  offsetof(struct iphdr, daddr)) {
216                         actions->nat.src_xlate = false;
217                         actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
218                 } else {
219                         netdev_err(bp->dev,
220                                    "%s: IPv4_hdr: Invalid pedit field\n",
221                                    __func__);
222                         return -EINVAL;
223                 }
224
225                 netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
226                            actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
227                            &actions->nat.l3.ipv4.daddr);
228                 break;
229
230         case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
231                 actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
232                 actions->nat.l3_is_ipv4 = false;
233                 if (offset >= offsetof(struct ipv6hdr, saddr) &&
234                     offset < offset_of_ip6_daddr) {
235                         /* 16 byte IPv6 address comes in 4 iterations of
236                          * 4byte chunks each
237                          */
238                         actions->nat.src_xlate = true;
239                         idx = (offset - offset_of_ip6_saddr) / 4;
240                         /* First 4bytes will be copied to idx 0 and so on */
241                         actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
242                 } else if (offset >= offset_of_ip6_daddr &&
243                            offset < offset_of_ip6_daddr + 16) {
244                         actions->nat.src_xlate = false;
245                         idx = (offset - offset_of_ip6_daddr) / 4;
246                         actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
247                 } else {
248                         netdev_err(bp->dev,
249                                    "%s: IPv6_hdr: Invalid pedit field\n",
250                                    __func__);
251                         return -EINVAL;
252                 }
253                 break;
254         case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
255         case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
256                 /* HW does not support L4 rewrite alone without L3
257                  * rewrite
258                  */
259                 if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
260                         netdev_err(bp->dev,
261                                    "Need to specify L3 rewrite as well\n");
262                         return -EINVAL;
263                 }
264                 if (actions->nat.src_xlate)
265                         actions->nat.l4.ports.sport = htons(val);
266                 else
267                         actions->nat.l4.ports.dport = htons(val);
268                 netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
269                            actions->nat.l4.ports.sport,
270                            actions->nat.l4.ports.dport);
271                 break;
272         default:
273                 netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
274                            __func__);
275                 return -EINVAL;
276         }
277         return 0;
278 }
279
280 static int bnxt_tc_parse_actions(struct bnxt *bp,
281                                  struct bnxt_tc_actions *actions,
282                                  struct flow_action *flow_action)
283 {
284         /* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
285          * smac (6 bytes) if rewrite of both is specified, otherwise either
286          * dmac or smac
287          */
288         u16 eth_addr_mask[ETH_ALEN] = { 0 };
289         /* Used to store the L2 rewrite key for dmac (6 bytes) followed by
290          * smac (6 bytes) if rewrite of both is specified, otherwise either
291          * dmac or smac
292          */
293         u16 eth_addr[ETH_ALEN] = { 0 };
294         struct flow_action_entry *act;
295         int i, rc;
296
297         if (!flow_action_has_entries(flow_action)) {
298                 netdev_info(bp->dev, "no actions");
299                 return -EINVAL;
300         }
301
302         flow_action_for_each(i, act, flow_action) {
303                 switch (act->id) {
304                 case FLOW_ACTION_DROP:
305                         actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
306                         return 0; /* don't bother with other actions */
307                 case FLOW_ACTION_REDIRECT:
308                         rc = bnxt_tc_parse_redir(bp, actions, act);
309                         if (rc)
310                                 return rc;
311                         break;
312                 case FLOW_ACTION_VLAN_POP:
313                 case FLOW_ACTION_VLAN_PUSH:
314                 case FLOW_ACTION_VLAN_MANGLE:
315                         rc = bnxt_tc_parse_vlan(bp, actions, act);
316                         if (rc)
317                                 return rc;
318                         break;
319                 case FLOW_ACTION_TUNNEL_ENCAP:
320                         rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
321                         if (rc)
322                                 return rc;
323                         break;
324                 case FLOW_ACTION_TUNNEL_DECAP:
325                         actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
326                         break;
327                 /* Packet edit: L2 rewrite, NAT, NAPT */
328                 case FLOW_ACTION_MANGLE:
329                         rc = bnxt_tc_parse_pedit(bp, actions, act, i,
330                                                  (u8 *)eth_addr,
331                                                  (u8 *)eth_addr_mask);
332                         if (rc)
333                                 return rc;
334                         break;
335                 default:
336                         break;
337                 }
338         }
339
340         if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
341                 rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
342                                                  eth_addr_mask);
343                 if (rc)
344                         return rc;
345         }
346
347         if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
348                 if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
349                         /* dst_fid is PF's fid */
350                         actions->dst_fid = bp->pf.fw_fid;
351                 } else {
352                         /* find the FID from dst_dev */
353                         actions->dst_fid =
354                                 bnxt_flow_get_dst_fid(bp, actions->dst_dev);
355                         if (actions->dst_fid == BNXT_FID_INVALID)
356                                 return -EINVAL;
357                 }
358         }
359
360         return 0;
361 }
362
363 static int bnxt_tc_parse_flow(struct bnxt *bp,
364                               struct flow_cls_offload *tc_flow_cmd,
365                               struct bnxt_tc_flow *flow)
366 {
367         struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
368         struct flow_dissector *dissector = rule->match.dissector;
369
370         /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
371         if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
372             (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
373                 netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
374                             dissector->used_keys);
375                 return -EOPNOTSUPP;
376         }
377
378         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
379                 struct flow_match_basic match;
380
381                 flow_rule_match_basic(rule, &match);
382                 flow->l2_key.ether_type = match.key->n_proto;
383                 flow->l2_mask.ether_type = match.mask->n_proto;
384
385                 if (match.key->n_proto == htons(ETH_P_IP) ||
386                     match.key->n_proto == htons(ETH_P_IPV6)) {
387                         flow->l4_key.ip_proto = match.key->ip_proto;
388                         flow->l4_mask.ip_proto = match.mask->ip_proto;
389                 }
390         }
391
392         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
393                 struct flow_match_eth_addrs match;
394
395                 flow_rule_match_eth_addrs(rule, &match);
396                 flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
397                 ether_addr_copy(flow->l2_key.dmac, match.key->dst);
398                 ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
399                 ether_addr_copy(flow->l2_key.smac, match.key->src);
400                 ether_addr_copy(flow->l2_mask.smac, match.mask->src);
401         }
402
403         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
404                 struct flow_match_vlan match;
405
406                 flow_rule_match_vlan(rule, &match);
407                 flow->l2_key.inner_vlan_tci =
408                         cpu_to_be16(VLAN_TCI(match.key->vlan_id,
409                                              match.key->vlan_priority));
410                 flow->l2_mask.inner_vlan_tci =
411                         cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
412                                               match.mask->vlan_priority)));
413                 flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
414                 flow->l2_mask.inner_vlan_tpid = htons(0xffff);
415                 flow->l2_key.num_vlans = 1;
416         }
417
418         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
419                 struct flow_match_ipv4_addrs match;
420
421                 flow_rule_match_ipv4_addrs(rule, &match);
422                 flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
423                 flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
424                 flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
425                 flow->l3_key.ipv4.saddr.s_addr = match.key->src;
426                 flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
427         } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
428                 struct flow_match_ipv6_addrs match;
429
430                 flow_rule_match_ipv6_addrs(rule, &match);
431                 flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
432                 flow->l3_key.ipv6.daddr = match.key->dst;
433                 flow->l3_mask.ipv6.daddr = match.mask->dst;
434                 flow->l3_key.ipv6.saddr = match.key->src;
435                 flow->l3_mask.ipv6.saddr = match.mask->src;
436         }
437
438         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
439                 struct flow_match_ports match;
440
441                 flow_rule_match_ports(rule, &match);
442                 flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
443                 flow->l4_key.ports.dport = match.key->dst;
444                 flow->l4_mask.ports.dport = match.mask->dst;
445                 flow->l4_key.ports.sport = match.key->src;
446                 flow->l4_mask.ports.sport = match.mask->src;
447         }
448
449         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
450                 struct flow_match_icmp match;
451
452                 flow_rule_match_icmp(rule, &match);
453                 flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
454                 flow->l4_key.icmp.type = match.key->type;
455                 flow->l4_key.icmp.code = match.key->code;
456                 flow->l4_mask.icmp.type = match.mask->type;
457                 flow->l4_mask.icmp.code = match.mask->code;
458         }
459
460         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
461                 struct flow_match_ipv4_addrs match;
462
463                 flow_rule_match_enc_ipv4_addrs(rule, &match);
464                 flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
465                 flow->tun_key.u.ipv4.dst = match.key->dst;
466                 flow->tun_mask.u.ipv4.dst = match.mask->dst;
467                 flow->tun_key.u.ipv4.src = match.key->src;
468                 flow->tun_mask.u.ipv4.src = match.mask->src;
469         } else if (flow_rule_match_key(rule,
470                                       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
471                 return -EOPNOTSUPP;
472         }
473
474         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
475                 struct flow_match_enc_keyid match;
476
477                 flow_rule_match_enc_keyid(rule, &match);
478                 flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
479                 flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
480                 flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
481         }
482
483         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
484                 struct flow_match_ports match;
485
486                 flow_rule_match_enc_ports(rule, &match);
487                 flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
488                 flow->tun_key.tp_dst = match.key->dst;
489                 flow->tun_mask.tp_dst = match.mask->dst;
490                 flow->tun_key.tp_src = match.key->src;
491                 flow->tun_mask.tp_src = match.mask->src;
492         }
493
494         return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action);
495 }
496
497 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
498                                    struct bnxt_tc_flow_node *flow_node)
499 {
500         struct hwrm_cfa_flow_free_input req = { 0 };
501         int rc;
502
503         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
504         if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
505                 req.ext_flow_handle = flow_node->ext_flow_handle;
506         else
507                 req.flow_handle = flow_node->flow_handle;
508
509         rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
510         if (rc)
511                 netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
512
513         return rc;
514 }
515
516 static int ipv6_mask_len(struct in6_addr *mask)
517 {
518         int mask_len = 0, i;
519
520         for (i = 0; i < 4; i++)
521                 mask_len += inet_mask_len(mask->s6_addr32[i]);
522
523         return mask_len;
524 }
525
/* Return true when all @len bytes at @mask are zero (also when @len <= 0). */
static bool is_wildcard(void *mask, int len)
{
	const u8 *cursor = mask;
	int remaining;

	for (remaining = len; remaining > 0; remaining--) {
		if (*cursor++)
			return false;
	}

	return true;
}
537
/* Return true when all @len bytes at @mask are 0xff (also when @len <= 0). */
static bool is_exactmatch(void *mask, int len)
{
	const u8 *cursor = mask;
	int remaining;

	for (remaining = len; remaining > 0; remaining--) {
		if (*cursor++ != 0xff)
			return false;
	}

	return true;
}
549
550 static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
551                                 __be16  vlan_tci)
552 {
553         /* VLAN priority must be either exactly zero or fully wildcarded and
554          * VLAN id must be exact match.
555          */
556         if (is_vid_exactmatch(vlan_tci_mask) &&
557             ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
558               is_vlan_pcp_zero(vlan_tci)) ||
559              is_vlan_pcp_wildcarded(vlan_tci_mask)))
560                 return true;
561
562         return false;
563 }
564
/* Return true when at least one of the @len bytes at @key is non-zero. */
static bool bits_set(void *key, int len)
{
	const u8 *cursor = key;
	int remaining;

	for (remaining = len; remaining > 0; remaining--) {
		if (*cursor++)
			return true;
	}

	return false;
}
576
577 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
578                                     __le16 ref_flow_handle,
579                                     __le32 tunnel_handle,
580                                     struct bnxt_tc_flow_node *flow_node)
581 {
582         struct bnxt_tc_actions *actions = &flow->actions;
583         struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
584         struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
585         struct hwrm_cfa_flow_alloc_input req = { 0 };
586         struct hwrm_cfa_flow_alloc_output *resp;
587         u16 flow_flags = 0, action_flags = 0;
588         int rc;
589
590         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
591
592         req.src_fid = cpu_to_le16(flow->src_fid);
593         req.ref_flow_handle = ref_flow_handle;
594
595         if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
596                 memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
597                        ETH_ALEN);
598                 memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
599                        ETH_ALEN);
600                 action_flags |=
601                         CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
602         }
603
604         if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
605                 if (actions->nat.l3_is_ipv4) {
606                         action_flags |=
607                                 CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
608
609                         if (actions->nat.src_xlate) {
610                                 action_flags |=
611                                         CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
612                                 /* L3 source rewrite */
613                                 req.nat_ip_address[0] =
614                                         actions->nat.l3.ipv4.saddr.s_addr;
615                                 /* L4 source port */
616                                 if (actions->nat.l4.ports.sport)
617                                         req.nat_port =
618                                                 actions->nat.l4.ports.sport;
619                         } else {
620                                 action_flags |=
621                                         CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
622                                 /* L3 destination rewrite */
623                                 req.nat_ip_address[0] =
624                                         actions->nat.l3.ipv4.daddr.s_addr;
625                                 /* L4 destination port */
626                                 if (actions->nat.l4.ports.dport)
627                                         req.nat_port =
628                                                 actions->nat.l4.ports.dport;
629                         }
630                         netdev_dbg(bp->dev,
631                                    "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
632                                    req.nat_ip_address, actions->nat.src_xlate,
633                                    req.nat_port);
634                 } else {
635                         if (actions->nat.src_xlate) {
636                                 action_flags |=
637                                         CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
638                                 /* L3 source rewrite */
639                                 memcpy(req.nat_ip_address,
640                                        actions->nat.l3.ipv6.saddr.s6_addr32,
641                                        sizeof(req.nat_ip_address));
642                                 /* L4 source port */
643                                 if (actions->nat.l4.ports.sport)
644                                         req.nat_port =
645                                                 actions->nat.l4.ports.sport;
646                         } else {
647                                 action_flags |=
648                                         CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
649                                 /* L3 destination rewrite */
650                                 memcpy(req.nat_ip_address,
651                                        actions->nat.l3.ipv6.daddr.s6_addr32,
652                                        sizeof(req.nat_ip_address));
653                                 /* L4 destination port */
654                                 if (actions->nat.l4.ports.dport)
655                                         req.nat_port =
656                                                 actions->nat.l4.ports.dport;
657                         }
658                         netdev_dbg(bp->dev,
659                                    "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
660                                    req.nat_ip_address, actions->nat.src_xlate,
661                                    req.nat_port);
662                 }
663         }
664
665         if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
666             actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
667                 req.tunnel_handle = tunnel_handle;
668                 flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
669                 action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
670         }
671
672         req.ethertype = flow->l2_key.ether_type;
673         req.ip_proto = flow->l4_key.ip_proto;
674
675         if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
676                 memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
677                 memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
678         }
679
680         if (flow->l2_key.num_vlans > 0) {
681                 flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
682                 /* FW expects the inner_vlan_tci value to be set
683                  * in outer_vlan_tci when num_vlans is 1 (which is
684                  * always the case in TC.)
685                  */
686                 req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
687         }
688
689         /* If all IP and L4 fields are wildcarded then this is an L2 flow */
690         if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
691             is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
692                 flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
693         } else {
694                 flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
695                                 CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
696                                 CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
697
698                 if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
699                         req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
700                         req.ip_dst_mask_len =
701                                 inet_mask_len(l3_mask->ipv4.daddr.s_addr);
702                         req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
703                         req.ip_src_mask_len =
704                                 inet_mask_len(l3_mask->ipv4.saddr.s_addr);
705                 } else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
706                         memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
707                                sizeof(req.ip_dst));
708                         req.ip_dst_mask_len =
709                                         ipv6_mask_len(&l3_mask->ipv6.daddr);
710                         memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
711                                sizeof(req.ip_src));
712                         req.ip_src_mask_len =
713                                         ipv6_mask_len(&l3_mask->ipv6.saddr);
714                 }
715         }
716
717         if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
718                 req.l4_src_port = flow->l4_key.ports.sport;
719                 req.l4_src_port_mask = flow->l4_mask.ports.sport;
720                 req.l4_dst_port = flow->l4_key.ports.dport;
721                 req.l4_dst_port_mask = flow->l4_mask.ports.dport;
722         } else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
723                 /* l4 ports serve as type/code when ip_proto is ICMP */
724                 req.l4_src_port = htons(flow->l4_key.icmp.type);
725                 req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
726                 req.l4_dst_port = htons(flow->l4_key.icmp.code);
727                 req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
728         }
729         req.flags = cpu_to_le16(flow_flags);
730
731         if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
732                 action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
733         } else {
734                 if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
735                         action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
736                         req.dst_fid = cpu_to_le16(actions->dst_fid);
737                 }
738                 if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
739                         action_flags |=
740                             CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
741                         req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
742                         req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
743                         memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
744                         memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
745                 }
746                 if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
747                         action_flags |=
748                             CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
749                         /* Rewrite config with tpid = 0 implies vlan pop */
750                         req.l2_rewrite_vlan_tpid = 0;
751                         memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
752                         memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
753                 }
754         }
755         req.action_flags = cpu_to_le16(action_flags);
756
757         mutex_lock(&bp->hwrm_cmd_lock);
758         rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
759         if (!rc) {
760                 resp = bnxt_get_hwrm_resp_addr(bp, &req);
761                 /* CFA_FLOW_ALLOC response interpretation:
762                  *                  fw with          fw with
763                  *                  16-bit           64-bit
764                  *                  flow handle      flow handle
765                  *                  ===========      ===========
766                  * flow_handle      flow handle      flow context id
767                  * ext_flow_handle  INVALID          flow handle
768                  * flow_id          INVALID          flow counter id
769                  */
770                 flow_node->flow_handle = resp->flow_handle;
771                 if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
772                         flow_node->ext_flow_handle = resp->ext_flow_handle;
773                         flow_node->flow_id = resp->flow_id;
774                 }
775         }
776         mutex_unlock(&bp->hwrm_cmd_lock);
777         return rc;
778 }
779
780 static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
781                                        struct bnxt_tc_flow *flow,
782                                        struct bnxt_tc_l2_key *l2_info,
783                                        __le32 ref_decap_handle,
784                                        __le32 *decap_filter_handle)
785 {
786         struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
787         struct hwrm_cfa_decap_filter_alloc_output *resp;
788         struct ip_tunnel_key *tun_key = &flow->tun_key;
789         u32 enables = 0;
790         int rc;
791
792         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
793
794         req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
795         enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
796                    CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
797         req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
798         req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
799
800         if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
801                 enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
802                 /* tunnel_id is wrongly defined in hsi defn. as __le32 */
803                 req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
804         }
805
806         if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
807                 enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
808                 ether_addr_copy(req.dst_macaddr, l2_info->dmac);
809         }
810         if (l2_info->num_vlans) {
811                 enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
812                 req.t_ivlan_vid = l2_info->inner_vlan_tci;
813         }
814
815         enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
816         req.ethertype = htons(ETH_P_IP);
817
818         if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
819                 enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
820                            CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
821                            CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
822                 req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
823                 req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
824                 req.src_ipaddr[0] = tun_key->u.ipv4.src;
825         }
826
827         if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
828                 enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
829                 req.dst_port = tun_key->tp_dst;
830         }
831
832         /* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
833          * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
834          */
835         req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
836         req.enables = cpu_to_le32(enables);
837
838         mutex_lock(&bp->hwrm_cmd_lock);
839         rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
840         if (!rc) {
841                 resp = bnxt_get_hwrm_resp_addr(bp, &req);
842                 *decap_filter_handle = resp->decap_filter_id;
843         } else {
844                 netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
845         }
846         mutex_unlock(&bp->hwrm_cmd_lock);
847
848         return rc;
849 }
850
851 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
852                                       __le32 decap_filter_handle)
853 {
854         struct hwrm_cfa_decap_filter_free_input req = { 0 };
855         int rc;
856
857         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
858         req.decap_filter_id = decap_filter_handle;
859
860         rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
861         if (rc)
862                 netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
863
864         return rc;
865 }
866
867 static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
868                                        struct ip_tunnel_key *encap_key,
869                                        struct bnxt_tc_l2_key *l2_info,
870                                        __le32 *encap_record_handle)
871 {
872         struct hwrm_cfa_encap_record_alloc_input req = { 0 };
873         struct hwrm_cfa_encap_record_alloc_output *resp;
874         struct hwrm_cfa_encap_data_vxlan *encap =
875                         (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
876         struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
877                                 (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
878         int rc;
879
880         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
881
882         req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
883
884         ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
885         ether_addr_copy(encap->src_mac_addr, l2_info->smac);
886         if (l2_info->num_vlans) {
887                 encap->num_vlan_tags = l2_info->num_vlans;
888                 encap->ovlan_tci = l2_info->inner_vlan_tci;
889                 encap->ovlan_tpid = l2_info->inner_vlan_tpid;
890         }
891
892         encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
893         encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
894         encap_ipv4->ttl = encap_key->ttl;
895
896         encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
897         encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
898         encap_ipv4->protocol = IPPROTO_UDP;
899
900         encap->dst_port = encap_key->tp_dst;
901         encap->vni = tunnel_id_to_key32(encap_key->tun_id);
902
903         mutex_lock(&bp->hwrm_cmd_lock);
904         rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
905         if (!rc) {
906                 resp = bnxt_get_hwrm_resp_addr(bp, &req);
907                 *encap_record_handle = resp->encap_record_id;
908         } else {
909                 netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
910         }
911         mutex_unlock(&bp->hwrm_cmd_lock);
912
913         return rc;
914 }
915
916 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
917                                       __le32 encap_record_handle)
918 {
919         struct hwrm_cfa_encap_record_free_input req = { 0 };
920         int rc;
921
922         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
923         req.encap_record_id = encap_record_handle;
924
925         rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
926         if (rc)
927                 netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
928
929         return rc;
930 }
931
932 static int bnxt_tc_put_l2_node(struct bnxt *bp,
933                                struct bnxt_tc_flow_node *flow_node)
934 {
935         struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
936         struct bnxt_tc_info *tc_info = bp->tc_info;
937         int rc;
938
939         /* remove flow_node from the L2 shared flow list */
940         list_del(&flow_node->l2_list_node);
941         if (--l2_node->refcount == 0) {
942                 rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
943                                              tc_info->l2_ht_params);
944                 if (rc)
945                         netdev_err(bp->dev,
946                                    "Error: %s: rhashtable_remove_fast: %d",
947                                    __func__, rc);
948                 kfree_rcu(l2_node, rcu);
949         }
950         return 0;
951 }
952
953 static struct bnxt_tc_l2_node *
954 bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
955                     struct rhashtable_params ht_params,
956                     struct bnxt_tc_l2_key *l2_key)
957 {
958         struct bnxt_tc_l2_node *l2_node;
959         int rc;
960
961         l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
962         if (!l2_node) {
963                 l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
964                 if (!l2_node) {
965                         rc = -ENOMEM;
966                         return NULL;
967                 }
968
969                 l2_node->key = *l2_key;
970                 rc = rhashtable_insert_fast(l2_table, &l2_node->node,
971                                             ht_params);
972                 if (rc) {
973                         kfree_rcu(l2_node, rcu);
974                         netdev_err(bp->dev,
975                                    "Error: %s: rhashtable_insert_fast: %d",
976                                    __func__, rc);
977                         return NULL;
978                 }
979                 INIT_LIST_HEAD(&l2_node->common_l2_flows);
980         }
981         return l2_node;
982 }
983
984 /* Get the ref_flow_handle for a flow by checking if there are any other
985  * flows that share the same L2 key as this flow.
986  */
987 static int
988 bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
989                             struct bnxt_tc_flow_node *flow_node,
990                             __le16 *ref_flow_handle)
991 {
992         struct bnxt_tc_info *tc_info = bp->tc_info;
993         struct bnxt_tc_flow_node *ref_flow_node;
994         struct bnxt_tc_l2_node *l2_node;
995
996         l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
997                                       tc_info->l2_ht_params,
998                                       &flow->l2_key);
999         if (!l2_node)
1000                 return -1;
1001
1002         /* If any other flow is using this l2_node, use it's flow_handle
1003          * as the ref_flow_handle
1004          */
1005         if (l2_node->refcount > 0) {
1006                 ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1007                                                  struct bnxt_tc_flow_node,
1008                                                  l2_list_node);
1009                 *ref_flow_handle = ref_flow_node->flow_handle;
1010         } else {
1011                 *ref_flow_handle = cpu_to_le16(0xffff);
1012         }
1013
1014         /* Insert the l2_node into the flow_node so that subsequent flows
1015          * with a matching l2 key can use the flow_handle of this flow
1016          * as their ref_flow_handle
1017          */
1018         flow_node->l2_node = l2_node;
1019         list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1020         l2_node->refcount++;
1021         return 0;
1022 }
1023
1024 /* After the flow parsing is done, this routine is used for checking
1025  * if there are any aspects of the flow that prevent it from being
1026  * offloaded.
1027  */
1028 static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1029 {
1030         /* If L4 ports are specified then ip_proto must be TCP or UDP */
1031         if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1032             (flow->l4_key.ip_proto != IPPROTO_TCP &&
1033              flow->l4_key.ip_proto != IPPROTO_UDP)) {
1034                 netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
1035                             flow->l4_key.ip_proto);
1036                 return false;
1037         }
1038
1039         /* Currently source/dest MAC cannot be partial wildcard  */
1040         if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1041             !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1042                 netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1043                 return false;
1044         }
1045         if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1046             !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1047                 netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1048                 return false;
1049         }
1050
1051         /* Currently VLAN fields cannot be partial wildcard */
1052         if (bits_set(&flow->l2_key.inner_vlan_tci,
1053                      sizeof(flow->l2_key.inner_vlan_tci)) &&
1054             !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1055                                  flow->l2_key.inner_vlan_tci)) {
1056                 netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1057                 return false;
1058         }
1059         if (bits_set(&flow->l2_key.inner_vlan_tpid,
1060                      sizeof(flow->l2_key.inner_vlan_tpid)) &&
1061             !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1062                            sizeof(flow->l2_mask.inner_vlan_tpid))) {
1063                 netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1064                 return false;
1065         }
1066
1067         /* Currently Ethertype must be set */
1068         if (!is_exactmatch(&flow->l2_mask.ether_type,
1069                            sizeof(flow->l2_mask.ether_type))) {
1070                 netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1071                 return false;
1072         }
1073
1074         return true;
1075 }
1076
1077 /* Returns the final refcount of the node on success
1078  * or a -ve error code on failure
1079  */
1080 static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1081                                    struct rhashtable *tunnel_table,
1082                                    struct rhashtable_params *ht_params,
1083                                    struct bnxt_tc_tunnel_node *tunnel_node)
1084 {
1085         int rc;
1086
1087         if (--tunnel_node->refcount == 0) {
1088                 rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1089                                              *ht_params);
1090                 if (rc) {
1091                         netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
1092                         rc = -1;
1093                 }
1094                 kfree_rcu(tunnel_node, rcu);
1095                 return rc;
1096         } else {
1097                 return tunnel_node->refcount;
1098         }
1099 }
1100
1101 /* Get (or add) either encap or decap tunnel node from/to the supplied
1102  * hash table.
1103  */
1104 static struct bnxt_tc_tunnel_node *
1105 bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1106                         struct rhashtable_params *ht_params,
1107                         struct ip_tunnel_key *tun_key)
1108 {
1109         struct bnxt_tc_tunnel_node *tunnel_node;
1110         int rc;
1111
1112         tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1113         if (!tunnel_node) {
1114                 tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1115                 if (!tunnel_node) {
1116                         rc = -ENOMEM;
1117                         goto err;
1118                 }
1119
1120                 tunnel_node->key = *tun_key;
1121                 tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1122                 rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1123                                             *ht_params);
1124                 if (rc) {
1125                         kfree_rcu(tunnel_node, rcu);
1126                         goto err;
1127                 }
1128         }
1129         tunnel_node->refcount++;
1130         return tunnel_node;
1131 err:
1132         netdev_info(bp->dev, "error rc=%d", rc);
1133         return NULL;
1134 }
1135
1136 static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1137                                         struct bnxt_tc_flow *flow,
1138                                         struct bnxt_tc_l2_key *l2_key,
1139                                         struct bnxt_tc_flow_node *flow_node,
1140                                         __le32 *ref_decap_handle)
1141 {
1142         struct bnxt_tc_info *tc_info = bp->tc_info;
1143         struct bnxt_tc_flow_node *ref_flow_node;
1144         struct bnxt_tc_l2_node *decap_l2_node;
1145
1146         decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1147                                             tc_info->decap_l2_ht_params,
1148                                             l2_key);
1149         if (!decap_l2_node)
1150                 return -1;
1151
1152         /* If any other flow is using this decap_l2_node, use it's decap_handle
1153          * as the ref_decap_handle
1154          */
1155         if (decap_l2_node->refcount > 0) {
1156                 ref_flow_node =
1157                         list_first_entry(&decap_l2_node->common_l2_flows,
1158                                          struct bnxt_tc_flow_node,
1159                                          decap_l2_list_node);
1160                 *ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1161         } else {
1162                 *ref_decap_handle = INVALID_TUNNEL_HANDLE;
1163         }
1164
1165         /* Insert the l2_node into the flow_node so that subsequent flows
1166          * with a matching decap l2 key can use the decap_filter_handle of
1167          * this flow as their ref_decap_handle
1168          */
1169         flow_node->decap_l2_node = decap_l2_node;
1170         list_add(&flow_node->decap_l2_list_node,
1171                  &decap_l2_node->common_l2_flows);
1172         decap_l2_node->refcount++;
1173         return 0;
1174 }
1175
1176 static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1177                                       struct bnxt_tc_flow_node *flow_node)
1178 {
1179         struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1180         struct bnxt_tc_info *tc_info = bp->tc_info;
1181         int rc;
1182
1183         /* remove flow_node from the decap L2 sharing flow list */
1184         list_del(&flow_node->decap_l2_list_node);
1185         if (--decap_l2_node->refcount == 0) {
1186                 rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1187                                              &decap_l2_node->node,
1188                                              tc_info->decap_l2_ht_params);
1189                 if (rc)
1190                         netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
1191                 kfree_rcu(decap_l2_node, rcu);
1192         }
1193 }
1194
1195 static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1196                                      struct bnxt_tc_flow_node *flow_node)
1197 {
1198         __le32 decap_handle = flow_node->decap_node->tunnel_handle;
1199         struct bnxt_tc_info *tc_info = bp->tc_info;
1200         int rc;
1201
1202         if (flow_node->decap_l2_node)
1203                 bnxt_tc_put_decap_l2_node(bp, flow_node);
1204
1205         rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1206                                      &tc_info->decap_ht_params,
1207                                      flow_node->decap_node);
1208         if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1209                 hwrm_cfa_decap_filter_free(bp, decap_handle);
1210 }
1211
1212 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1213                                        struct ip_tunnel_key *tun_key,
1214                                        struct bnxt_tc_l2_key *l2_info)
1215 {
1216 #ifdef CONFIG_INET
1217         struct net_device *real_dst_dev = bp->dev;
1218         struct flowi4 flow = { {0} };
1219         struct net_device *dst_dev;
1220         struct neighbour *nbr;
1221         struct rtable *rt;
1222         int rc;
1223
1224         flow.flowi4_proto = IPPROTO_UDP;
1225         flow.fl4_dport = tun_key->tp_dst;
1226         flow.daddr = tun_key->u.ipv4.dst;
1227
1228         rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1229         if (IS_ERR(rt)) {
1230                 netdev_info(bp->dev, "no route to %pI4b", &flow.daddr);
1231                 return -EOPNOTSUPP;
1232         }
1233
1234         /* The route must either point to the real_dst_dev or a dst_dev that
1235          * uses the real_dst_dev.
1236          */
1237         dst_dev = rt->dst.dev;
1238         if (is_vlan_dev(dst_dev)) {
1239 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1240                 struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1241
1242                 if (vlan->real_dev != real_dst_dev) {
1243                         netdev_info(bp->dev,
1244                                     "dst_dev(%s) doesn't use PF-if(%s)",
1245                                     netdev_name(dst_dev),
1246                                     netdev_name(real_dst_dev));
1247                         rc = -EOPNOTSUPP;
1248                         goto put_rt;
1249                 }
1250                 l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1251                 l2_info->inner_vlan_tpid = vlan->vlan_proto;
1252                 l2_info->num_vlans = 1;
1253 #endif
1254         } else if (dst_dev != real_dst_dev) {
1255                 netdev_info(bp->dev,
1256                             "dst_dev(%s) for %pI4b is not PF-if(%s)",
1257                             netdev_name(dst_dev), &flow.daddr,
1258                             netdev_name(real_dst_dev));
1259                 rc = -EOPNOTSUPP;
1260                 goto put_rt;
1261         }
1262
1263         nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1264         if (!nbr) {
1265                 netdev_info(bp->dev, "can't lookup neighbor for %pI4b",
1266                             &flow.daddr);
1267                 rc = -EOPNOTSUPP;
1268                 goto put_rt;
1269         }
1270
1271         tun_key->u.ipv4.src = flow.saddr;
1272         tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1273         neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1274         ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1275         neigh_release(nbr);
1276         ip_rt_put(rt);
1277
1278         return 0;
1279 put_rt:
1280         ip_rt_put(rt);
1281         return rc;
1282 #else
1283         return -EOPNOTSUPP;
1284 #endif
1285 }
1286
1287 static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1288                                     struct bnxt_tc_flow_node *flow_node,
1289                                     __le32 *decap_filter_handle)
1290 {
1291         struct ip_tunnel_key *decap_key = &flow->tun_key;
1292         struct bnxt_tc_info *tc_info = bp->tc_info;
1293         struct bnxt_tc_l2_key l2_info = { {0} };
1294         struct bnxt_tc_tunnel_node *decap_node;
1295         struct ip_tunnel_key tun_key = { 0 };
1296         struct bnxt_tc_l2_key *decap_l2_info;
1297         __le32 ref_decap_handle;
1298         int rc;
1299
1300         /* Check if there's another flow using the same tunnel decap.
1301          * If not, add this tunnel to the table and resolve the other
1302          * tunnel header fileds. Ignore src_port in the tunnel_key,
1303          * since it is not required for decap filters.
1304          */
1305         decap_key->tp_src = 0;
1306         decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1307                                              &tc_info->decap_ht_params,
1308                                              decap_key);
1309         if (!decap_node)
1310                 return -ENOMEM;
1311
1312         flow_node->decap_node = decap_node;
1313
1314         if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1315                 goto done;
1316
1317         /* Resolve the L2 fields for tunnel decap
1318          * Resolve the route for remote vtep (saddr) of the decap key
1319          * Find it's next-hop mac addrs
1320          */
1321         tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1322         tun_key.tp_dst = flow->tun_key.tp_dst;
1323         rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1324         if (rc)
1325                 goto put_decap;
1326
1327         decap_l2_info = &decap_node->l2_info;
1328         /* decap smac is wildcarded */
1329         ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1330         if (l2_info.num_vlans) {
1331                 decap_l2_info->num_vlans = l2_info.num_vlans;
1332                 decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1333                 decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1334         }
1335         flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1336
1337         /* For getting a decap_filter_handle we first need to check if
1338          * there are any other decap flows that share the same tunnel L2
1339          * key and if so, pass that flow's decap_filter_handle as the
1340          * ref_decap_handle for this flow.
1341          */
1342         rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1343                                           &ref_decap_handle);
1344         if (rc)
1345                 goto put_decap;
1346
1347         /* Issue the hwrm cmd to allocate a decap filter handle */
1348         rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1349                                          ref_decap_handle,
1350                                          &decap_node->tunnel_handle);
1351         if (rc)
1352                 goto put_decap_l2;
1353
1354 done:
1355         *decap_filter_handle = decap_node->tunnel_handle;
1356         return 0;
1357
1358 put_decap_l2:
1359         bnxt_tc_put_decap_l2_node(bp, flow_node);
1360 put_decap:
1361         bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1362                                 &tc_info->decap_ht_params,
1363                                 flow_node->decap_node);
1364         return rc;
1365 }
1366
1367 static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1368                                      struct bnxt_tc_tunnel_node *encap_node)
1369 {
1370         __le32 encap_handle = encap_node->tunnel_handle;
1371         struct bnxt_tc_info *tc_info = bp->tc_info;
1372         int rc;
1373
1374         rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1375                                      &tc_info->encap_ht_params, encap_node);
1376         if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1377                 hwrm_cfa_encap_record_free(bp, encap_handle);
1378 }
1379
1380 /* Lookup the tunnel encap table and check if there's an encap_handle
1381  * alloc'd already.
1382  * If not, query L2 info via a route lookup and issue an encap_record_alloc
1383  * cmd to FW.
1384  */
1385 static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1386                                     struct bnxt_tc_flow_node *flow_node,
1387                                     __le32 *encap_handle)
1388 {
1389         struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1390         struct bnxt_tc_info *tc_info = bp->tc_info;
1391         struct bnxt_tc_tunnel_node *encap_node;
1392         int rc;
1393
1394         /* Check if there's another flow using the same tunnel encap.
1395          * If not, add this tunnel to the table and resolve the other
1396          * tunnel header fileds
1397          */
1398         encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1399                                              &tc_info->encap_ht_params,
1400                                              encap_key);
1401         if (!encap_node)
1402                 return -ENOMEM;
1403
1404         flow_node->encap_node = encap_node;
1405
1406         if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1407                 goto done;
1408
1409         rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1410         if (rc)
1411                 goto put_encap;
1412
1413         /* Allocate a new tunnel encap record */
1414         rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1415                                          &encap_node->tunnel_handle);
1416         if (rc)
1417                 goto put_encap;
1418
1419 done:
1420         *encap_handle = encap_node->tunnel_handle;
1421         return 0;
1422
1423 put_encap:
1424         bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1425                                 &tc_info->encap_ht_params, encap_node);
1426         return rc;
1427 }
1428
1429 static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1430                                       struct bnxt_tc_flow *flow,
1431                                       struct bnxt_tc_flow_node *flow_node)
1432 {
1433         if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1434                 bnxt_tc_put_decap_handle(bp, flow_node);
1435         else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1436                 bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1437 }
1438
1439 static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1440                                      struct bnxt_tc_flow *flow,
1441                                      struct bnxt_tc_flow_node *flow_node,
1442                                      __le32 *tunnel_handle)
1443 {
1444         if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1445                 return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1446                                                 tunnel_handle);
1447         else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1448                 return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1449                                                 tunnel_handle);
1450         else
1451                 return 0;
1452 }
1453 static int __bnxt_tc_del_flow(struct bnxt *bp,
1454                               struct bnxt_tc_flow_node *flow_node)
1455 {
1456         struct bnxt_tc_info *tc_info = bp->tc_info;
1457         int rc;
1458
1459         /* send HWRM cmd to free the flow-id */
1460         bnxt_hwrm_cfa_flow_free(bp, flow_node);
1461
1462         mutex_lock(&tc_info->lock);
1463
1464         /* release references to any tunnel encap/decap nodes */
1465         bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1466
1467         /* release reference to l2 node */
1468         bnxt_tc_put_l2_node(bp, flow_node);
1469
1470         mutex_unlock(&tc_info->lock);
1471
1472         rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1473                                     tc_info->flow_ht_params);
1474         if (rc)
1475                 netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
1476                            __func__, rc);
1477
1478         kfree_rcu(flow_node, rcu);
1479         return 0;
1480 }
1481
1482 static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1483                                  u16 src_fid)
1484 {
1485         flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1486 }
1487
1488 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1489                                 u16 src_fid)
1490 {
1491         if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1492                 flow->src_fid = bp->pf.fw_fid;
1493         else
1494                 flow->src_fid = src_fid;
1495 }
1496
1497 /* Add a new flow or replace an existing flow.
1498  * Notes on locking:
1499  * There are essentially two critical sections here.
1500  * 1. while adding a new flow
1501  *    a) lookup l2-key
1502  *    b) issue HWRM cmd and get flow_handle
1503  *    c) link l2-key with flow
1504  * 2. while deleting a flow
1505  *    a) unlinking l2-key from flow
1506  * A lock is needed to protect these two critical sections.
1507  *
1508  * The hash-tables are already protected by the rhashtable API.
1509  */
1510 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1511                             struct flow_cls_offload *tc_flow_cmd)
1512 {
1513         struct bnxt_tc_flow_node *new_node, *old_node;
1514         struct bnxt_tc_info *tc_info = bp->tc_info;
1515         struct bnxt_tc_flow *flow;
1516         __le32 tunnel_handle = 0;
1517         __le16 ref_flow_handle;
1518         int rc;
1519
1520         /* allocate memory for the new flow and it's node */
1521         new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1522         if (!new_node) {
1523                 rc = -ENOMEM;
1524                 goto done;
1525         }
1526         new_node->cookie = tc_flow_cmd->cookie;
1527         flow = &new_node->flow;
1528
1529         rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1530         if (rc)
1531                 goto free_node;
1532
1533         bnxt_tc_set_src_fid(bp, flow, src_fid);
1534         bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1535
1536         if (!bnxt_tc_can_offload(bp, flow)) {
1537                 rc = -EOPNOTSUPP;
1538                 kfree_rcu(new_node, rcu);
1539                 return rc;
1540         }
1541
1542         /* If a flow exists with the same cookie, delete it */
1543         old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1544                                           &tc_flow_cmd->cookie,
1545                                           tc_info->flow_ht_params);
1546         if (old_node)
1547                 __bnxt_tc_del_flow(bp, old_node);
1548
1549         /* Check if the L2 part of the flow has been offloaded already.
1550          * If so, bump up it's refcnt and get it's reference handle.
1551          */
1552         mutex_lock(&tc_info->lock);
1553         rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1554         if (rc)
1555                 goto unlock;
1556
1557         /* If the flow involves tunnel encap/decap, get tunnel_handle */
1558         rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1559         if (rc)
1560                 goto put_l2;
1561
1562         /* send HWRM cmd to alloc the flow */
1563         rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1564                                       tunnel_handle, new_node);
1565         if (rc)
1566                 goto put_tunnel;
1567
1568         flow->lastused = jiffies;
1569         spin_lock_init(&flow->stats_lock);
1570         /* add new flow to flow-table */
1571         rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1572                                     tc_info->flow_ht_params);
1573         if (rc)
1574                 goto hwrm_flow_free;
1575
1576         mutex_unlock(&tc_info->lock);
1577         return 0;
1578
1579 hwrm_flow_free:
1580         bnxt_hwrm_cfa_flow_free(bp, new_node);
1581 put_tunnel:
1582         bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1583 put_l2:
1584         bnxt_tc_put_l2_node(bp, new_node);
1585 unlock:
1586         mutex_unlock(&tc_info->lock);
1587 free_node:
1588         kfree_rcu(new_node, rcu);
1589 done:
1590         netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
1591                    __func__, tc_flow_cmd->cookie, rc);
1592         return rc;
1593 }
1594
1595 static int bnxt_tc_del_flow(struct bnxt *bp,
1596                             struct flow_cls_offload *tc_flow_cmd)
1597 {
1598         struct bnxt_tc_info *tc_info = bp->tc_info;
1599         struct bnxt_tc_flow_node *flow_node;
1600
1601         flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1602                                            &tc_flow_cmd->cookie,
1603                                            tc_info->flow_ht_params);
1604         if (!flow_node)
1605                 return -EINVAL;
1606
1607         return __bnxt_tc_del_flow(bp, flow_node);
1608 }
1609
1610 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1611                                   struct flow_cls_offload *tc_flow_cmd)
1612 {
1613         struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1614         struct bnxt_tc_info *tc_info = bp->tc_info;
1615         struct bnxt_tc_flow_node *flow_node;
1616         struct bnxt_tc_flow *flow;
1617         unsigned long lastused;
1618
1619         flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1620                                            &tc_flow_cmd->cookie,
1621                                            tc_info->flow_ht_params);
1622         if (!flow_node)
1623                 return -1;
1624
1625         flow = &flow_node->flow;
1626         curr_stats = &flow->stats;
1627         prev_stats = &flow->prev_stats;
1628
1629         spin_lock(&flow->stats_lock);
1630         stats.packets = curr_stats->packets - prev_stats->packets;
1631         stats.bytes = curr_stats->bytes - prev_stats->bytes;
1632         *prev_stats = *curr_stats;
1633         lastused = flow->lastused;
1634         spin_unlock(&flow->stats_lock);
1635
1636         flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets,
1637                           lastused);
1638         return 0;
1639 }
1640
1641 static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1642                                     struct bnxt_tc_flow_node *flow_node,
1643                                     __le16 *flow_handle, __le32 *flow_id)
1644 {
1645         u16 handle;
1646
1647         if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1648                 *flow_id = flow_node->flow_id;
1649
1650                 /* If flow_id is used to fetch flow stats then:
1651                  * 1. lower 12 bits of flow_handle must be set to all 1s.
1652                  * 2. 15th bit of flow_handle must specify the flow
1653                  *    direction (TX/RX).
1654                  */
1655                 if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1656                         handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1657                                  CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1658                 else
1659                         handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1660
1661                 *flow_handle = cpu_to_le16(handle);
1662         } else {
1663                 *flow_handle = flow_node->flow_handle;
1664         }
1665 }
1666
1667 static int
1668 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1669                              struct bnxt_tc_stats_batch stats_batch[])
1670 {
1671         struct hwrm_cfa_flow_stats_input req = { 0 };
1672         struct hwrm_cfa_flow_stats_output *resp;
1673         __le16 *req_flow_handles = &req.flow_handle_0;
1674         __le32 *req_flow_ids = &req.flow_id_0;
1675         int rc, i;
1676
1677         bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
1678         req.num_flows = cpu_to_le16(num_flows);
1679         for (i = 0; i < num_flows; i++) {
1680                 struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1681
1682                 bnxt_fill_cfa_stats_req(bp, flow_node,
1683                                         &req_flow_handles[i], &req_flow_ids[i]);
1684         }
1685
1686         mutex_lock(&bp->hwrm_cmd_lock);
1687         rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
1688         if (!rc) {
1689                 __le64 *resp_packets;
1690                 __le64 *resp_bytes;
1691
1692                 resp = bnxt_get_hwrm_resp_addr(bp, &req);
1693                 resp_packets = &resp->packet_0;
1694                 resp_bytes = &resp->byte_0;
1695
1696                 for (i = 0; i < num_flows; i++) {
1697                         stats_batch[i].hw_stats.packets =
1698                                                 le64_to_cpu(resp_packets[i]);
1699                         stats_batch[i].hw_stats.bytes =
1700                                                 le64_to_cpu(resp_bytes[i]);
1701                 }
1702         } else {
1703                 netdev_info(bp->dev, "error rc=%d", rc);
1704         }
1705         mutex_unlock(&bp->hwrm_cmd_lock);
1706
1707         return rc;
1708 }
1709
/* Fold a fresh HW counter reading into a 64-bit accumulator.
 * Even though val is of type u64, its real width is given by mask, and the
 * counter wraps around beyond that width. A reading smaller than the
 * accumulator's current low (masked) bits is treated as one wrap-around.
 */
static void accumulate_val(u64 *accum, u64 val, u64 mask)
{
	u64 prev_low = *accum & mask;
	u64 total = (*accum & ~mask) + val;

	/* counter rolled over: carry one full counter period */
	if (val < prev_low)
		total += mask + 1;

	*accum = total;
}
1724
1725 /* The HW counters' width is much less than 64bits.
1726  * Handle possible wrap-around while updating the stat counters
1727  */
1728 static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1729                                   struct bnxt_tc_flow_stats *acc_stats,
1730                                   struct bnxt_tc_flow_stats *hw_stats)
1731 {
1732         accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1733         accumulate_val(&acc_stats->packets, hw_stats->packets,
1734                        tc_info->packets_mask);
1735 }
1736
1737 static int
1738 bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1739                                 struct bnxt_tc_stats_batch stats_batch[])
1740 {
1741         struct bnxt_tc_info *tc_info = bp->tc_info;
1742         int rc, i;
1743
1744         rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1745         if (rc)
1746                 return rc;
1747
1748         for (i = 0; i < num_flows; i++) {
1749                 struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1750                 struct bnxt_tc_flow *flow = &flow_node->flow;
1751
1752                 spin_lock(&flow->stats_lock);
1753                 bnxt_flow_stats_accum(tc_info, &flow->stats,
1754                                       &stats_batch[i].hw_stats);
1755                 if (flow->stats.packets != flow->prev_stats.packets)
1756                         flow->lastused = jiffies;
1757                 spin_unlock(&flow->stats_lock);
1758         }
1759
1760         return 0;
1761 }
1762
1763 static int
1764 bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1765                               struct bnxt_tc_stats_batch stats_batch[],
1766                               int *num_flows)
1767 {
1768         struct bnxt_tc_info *tc_info = bp->tc_info;
1769         struct rhashtable_iter *iter = &tc_info->iter;
1770         void *flow_node;
1771         int rc, i;
1772
1773         rhashtable_walk_start(iter);
1774
1775         rc = 0;
1776         for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1777                 flow_node = rhashtable_walk_next(iter);
1778                 if (IS_ERR(flow_node)) {
1779                         i = 0;
1780                         if (PTR_ERR(flow_node) == -EAGAIN) {
1781                                 continue;
1782                         } else {
1783                                 rc = PTR_ERR(flow_node);
1784                                 goto done;
1785                         }
1786                 }
1787
1788                 /* No more flows */
1789                 if (!flow_node)
1790                         goto done;
1791
1792                 stats_batch[i].flow_node = flow_node;
1793         }
1794 done:
1795         rhashtable_walk_stop(iter);
1796         *num_flows = i;
1797         return rc;
1798 }
1799
1800 void bnxt_tc_flow_stats_work(struct bnxt *bp)
1801 {
1802         struct bnxt_tc_info *tc_info = bp->tc_info;
1803         int num_flows, rc;
1804
1805         num_flows = atomic_read(&tc_info->flow_table.nelems);
1806         if (!num_flows)
1807                 return;
1808
1809         rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1810
1811         for (;;) {
1812                 rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1813                                                    &num_flows);
1814                 if (rc) {
1815                         if (rc == -EAGAIN)
1816                                 continue;
1817                         break;
1818                 }
1819
1820                 if (!num_flows)
1821                         break;
1822
1823                 bnxt_tc_flow_stats_batch_update(bp, num_flows,
1824                                                 tc_info->stats_batch);
1825         }
1826
1827         rhashtable_walk_exit(&tc_info->iter);
1828 }
1829
1830 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1831                          struct flow_cls_offload *cls_flower)
1832 {
1833         switch (cls_flower->command) {
1834         case FLOW_CLS_REPLACE:
1835                 return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1836         case FLOW_CLS_DESTROY:
1837                 return bnxt_tc_del_flow(bp, cls_flower);
1838         case FLOW_CLS_STATS:
1839                 return bnxt_tc_get_flow_stats(bp, cls_flower);
1840         default:
1841                 return -EOPNOTSUPP;
1842         }
1843 }
1844
1845 static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1846                                        void *type_data, void *cb_priv)
1847 {
1848         struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1849         struct flow_cls_offload *flower = type_data;
1850         struct bnxt *bp = priv->bp;
1851
1852         if (flower->common.chain_index)
1853                 return -EOPNOTSUPP;
1854
1855         switch (type) {
1856         case TC_SETUP_CLSFLOWER:
1857                 return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1858         default:
1859                 return -EOPNOTSUPP;
1860         }
1861 }
1862
1863 static struct bnxt_flower_indr_block_cb_priv *
1864 bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1865 {
1866         struct bnxt_flower_indr_block_cb_priv *cb_priv;
1867
1868         /* All callback list access should be protected by RTNL. */
1869         ASSERT_RTNL();
1870
1871         list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1872                 if (cb_priv->tunnel_netdev == netdev)
1873                         return cb_priv;
1874
1875         return NULL;
1876 }
1877
1878 static void bnxt_tc_setup_indr_rel(void *cb_priv)
1879 {
1880         struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1881
1882         list_del(&priv->list);
1883         kfree(priv);
1884 }
1885
1886 static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
1887                                     struct flow_block_offload *f)
1888 {
1889         struct bnxt_flower_indr_block_cb_priv *cb_priv;
1890         struct flow_block_cb *block_cb;
1891
1892         if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1893                 return -EOPNOTSUPP;
1894
1895         switch (f->command) {
1896         case FLOW_BLOCK_BIND:
1897                 cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1898                 if (!cb_priv)
1899                         return -ENOMEM;
1900
1901                 cb_priv->tunnel_netdev = netdev;
1902                 cb_priv->bp = bp;
1903                 list_add(&cb_priv->list, &bp->tc_indr_block_list);
1904
1905                 block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1906                                                cb_priv, cb_priv,
1907                                                bnxt_tc_setup_indr_rel);
1908                 if (IS_ERR(block_cb)) {
1909                         list_del(&cb_priv->list);
1910                         kfree(cb_priv);
1911                         return PTR_ERR(block_cb);
1912                 }
1913
1914                 flow_block_cb_add(block_cb, f);
1915                 list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1916                 break;
1917         case FLOW_BLOCK_UNBIND:
1918                 cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1919                 if (!cb_priv)
1920                         return -ENOENT;
1921
1922                 block_cb = flow_block_cb_lookup(f->block,
1923                                                 bnxt_tc_setup_indr_block_cb,
1924                                                 cb_priv);
1925                 if (!block_cb)
1926                         return -ENOENT;
1927
1928                 flow_block_cb_remove(block_cb, f);
1929                 list_del(&block_cb->driver_list);
1930                 break;
1931         default:
1932                 return -EOPNOTSUPP;
1933         }
1934         return 0;
1935 }
1936
1937 static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
1938                                  enum tc_setup_type type, void *type_data)
1939 {
1940         switch (type) {
1941         case TC_SETUP_BLOCK:
1942                 return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
1943         default:
1944                 return -EOPNOTSUPP;
1945         }
1946 }
1947
1948 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1949 {
1950         return netif_is_vxlan(netdev);
1951 }
1952
1953 static int bnxt_tc_indr_block_event(struct notifier_block *nb,
1954                                     unsigned long event, void *ptr)
1955 {
1956         struct net_device *netdev;
1957         struct bnxt *bp;
1958         int rc;
1959
1960         netdev = netdev_notifier_info_to_dev(ptr);
1961         if (!bnxt_is_netdev_indr_offload(netdev))
1962                 return NOTIFY_OK;
1963
1964         bp = container_of(nb, struct bnxt, tc_netdev_nb);
1965
1966         switch (event) {
1967         case NETDEV_REGISTER:
1968                 rc = __flow_indr_block_cb_register(netdev, bp,
1969                                                    bnxt_tc_setup_indr_cb,
1970                                                    bp);
1971                 if (rc)
1972                         netdev_info(bp->dev,
1973                                     "Failed to register indirect blk: dev: %s",
1974                                     netdev->name);
1975                 break;
1976         case NETDEV_UNREGISTER:
1977                 __flow_indr_block_cb_unregister(netdev,
1978                                                 bnxt_tc_setup_indr_cb,
1979                                                 bp);
1980                 break;
1981         }
1982
1983         return NOTIFY_DONE;
1984 }
1985
1986 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1987         .head_offset = offsetof(struct bnxt_tc_flow_node, node),
1988         .key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1989         .key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1990         .automatic_shrinking = true
1991 };
1992
1993 static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1994         .head_offset = offsetof(struct bnxt_tc_l2_node, node),
1995         .key_offset = offsetof(struct bnxt_tc_l2_node, key),
1996         .key_len = BNXT_TC_L2_KEY_LEN,
1997         .automatic_shrinking = true
1998 };
1999
2000 static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
2001         .head_offset = offsetof(struct bnxt_tc_l2_node, node),
2002         .key_offset = offsetof(struct bnxt_tc_l2_node, key),
2003         .key_len = BNXT_TC_L2_KEY_LEN,
2004         .automatic_shrinking = true
2005 };
2006
2007 static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
2008         .head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
2009         .key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
2010         .key_len = sizeof(struct ip_tunnel_key),
2011         .automatic_shrinking = true
2012 };
2013
/* Turn a counter width in bits into a mask with that many low bits set */
#define mask(width)		(~(u64)0 >> (64 - (width)))
2016
2017 int bnxt_init_tc(struct bnxt *bp)
2018 {
2019         struct bnxt_tc_info *tc_info;
2020         int rc;
2021
2022         if (bp->hwrm_spec_code < 0x10803) {
2023                 netdev_warn(bp->dev,
2024                             "Firmware does not support TC flower offload.\n");
2025                 return -ENOTSUPP;
2026         }
2027
2028         tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2029         if (!tc_info)
2030                 return -ENOMEM;
2031         mutex_init(&tc_info->lock);
2032
2033         /* Counter widths are programmed by FW */
2034         tc_info->bytes_mask = mask(36);
2035         tc_info->packets_mask = mask(28);
2036
2037         tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2038         rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2039         if (rc)
2040                 goto free_tc_info;
2041
2042         tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2043         rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2044         if (rc)
2045                 goto destroy_flow_table;
2046
2047         tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2048         rc = rhashtable_init(&tc_info->decap_l2_table,
2049                              &tc_info->decap_l2_ht_params);
2050         if (rc)
2051                 goto destroy_l2_table;
2052
2053         tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2054         rc = rhashtable_init(&tc_info->decap_table,
2055                              &tc_info->decap_ht_params);
2056         if (rc)
2057                 goto destroy_decap_l2_table;
2058
2059         tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2060         rc = rhashtable_init(&tc_info->encap_table,
2061                              &tc_info->encap_ht_params);
2062         if (rc)
2063                 goto destroy_decap_table;
2064
2065         tc_info->enabled = true;
2066         bp->dev->hw_features |= NETIF_F_HW_TC;
2067         bp->dev->features |= NETIF_F_HW_TC;
2068         bp->tc_info = tc_info;
2069
2070         /* init indirect block notifications */
2071         INIT_LIST_HEAD(&bp->tc_indr_block_list);
2072         bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event;
2073         rc = register_netdevice_notifier(&bp->tc_netdev_nb);
2074         if (!rc)
2075                 return 0;
2076
2077         rhashtable_destroy(&tc_info->encap_table);
2078
2079 destroy_decap_table:
2080         rhashtable_destroy(&tc_info->decap_table);
2081 destroy_decap_l2_table:
2082         rhashtable_destroy(&tc_info->decap_l2_table);
2083 destroy_l2_table:
2084         rhashtable_destroy(&tc_info->l2_table);
2085 destroy_flow_table:
2086         rhashtable_destroy(&tc_info->flow_table);
2087 free_tc_info:
2088         kfree(tc_info);
2089         return rc;
2090 }
2091
2092 void bnxt_shutdown_tc(struct bnxt *bp)
2093 {
2094         struct bnxt_tc_info *tc_info = bp->tc_info;
2095
2096         if (!bnxt_tc_flower_enabled(bp))
2097                 return;
2098
2099         unregister_netdevice_notifier(&bp->tc_netdev_nb);
2100         rhashtable_destroy(&tc_info->flow_table);
2101         rhashtable_destroy(&tc_info->l2_table);
2102         rhashtable_destroy(&tc_info->decap_l2_table);
2103         rhashtable_destroy(&tc_info->decap_table);
2104         rhashtable_destroy(&tc_info->encap_table);
2105         kfree(tc_info);
2106         bp->tc_info = NULL;
2107 }