// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "tc_counters.h"
#include "tc_encap_actions.h"
#include "mae_counter_format.h"
#include "mae.h"
#include "rx_common.h"

/* Counter-management hashtables */

static const struct rhashtable_params efx_tc_counter_id_ht_params = {
        .key_len        = offsetof(struct efx_tc_counter_index, linkage),
        .key_offset     = 0,
        .head_offset    = offsetof(struct efx_tc_counter_index, linkage),
};

static const struct rhashtable_params efx_tc_counter_ht_params = {
        .key_len        = offsetof(struct efx_tc_counter, linkage),
        .key_offset     = 0,
        .head_offset    = offsetof(struct efx_tc_counter, linkage),
};
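/* Both tables use rhashtable's implicit-key layout: with key_offset 0 and
 * key_len = offsetof(..., linkage), the hash key is every member laid out
 * before 'linkage' in the struct (fw_id and type for efx_tc_counter, the
 * TC cookie for efx_tc_counter_index; see the lookup helpers below, which
 * build their keys from exactly those members).
 */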

static void efx_tc_counter_free(void *ptr, void *__unused)
{
        struct efx_tc_counter *cnt = ptr;

        WARN_ON(!list_empty(&cnt->users));
        /* We'd like to synchronize_rcu() here, but unfortunately we aren't
         * removing the element from the hashtable (it's not clear that's a
         * safe thing to do in an rhashtable_free_and_destroy free_fn), so
         * threads could still be obtaining new pointers to *cnt if they can
         * race against this function at all.
         */
        flush_work(&cnt->work);
        EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
        kfree(cnt);
}

static void efx_tc_counter_id_free(void *ptr, void *__unused)
{
        struct efx_tc_counter_index *ctr = ptr;

        WARN_ON(refcount_read(&ctr->ref));
        kfree(ctr);
}

int efx_tc_init_counters(struct efx_nic *efx)
{
        int rc;

        rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
        if (rc < 0)
                goto fail_counter_id_ht;
        rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
        if (rc < 0)
                goto fail_counter_ht;
        return 0;
fail_counter_ht:
        rhashtable_destroy(&efx->tc->counter_id_ht);
fail_counter_id_ht:
        return rc;
}

/* Only call this in init failure teardown.
 * Normal exit should fini instead as there may be entries in the table.
 */
void efx_tc_destroy_counters(struct efx_nic *efx)
{
        rhashtable_destroy(&efx->tc->counter_ht);
        rhashtable_destroy(&efx->tc->counter_id_ht);
}

void efx_tc_fini_counters(struct efx_nic *efx)
{
        rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
        rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
}
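/* Note the fini ordering above: the cookie-to-counter mappings are torn
 * down before the counters themselves, since each efx_tc_counter_index
 * holds a pointer to its efx_tc_counter.
 */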

static void efx_tc_counter_work(struct work_struct *work)
{
        struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
        struct efx_tc_encap_action *encap;
        struct efx_tc_action_set *act;
        unsigned long touched;
        struct neighbour *n;

        spin_lock_bh(&cnt->lock);
        touched = READ_ONCE(cnt->touched);

        list_for_each_entry(act, &cnt->users, count_user) {
                encap = act->encap_md;
                if (!encap)
                        continue;
                if (!encap->neigh) /* can't happen */
                        continue;
                if (time_after_eq(encap->neigh->used, touched))
                        continue;
                encap->neigh->used = touched;
                /* We have passed traffic using this ARP entry, so
                 * indicate to the ARP cache that it's still active
                 */
                if (encap->neigh->dst_ip)
                        n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
                                         encap->neigh->egdev);
                else
#if IS_ENABLED(CONFIG_IPV6)
                        n = neigh_lookup(ipv6_stub->nd_tbl,
                                         &encap->neigh->dst_ip6,
                                         encap->neigh->egdev);
#else
                        n = NULL;
#endif
                if (!n)
                        continue;

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
        spin_unlock_bh(&cnt->lock);
}

/* Counter allocation */

struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
                                                      int type)
{
        struct efx_tc_counter *cnt;
        int rc, rc2;

        cnt = kzalloc(sizeof(*cnt), GFP_USER);
        if (!cnt)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&cnt->lock);
        INIT_WORK(&cnt->work, efx_tc_counter_work);
        cnt->touched = jiffies;
        cnt->type = type;

        rc = efx_mae_allocate_counter(efx, cnt);
        if (rc)
                goto fail1;
        INIT_LIST_HEAD(&cnt->users);
        rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
                                    efx_tc_counter_ht_params);
        if (rc)
                goto fail2;
        return cnt;
fail2:
        /* If we get here, it implies that we couldn't insert into the table,
         * which in turn probably means that the fw_id was already taken.
         * In that case, it's unclear whether we really 'own' the fw_id; but
         * the firmware seemed to think we did, so it's proper to free it.
         */
        rc2 = efx_mae_free_counter(efx, cnt);
        if (rc2)
                netif_warn(efx, hw, efx->net_dev,
                           "Failed to free MAE counter %u, rc %d\n",
                           cnt->fw_id, rc2);
fail1:
        kfree(cnt);
        return ERR_PTR(rc > 0 ? -EIO : rc);
}

void efx_tc_flower_release_counter(struct efx_nic *efx,
                                   struct efx_tc_counter *cnt)
{
        int rc;

        rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
                               efx_tc_counter_ht_params);
        rc = efx_mae_free_counter(efx, cnt);
        if (rc)
                netif_warn(efx, hw, efx->net_dev,
                           "Failed to free MAE counter %u, rc %d\n",
                           cnt->fw_id, rc);
        WARN_ON(!list_empty(&cnt->users));
        /* This doesn't protect counter updates coming in arbitrarily long
         * after we deleted the counter.  The RCU just ensures that we won't
         * free the counter while another thread has a pointer to it.
         * Ensuring we don't update the wrong counter if the ID gets re-used
         * is handled by the generation count.
         */
        synchronize_rcu();
        flush_work(&cnt->work);
        EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
        kfree(cnt);
}

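/* Callers must prevent the returned counter from being freed underneath
 * them, e.g. by holding rcu_read_lock() as efx_tc_counter_update() does;
 * efx_tc_flower_release_counter() does synchronize_rcu() before kfree().
 */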
static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
                                struct efx_nic *efx, int type, u32 fw_id)
{
        struct efx_tc_counter key = {};

        key.fw_id = fw_id;
        key.type = type;

        return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
                                      efx_tc_counter_ht_params);
}

/* TC cookie to counter mapping */

void efx_tc_flower_put_counter_index(struct efx_nic *efx,
                                     struct efx_tc_counter_index *ctr)
{
        if (!refcount_dec_and_test(&ctr->ref))
                return; /* still in use */
        rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
                               efx_tc_counter_id_ht_params);
        efx_tc_flower_release_counter(efx, ctr->cnt);
        kfree(ctr);
}

struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
                                struct efx_nic *efx, unsigned long cookie,
                                enum efx_tc_counter_type type)
{
        struct efx_tc_counter_index *ctr, *old;
        struct efx_tc_counter *cnt;

        ctr = kzalloc(sizeof(*ctr), GFP_USER);
        if (!ctr)
                return ERR_PTR(-ENOMEM);
        ctr->cookie = cookie;
        old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
                                                &ctr->linkage,
                                                efx_tc_counter_id_ht_params);
        if (old) {
                /* don't need our new entry */
                kfree(ctr);
                if (IS_ERR(old)) /* oh dear, it's actually an error */
                        return ERR_CAST(old);
                if (!refcount_inc_not_zero(&old->ref))
                        return ERR_PTR(-EAGAIN);
                /* existing entry found */
                ctr = old;
        } else {
                cnt = efx_tc_flower_allocate_counter(efx, type);
                if (IS_ERR(cnt)) {
                        rhashtable_remove_fast(&efx->tc->counter_id_ht,
                                               &ctr->linkage,
                                               efx_tc_counter_id_ht_params);
                        kfree(ctr);
                        return (void *)cnt; /* it's an ERR_PTR */
                }
                ctr->cnt = cnt;
                refcount_set(&ctr->ref, 1);
        }
        return ctr;
}
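/* Illustrative caller pattern (a sketch: the real callers live in tc.c,
 * and the 'fa' action cookie below is an assumption, not defined here):
 *
 *      ctr = efx_tc_flower_get_counter_index(efx, fa->cookie,
 *                                            EFX_TC_COUNTER_TYPE_AR);
 *      if (IS_ERR(ctr))
 *              return PTR_ERR(ctr);
 *      ...use ctr->cnt->fw_id in the hardware action-set...
 *      efx_tc_flower_put_counter_index(efx, ctr);
 */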

struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
                                struct efx_nic *efx, unsigned long cookie)
{
        struct efx_tc_counter_index key = {};

        key.cookie = cookie;
        return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
                                      efx_tc_counter_id_ht_params);
}

/* TC Channel.  Counter updates are delivered on this channel's RXQ. */

static void efx_tc_handle_no_channel(struct efx_nic *efx)
{
        netif_warn(efx, drv, efx->net_dev,
                   "MAE counters require MSI-X and 1 additional interrupt vector.\n");
}

static int efx_tc_probe_channel(struct efx_channel *channel)
{
        struct efx_rx_queue *rx_queue = &channel->rx_queue;

        channel->irq_moderation_us = 0;
        rx_queue->core_index = 0;

        INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);

        return 0;
}

static int efx_tc_start_channel(struct efx_channel *channel)
{
        struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
        struct efx_nic *efx = channel->efx;

        return efx_mae_start_counters(efx, rx_queue);
}

static void efx_tc_stop_channel(struct efx_channel *channel)
{
        struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
        struct efx_nic *efx = channel->efx;
        int rc;

        rc = efx_mae_stop_counters(efx, rx_queue);
        if (rc)
                netif_warn(efx, drv, efx->net_dev,
                           "Failed to stop MAE counters streaming, rc=%d.\n",
                           rc);
        rx_queue->grant_credits = false;
        flush_work(&rx_queue->grant_work);
}
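/* In efx_tc_stop_channel() above, grant_credits is cleared before the
 * flush_work() so that no further credit grants can be queued once
 * counter streaming has stopped.
 */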

static void efx_tc_remove_channel(struct efx_channel *channel)
{
}

static void efx_tc_get_channel_name(struct efx_channel *channel,
                                    char *buf, size_t len)
{
        snprintf(buf, len, "%s-mae", channel->efx->name);
}

static void efx_tc_counter_update(struct efx_nic *efx,
                                  enum efx_tc_counter_type counter_type,
                                  u32 counter_idx, u64 packets, u64 bytes,
                                  u32 mark)
{
        struct efx_tc_counter *cnt;

        rcu_read_lock(); /* Protect against deletion of 'cnt' */
        cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
        if (!cnt) {
                /* This can legitimately happen when a counter is removed,
                 * with updates for the counter still in-flight; however this
                 * should be an infrequent occurrence.
                 */
                if (net_ratelimit())
                        netif_dbg(efx, drv, efx->net_dev,
                                  "Got update for unwanted MAE counter %u type %u\n",
                                  counter_idx, counter_type);
                goto out;
        }

        spin_lock_bh(&cnt->lock);
        if ((s32)mark - (s32)cnt->gen < 0) {
                /* This counter update packet is from before the counter was
                 * allocated; thus it must be for a previous counter with
                 * the same ID that has since been freed, and it should be
                 * ignored.
                 */
        } else {
                /* Update latest seen generation count.  This ensures that
                 * even a long-lived counter won't start getting ignored if
                 * the generation count wraps around, unless it somehow
                 * manages to go 1<<31 generations without an update.
                 */
                cnt->gen = mark;
                /* update counter values */
                cnt->packets += packets;
                cnt->bytes += bytes;
                cnt->touched = jiffies;
        }
        spin_unlock_bh(&cnt->lock);
        schedule_work(&cnt->work);
out:
        rcu_read_unlock();
}
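/* Worked example of the generation check above: if cnt->gen is 0xffffffff
 * and an update arrives with mark == 1 after the generation counter wraps,
 * then (s32)1 - (s32)0xffffffff == 2, which is not negative, so the update
 * is applied.  Only a counter that somehow goes 1<<31 generations without
 * an update would be wrongly ignored.
 */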

static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
{
        u16 n_counters, i;

        /* Header format:
         * + |   0    |   1    |   2    |   3    |
         * 0 |version |         reserved         |
         * 4 |    seq_index    |   n_counters    |
         */

        n_counters = le16_to_cpu(*(const __le16 *)(data + 6));

        /* Counter update entry format:
         * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
         * |  counter_idx  |     packet_count      |      byte_count       |
         */
        for (i = 0; i < n_counters; i++) {
                const void *entry = data + 8 + 16 * i;
                u64 packet_count, byte_count;
                u32 counter_idx;

                counter_idx = le32_to_cpu(*(const __le32 *)entry);
                packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
                               ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
                byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
                             ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
                efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
                                      packet_count, byte_count, mark);
        }
}
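/* The two-part loads above are needed because each 48-bit count straddles
 * a natural load boundary: packet_count occupies entry bytes 4-9 (a __le32
 * plus a __le16 supplying the top 16 bits), and byte_count occupies bytes
 * 10-15 (a __le16 supplying the bottom 16 bits plus a __le32 shifted up
 * by 16).
 */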

#define TCV2_HDR_PTR(pkt, field)                                                \
        ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),   \
         (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
#define TCV2_HDR_BYTE(pkt, field)                                               \
        ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
         *TCV2_HDR_PTR(pkt, field))
#define TCV2_HDR_WORD(pkt, field)                                               \
        ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
         (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),  \
         *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
#define TCV2_PKT_PTR(pkt, poff, i, field)                                       \
        ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),  \
         (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + poff +             \
         i * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
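/* The LBN ("lowest bit number") constants from mae_counter_format.h are
 * bit offsets from the start of the packet, so LBN / 8 converts them to
 * byte offsets; the BUILD_BUG_ON_ZERO() terms make the build fail if a
 * field is ever redefined with a width or alignment that these byte/word
 * loads cannot handle.
 */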

/* Read a little-endian 48-bit field with 16-bit alignment */
static u64 efx_tc_read48(const __le16 *field)
{
        u64 out = 0;
        int i;

        for (i = 0; i < 3; i++)
                out |= (u64)le16_to_cpu(field[i]) << (i * 16);
        return out;
}
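/* Worked example: for the on-the-wire bytes 12 34 56 78 9a bc, the three
 * little-endian words are 0x3412, 0x7856 and 0xbc9a, so efx_tc_read48()
 * returns 0xbc9a78563412.
 */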

static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
                                                    const u8 *data, u32 mark)
{
        u8 payload_offset, header_offset, ident;
        enum efx_tc_counter_type type;
        u16 n_counters, i;

        ident = TCV2_HDR_BYTE(data, IDENTIFIER);
        switch (ident) {
        case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
                type = EFX_TC_COUNTER_TYPE_AR;
                break;
        case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
                type = EFX_TC_COUNTER_TYPE_CT;
                break;
        case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
                type = EFX_TC_COUNTER_TYPE_OR;
                break;
        default:
                if (net_ratelimit())
                        netif_err(efx, drv, efx->net_dev,
                                  "ignored v2 MAE counter packet (bad identifier %u), counters may be inaccurate\n",
                                  ident);
                return EFX_TC_COUNTER_TYPE_MAX;
        }
        header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
        /* mae_counter_format.h implies that this offset is fixed, since it
         * carries on with SOP-based LBNs for the fields in this header
         */
        if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
                if (net_ratelimit())
                        netif_err(efx, drv, efx->net_dev,
                                  "choked on v2 MAE counter packet (bad header_offset %u), counters may be inaccurate\n",
                                  header_offset);
                return EFX_TC_COUNTER_TYPE_MAX;
        }
        payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
        n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));

        for (i = 0; i < n_counters; i++) {
                const void *counter_idx_p, *packet_count_p, *byte_count_p;
                u64 packet_count, byte_count;
                u32 counter_idx;

                /* 24-bit field with 32-bit alignment */
                counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
                counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
                /* 48-bit field with 16-bit alignment */
                packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
                packet_count = efx_tc_read48((const __le16 *)packet_count_p);
                /* 48-bit field with 16-bit alignment */
                byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
                BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
                byte_count = efx_tc_read48((const __le16 *)byte_count_p);

                if (type == EFX_TC_COUNTER_TYPE_CT) {
                        /* CT counters are 1-bit saturating counters, used
                         * only to update the lastuse time in conntrack
                         * stats.  A received CT counter update should
                         * therefore have a packet count of 0 and a byte
                         * count of exactly 1 (only the LSB set).
                         */
                        if (packet_count || byte_count != 1)
                                netdev_warn_once(efx->net_dev,
                                                 "CT counter with inconsistent state (%llu, %llu)\n",
                                                 packet_count, byte_count);
                        /* Do not increment the driver's byte counter */
                        byte_count = 0;
                }

                efx_tc_counter_update(efx, type, counter_idx, packet_count,
                                      byte_count, mark);
        }
        return type;
}

/* We always swallow the packet, whether successful or not, since it's not
 * a network packet and shouldn't ever be forwarded to the stack.
 * @mark is the generation count for counter allocations.
 */
static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
{
        struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
        struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
                                                     channel->rx_pkt_index);
        const u8 *data = efx_rx_buf_va(rx_buf);
        struct efx_nic *efx = rx_queue->efx;
        enum efx_tc_counter_type type;
        u8 version;

        /* version is always first byte of packet */
        version = *data;
        switch (version) {
        case 1:
                type = EFX_TC_COUNTER_TYPE_AR;
                efx_tc_rx_version_1(efx, data, mark);
                break;
        case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
                type = efx_tc_rx_version_2(efx, data, mark);
                break;
        default:
                if (net_ratelimit())
                        netif_err(efx, drv, efx->net_dev,
                                  "choked on MAE counter packet (bad version %u); counters may be inaccurate\n",
                                  version);
                goto out;
        }

        if (type < EFX_TC_COUNTER_TYPE_MAX) {
                /* Update seen_gen unconditionally, to avoid a missed wakeup if
                 * we race with efx_mae_stop_counters().
                 */
                efx->tc->seen_gen[type] = mark;
                if (efx->tc->flush_counters &&
                    (s32)(efx->tc->flush_gen[type] - mark) <= 0)
                        wake_up(&efx->tc->flush_wq);
        }
out:
        efx_free_rx_buffers(rx_queue, rx_buf, 1);
        channel->rx_pkt_n_frags = 0;
        return true;
}

const struct efx_channel_type efx_tc_channel_type = {
        .handle_no_channel      = efx_tc_handle_no_channel,
        .pre_probe              = efx_tc_probe_channel,
        .start                  = efx_tc_start_channel,
        .stop                   = efx_tc_stop_channel,
        .post_remove            = efx_tc_remove_channel,
        .get_name               = efx_tc_get_channel_name,
        .receive_raw            = efx_tc_rx,
        .keep_eventq            = true,
};