2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/mlx5/driver.h>
34 #include <linux/mlx5/fs.h>
35 #include <linux/rbtree.h>
36 #include "mlx5_core.h"
// Default sampling period: 1000 ms converted to jiffies. mlx5_init_fc_stats()
// stores it as the initial fc_stats->sampling_interval.
40 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
// Software upper bound on one bulk query; mlx5_fc_stats_query() takes the
// minimum of this value and 1 << log_max_flow_counter_bulk from the device.
41 /* Max number of counters to query in bulk read is 32K */
42 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
46 * It is the responsibility of the user to prevent concurrent calls or bad
47 * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference to struct mlx5_fc.
49 * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
50 * dump (access to struct mlx5_fc) after a counter is destroyed.
52 * access to counter list:
53 * - create (user context)
54 * - mlx5_fc_create() only adds to an addlist to be used by
55 * mlx5_fc_stats_work(). addlist is protected by a spinlock.
56 * - spawn thread to do the actual add
58 * - destroy (user context)
59 * - mark a counter as deleted
60 * - spawn thread to do the actual del
62 * - dump (user context)
63 * user should not call dump after destroy
65 * - query (single thread workqueue context)
66 * destroy/dump - no conflict (see destroy)
67 * query/dump - packets and bytes might be inconsistent (since update is not atomic)
69 * query/create - no conflict (see create)
70 * since every create/destroy spawn the work, only after necessary time has
71 * elapsed, the thread will actually query the hardware.
// Link @counter into the rb-tree rooted at @root, using counter->id as the
// ordering key (the visible comparison is counter->id minus the id of the
// node being visited).
// NOTE(review): this listing carries embedded line numbers and skips some of
// them (e.g. 77 -> 80, 81 -> 85). The loop header and the test on result that
// chooses between rb_left and rb_right are not visible here; confirm against
// the full file.
74 static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
// new tracks the link to follow next; parent is handed to rb_link_node().
76 struct rb_node **new = &root->rb_node;
77 struct rb_node *parent = NULL;
// this is the tree entry that *new currently points at.
80 struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
// Difference of the two ids stored as int; presumably its sign selects the
// subtree (TODO confirm, the branch condition is not visible).
81 int result = counter->id - this->id;
85 new = &((*new)->rb_left);
87 new = &((*new)->rb_right);
90 /* Add new node and rebalance tree. */
91 rb_link_node(&counter->node, parent, new);
92 rb_insert_color(&counter->node, root);
// Bulk-query hardware counters starting at @first and refresh the cache of
// every tree entry whose id falls inside the queried range.
// NOTE(review): embedded line numbers skip 100-101, 104-107 and others, so the
// remaining parameters (a last_id value is used below), several local
// declarations, the error-path statements and the return are not visible in
// this listing. The caller passes last->id as the third argument.
95 /* The function returns the last node that was queried so the caller
96 * function can continue calling it till all counters are queried.
98 static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
99 struct mlx5_fc *first,
102 struct mlx5_cmd_fc_bulk *b;
103 struct rb_node *node = NULL;
// Bulk size limit: the smaller of the software cap and the device capability.
108 int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
109 (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
111 /* first id must be aligned to 4 when using bulk query */
// Clearing the two low bits rounds first->id down to a multiple of 4.
112 afirst_id = first->id & ~0x3;
114 /* number of counters to query inc. the last counter */
115 num = ALIGN(last_id - afirst_id + 1, 4);
// When the range exceeds max_bulk, last_id is recomputed from afirst_id and
// num; presumably num is clamped on the missing line 117 (TODO confirm).
116 if (num > max_bulk) {
118 last_id = afirst_id + num - 1;
121 b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
123 mlx5_core_err(dev, "Error allocating resources for bulk query\n");
127 err = mlx5_cmd_fc_bulk_query(dev, b);
129 mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
// Walk the tree in order from @first, reading each counter out of the bulk
// result buffer.
133 for (node = &first->node; node; node = rb_next(node)) {
134 struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
135 struct mlx5_fc_cache *c = &counter->cache;
// Entries past last_id are outside this bulk; the statement taken here is not
// visible (presumably it leaves the loop).
139 if (counter->id > last_id)
142 mlx5_cmd_fc_bulk_get(dev, b,
143 counter->id, &packets, &bytes);
// Packet count unchanged since the cached value; the statement taken here is
// not visible (presumably the cache update below is skipped).
145 if (c->packets == packets)
148 c->packets = packets;
// lastuse records the jiffies value at which the cache was refreshed.
150 c->lastuse = jiffies;
154 mlx5_cmd_fc_bulk_free(b);
// Delayed-work handler for the flow-counter statistics. Visible steps: drain
// the addlist into the rb-tree, free counters marked deleted, and, once
// next_query has been reached, query the hardware for the remaining counters.
// NOTE(review): embedded line numbers skip several lines (e.g. 167 -> 170,
// 192 -> 201), so the tmplist declaration, the loop headers, the assignment
// of last and any early return are not visible in this listing.
159 static void mlx5_fc_stats_work(struct work_struct *work)
161 struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
162 priv.fc_stats.work.work);
163 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
// Snapshot of jiffies used for both the time check and the next_query update.
164 unsigned long now = jiffies;
165 struct mlx5_fc *counter = NULL;
166 struct mlx5_fc *last = NULL;
167 struct rb_node *node;
// Under addlist_lock: move all pending additions to tmplist and re-arm the
// work if there is anything to track.
170 spin_lock(&fc_stats->addlist_lock);
172 list_splice_tail_init(&fc_stats->addlist, &tmplist);
174 if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
175 queue_delayed_work(fc_stats->wq, &fc_stats->work,
176 fc_stats->sampling_interval);
178 spin_unlock(&fc_stats->addlist_lock);
// Tree insertion happens after the spinlock is released.
180 list_for_each_entry(counter, &tmplist, list)
181 mlx5_fc_stats_insert(&fc_stats->counters, counter);
183 node = rb_first(&fc_stats->counters);
185 counter = rb_entry(node, struct mlx5_fc, node);
// Advance before a possible rb_erase() of the current entry.
187 node = rb_next(node);
189 if (counter->deleted) {
190 rb_erase(&counter->node, &fc_stats->counters);
192 mlx5_cmd_fc_free(dev, counter->id);
// Checked when it is too early to query or last is still NULL; the statement
// taken is not visible (presumably a return).
201 if (time_before(now, fc_stats->next_query) || !last)
204 node = rb_first(&fc_stats->counters);
206 counter = rb_entry(node, struct mlx5_fc, node);
// The query returns a node that is used to continue the walk.
208 node = mlx5_fc_stats_query(dev, counter, last->id);
211 fc_stats->next_query = now + fc_stats->sampling_interval;
// Allocate a software counter object and a hardware counter id for it, add it
// to the addlist and kick the stats work immediately (delay 0).
// Returns ERR_PTR(-ENOMEM) on the visible allocation-failure path.
// NOTE(review): embedded line numbers skip 225-228 and 237-246, so the
// handling of a mlx5_cmd_fc_alloc() error, any guard on the @aging parameter
// around the lines below, and the success return are not visible here.
214 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
216 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
217 struct mlx5_fc *counter;
220 counter = kzalloc(sizeof(*counter), GFP_KERNEL);
222 return ERR_PTR(-ENOMEM);
224 err = mlx5_cmd_fc_alloc(dev, &counter->id);
// Seed lastuse with the creation time.
229 counter->cache.lastuse = jiffies;
230 counter->aging = true;
// addlist is shared with the stats work, hence the spinlock.
232 spin_lock(&fc_stats->addlist_lock);
233 list_add(&counter->list, &fc_stats->addlist);
234 spin_unlock(&fc_stats->addlist_lock);
236 mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
// Destroy @counter. For an aging counter the visible code only marks it
// deleted and kicks the stats work with delay 0; mlx5_fc_stats_work() then
// erases it from the tree and frees the hardware id.
// NOTE(review): embedded line numbers skip 250-253 and 257-259, so any NULL
// check, the control flow separating the aging path from the direct free
// below, and the release of the object itself are not visible here.
247 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
249 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
254 if (counter->aging) {
255 counter->deleted = true;
256 mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
// Direct release of the hardware counter id.
260 mlx5_cmd_fc_free(dev, counter->id);
// Initialise the per-device flow-counter statistics state: empty rb-tree,
// empty addlist with its spinlock, a single-threaded workqueue named mlx5_fc,
// the default sampling interval and the delayed work item.
// NOTE(review): embedded line numbers skip 273-275 and 278-281, so the check
// of the workqueue allocation and the return value are not visible here.
264 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
266 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
268 fc_stats->counters = RB_ROOT;
269 INIT_LIST_HEAD(&fc_stats->addlist);
270 spin_lock_init(&fc_stats->addlist_lock);
272 fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
276 fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
// The work is only initialised here; it is queued by create and destroy.
277 INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
// Tear down the flow-counter statistics state: stop the delayed work, destroy
// the workqueue, then release the hardware id of every counter still on the
// addlist or in the rb-tree.
// NOTE(review): embedded line numbers skip 286, 295, 302 and others, so the
// declaration of tmp, the tree loop header and any freeing of the counter
// objects are not visible in this listing.
282 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
284 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
285 struct mlx5_fc *counter;
287 struct rb_node *node;
// Wait for a running work instance to finish before destroying the queue.
289 cancel_delayed_work_sync(&dev->priv.fc_stats.work);
290 destroy_workqueue(dev->priv.fc_stats.wq);
291 dev->priv.fc_stats.wq = NULL;
// Counters that were created but never moved into the tree by the work.
293 list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
294 list_del(&counter->list);
296 mlx5_cmd_fc_free(dev, counter->id);
301 node = rb_first(&fc_stats->counters);
303 counter = rb_entry(node, struct mlx5_fc, node);
// Advance before erasing the current entry from the tree.
305 node = rb_next(node);
307 rb_erase(&counter->node, &fc_stats->counters);
309 mlx5_cmd_fc_free(dev, counter->id);
// Report the change in cached byte and packet counts since the previous call,
// plus the cached lastuse value, then remember the current totals in
// counter->lastbytes and counter->lastpackets for the next call.
// NOTE(review): embedded line numbers skip 319-321, so how the local c is
// filled is not visible; presumably it is a copy of counter->cache (confirm).
315 void mlx5_fc_query_cached(struct mlx5_fc *counter,
316 u64 *bytes, u64 *packets, u64 *lastuse)
318 struct mlx5_fc_cache c;
// Deltas relative to the totals saved by the previous call.
322 *bytes = c.bytes - counter->lastbytes;
323 *packets = c.packets - counter->lastpackets;
324 *lastuse = c.lastuse;
326 counter->lastbytes = c.bytes;
327 counter->lastpackets = c.packets;
// Queue a caller-supplied delayed work item on the flow-counter workqueue
// (fc_stats->wq) with the given delay.
// NOTE(review): embedded line numbers skip 332-333, so the declaration of the
// delay parameter is not visible in this listing.
330 void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
331 struct delayed_work *dwork,
334 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
336 queue_delayed_work(fc_stats->wq, dwork, delay);
// Lower the sampling interval of the stats work to @interval if it is smaller
// than the current value. Because the minimum of the two is stored, this
// function never increases the interval.
339 void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
340 unsigned long interval)
342 struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
344 fc_stats->sampling_interval = min_t(unsigned long, interval,
345 fc_stats->sampling_interval);