lib/dynamic_queue_limits.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
 *
 * Copyright (c) 2011, Tom Herbert <therbert@google.com>
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/dynamic_queue_limits.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <trace/events/napi.h>

#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)

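/*
 * For example, with 32-bit unsigned counters, A = 2 and B = 0xfffffffe
 * (B sampled just before the counter wrapped) give A - B == 4, so
 * POSDIFF(A, B) == 4 and AFTER_EQ(A, B) is true even though A < B
 * numerically.  The signed cast keeps both macros correct across counter
 * wraparound as long as the real difference fits in a signed int.
 */
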
static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
{
        unsigned long now;

        if (!stall_thrs)
                return;

        now = jiffies;
        /* Check for a potential stall */
        if (time_after_eq(now, dql->last_reap + stall_thrs)) {
                unsigned long hist_head, t, start, end;

                /* We are trying to detect a period of at least @stall_thrs
                 * jiffies without any Tx completions, but during first half
                 * of which some Tx was posted.
                 */
dqs_again:
                hist_head = READ_ONCE(dql->history_head);
                /* pairs with smp_wmb() in dql_queued() */
                smp_rmb();

                /* Get the previous entry in the ring buffer, which is the
                 * oldest sample.
                 */
                start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;

                /* Advance start to continue from the last reap time */
                if (time_before(start, dql->last_reap + 1))
                        start = dql->last_reap + 1;

                /* Newest sample we should have already seen a completion for */
                end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);

                /* Shrink the search space to [start, (now - stall_thrs/2)] if
                 * `end` is beyond the stall zone
                 */
                if (time_before(now, end + stall_thrs / 2))
                        end = now - stall_thrs / 2;

                /* Search for the oldest queued time in [start, end] */
                for (t = start; time_before_eq(t, end); t++)
                        if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG),
                                     dql->history))
                                break;

                /* Variable t now holds the time the data was queued, or is
                 * past `end` if nothing was queued in the window.
                 */
                if (!time_before_eq(t, end))
                        goto no_stall;

                /* The ring buffer was modified in the meantime, retry */
                if (hist_head != READ_ONCE(dql->history_head))
                        goto dqs_again;

                dql->stall_cnt++;
                dql->stall_max = max_t(unsigned short, dql->stall_max, now - t);

                trace_dql_stall_detected(dql->stall_thrs, now - t,
                                         dql->last_reap, dql->history_head,
                                         now, dql->history);
        }
no_stall:
        dql->last_reap = now;
}
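
/*
 * A sketch of how the history bitmap consulted above is indexed, assuming
 * the header's DQL_HIST_LEN of 4 longs: each bit of dql->history records
 * "some Tx was posted during this jiffy", at bit position
 * jiffies % (DQL_HIST_LEN * BITS_PER_LONG), while dql->history_head holds
 * jiffies / BITS_PER_LONG for the most recent queuing (see dql_reset()
 * below).  For example, with BITS_PER_LONG == 64 the window spans 256
 * jiffies, and jiffy 1000 maps to bit 1000 % 256 == 232 of the bitmap,
 * which is the same index the search loop above computes via test_bit().
 */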

/* Records completed count and recalculates the queue limit */
void dql_completed(struct dql *dql, unsigned int count)
{
        unsigned int inprogress, prev_inprogress, limit;
        unsigned int ovlimit, completed, num_queued;
        unsigned short stall_thrs;
        bool all_prev_completed;

        num_queued = READ_ONCE(dql->num_queued);
        /* Read stall_thrs in advance since it belongs to the same (first)
         * cache line as ->num_queued. This way, dql_check_stall() does not
         * need to touch the first cache line again later, reducing the window
         * of possible false sharing.
         */
        stall_thrs = READ_ONCE(dql->stall_thrs);

        /* Can't complete more than what's in queue */
        BUG_ON(count > num_queued - dql->num_completed);

        completed = dql->num_completed + count;
        limit = dql->limit;
        ovlimit = POSDIFF(num_queued - dql->num_completed, limit);
        inprogress = num_queued - completed;
        prev_inprogress = dql->prev_num_queued - dql->num_completed;
        all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued);

        if ((ovlimit && !inprogress) ||
            (dql->prev_ovlimit && all_prev_completed)) {
                /*
                 * The queue is considered starved if:
                 *   - The queue was over-limit in the last interval,
                 *     and there is no more data in the queue.
                 *  OR
                 *   - The queue was over-limit in the previous interval and
                 *     when enqueuing it was possible that all queued data
                 *     had been consumed.  This covers the case when the queue
                 *     may have become starved between completion processing
                 *     running and the next time enqueue was scheduled.
                 *
                 *     When the queue is starved, increase the limit by the
                 *     amount of bytes both sent and completed in the last
                 *     interval, plus any previous over-limit.
                 */
                limit += POSDIFF(completed, dql->prev_num_queued) +
                     dql->prev_ovlimit;
                dql->slack_start_time = jiffies;
                dql->lowest_slack = UINT_MAX;
        } else if (inprogress && prev_inprogress && !all_prev_completed) {
                /*
                 * The queue was not starved, check if the limit can be
                 * decreased.  A decrease is only considered if the queue has
                 * been busy for the whole interval (the check above).
                 *
                 * If there is slack, i.e. excess data queued above the amount
                 * needed to prevent starvation, the queue limit can be
                 * decreased.  To avoid hysteresis we consider the minimum
                 * amount of slack found over several iterations of the
                 * completion routine.
                 */
                unsigned int slack, slack_last_objs;

                /*
                 * Slack is the maximum of
                 *   - The queue limit plus previous over-limit minus twice
                 *     the number of objects completed.  Note that twice the
                 *     number of completed bytes is a basis for an upper bound
                 *     of the limit.
                 *   - The portion of objects in the last queuing operation
                 *     that was not part of a non-zero previous over-limit,
                 *     i.e. "round down" by the non-overlimit portion of the
                 *     last queueing operation.
                 */
                slack = POSDIFF(limit + dql->prev_ovlimit,
                    2 * (completed - dql->num_completed));
                slack_last_objs = dql->prev_ovlimit ?
                    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;

                slack = max(slack, slack_last_objs);

                if (slack < dql->lowest_slack)
                        dql->lowest_slack = slack;

                if (time_after(jiffies,
                               dql->slack_start_time + dql->slack_hold_time)) {
                        limit = POSDIFF(limit, dql->lowest_slack);
                        dql->slack_start_time = jiffies;
                        dql->lowest_slack = UINT_MAX;
                }
        }

        /* Enforce bounds on limit */
        limit = clamp(limit, dql->min_limit, dql->max_limit);

        if (limit != dql->limit) {
                dql->limit = limit;
                ovlimit = 0;
        }

        dql->adj_limit = limit + completed;
        dql->prev_ovlimit = ovlimit;
        dql->prev_last_obj_cnt = dql->last_obj_cnt;
        dql->num_completed = completed;
        dql->prev_num_queued = num_queued;

        dql_check_stall(dql, stall_thrs);
}
EXPORT_SYMBOL(dql_completed);
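
/*
 * A hypothetical usage sketch: the queue type and the stop_queue()/
 * wake_queue() helpers below are made up for illustration; in networking
 * this pattern is wrapped by netdev_tx_sent_queue() and
 * netdev_tx_completed_queue().
 *
 *        // Transmit path: account bytes as they are posted to the hardware.
 *        dql_queued(&q->dql, bytes_sent);
 *        if (dql_avail(&q->dql) < 0)
 *                stop_queue(q);
 *
 *        // Completion path: report bytes the hardware has finished with;
 *        // dql_completed() then recalculates the limit as above.
 *        dql_completed(&q->dql, bytes_completed);
 *        if (dql_avail(&q->dql) >= 0)
 *                wake_queue(q);
 */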

void dql_reset(struct dql *dql)
{
        /* Reset all dynamic values */
        dql->limit = 0;
        dql->num_queued = 0;
        dql->num_completed = 0;
        dql->last_obj_cnt = 0;
        dql->prev_num_queued = 0;
        dql->prev_last_obj_cnt = 0;
        dql->prev_ovlimit = 0;
        dql->lowest_slack = UINT_MAX;
        dql->slack_start_time = jiffies;

        dql->last_reap = jiffies;
        dql->history_head = jiffies / BITS_PER_LONG;
        memset(dql->history, 0, sizeof(dql->history));
}
EXPORT_SYMBOL(dql_reset);
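
/*
 * dql_reset() is typically called when the underlying queue is flushed or
 * reconfigured, so that stale byte counts do not distort the recalculated
 * limit; BQL's netdev_tx_reset_queue(), for example, resets the per-queue
 * dql this way.
 */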

void dql_init(struct dql *dql, unsigned int hold_time)
{
        dql->max_limit = DQL_MAX_LIMIT;
        dql->min_limit = 0;
        dql->slack_hold_time = hold_time;
        dql->stall_thrs = 0;
        dql_reset(dql);
}
EXPORT_SYMBOL(dql_init);
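
/*
 * A minimal initialization sketch; the dql_queue struct below is
 * hypothetical, while BQL, for instance, passes a hold time of HZ so the
 * minimum-slack window is about one second:
 *
 *        struct dql_queue {
 *                struct dql dql;
 *                // driver-specific queue state ...
 *        };
 *
 *        static void dql_queue_setup(struct dql_queue *q)
 *        {
 *                dql_init(&q->dql, HZ);
 *        }
 */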