Commit | Line | Data |
---|---|---|
e27b9ff0 NC |
1 | /* |
2 | * IO priority handling helper functions common to the libaio and io_uring | |
3 | * engines. | |
4 | */ | |
5 | ||
6 | #include "cmdprio.h" | |
7 | ||
f0547200 NC |
/*
 * Scratch storage for the parser output of one data direction. It only lives
 * during parsing: once the corresponding struct bsprio_desc has been generated
 * and saved in cmdprio->bsprio_desc, the entries array is freed again.
 */
struct cmdprio_parse_result {
	struct split_prio *entries;	/* parsed entries */
	int nr_entries;			/* number of elements in entries */
};
16 | ||
/*
 * Scratch array of priority values used during init. It is freed after the
 * corresponding struct clat_prio_stat array has been saved in td->ts.clat_prio
 * and the matching clat_prio_indexes have been saved in each
 * struct cmdprio_prio.
 */
struct cmdprio_values {
	unsigned int *prios;	/* priority values collected so far */
	int nr_prios;		/* number of used elements in prios */
};
26 | ||
/*
 * Linear search for @prio in the first @nr_prios elements of @all_prios.
 * Returns the index of the first match, or -1 when the value is not present.
 */
static int find_clat_prio_index(unsigned int *all_prios, int nr_prios,
				int32_t prio)
{
	int idx = 0;

	while (idx < nr_prios) {
		if (all_prios[idx] == prio)
			return idx;
		idx++;
	}

	return -1;
}
e27b9ff0 | 39 | |
f0547200 NC |
40 | /** |
41 | * assign_clat_prio_index - In order to avoid stat.c the need to loop through | |
42 | * all possible priorities each time add_clat_sample() / add_lat_sample() is | |
43 | * called, save which index to use in each cmdprio_prio. This will later be | |
44 | * propagated to the io_u, if the specific io_u was determined to use a cmdprio | |
45 | * priority value. | |
46 | */ | |
47 | static void assign_clat_prio_index(struct cmdprio_prio *prio, | |
48 | struct cmdprio_values *values) | |
49 | { | |
50 | int clat_prio_index = find_clat_prio_index(values->prios, | |
51 | values->nr_prios, | |
52 | prio->prio); | |
53 | if (clat_prio_index == -1) { | |
54 | clat_prio_index = values->nr_prios; | |
55 | values->prios[clat_prio_index] = prio->prio; | |
56 | values->nr_prios++; | |
57 | } | |
58 | prio->clat_prio_index = clat_prio_index; | |
59 | } | |
60 | ||
61 | /** | |
62 | * init_cmdprio_values - Allocate a temporary array that can hold all unique | |
63 | * priorities (per ddir), so that we can assign_clat_prio_index() for each | |
64 | * cmdprio_prio during setup. This temporary array is freed after setup. | |
65 | */ | |
66 | static int init_cmdprio_values(struct cmdprio_values *values, | |
67 | int max_unique_prios, struct thread_stat *ts) | |
68 | { | |
69 | values->prios = calloc(max_unique_prios + 1, | |
70 | sizeof(*values->prios)); | |
71 | if (!values->prios) | |
e27b9ff0 | 72 | return 1; |
e27b9ff0 | 73 | |
f0547200 NC |
74 | /* td->ioprio/ts->ioprio is always stored at index 0. */ |
75 | values->prios[0] = ts->ioprio; | |
76 | values->nr_prios++; | |
77 | ||
78 | return 0; | |
79 | } | |
80 | ||
81 | /** | |
82 | * init_ts_clat_prio - Allocates and fills a clat_prio_stat array which holds | |
83 | * all unique priorities (per ddir). | |
84 | */ | |
85 | static int init_ts_clat_prio(struct thread_stat *ts, enum fio_ddir ddir, | |
86 | struct cmdprio_values *values) | |
87 | { | |
88 | int i; | |
89 | ||
90 | if (alloc_clat_prio_stat_ddir(ts, ddir, values->nr_prios)) | |
e27b9ff0 NC |
91 | return 1; |
92 | ||
f0547200 NC |
93 | for (i = 0; i < values->nr_prios; i++) |
94 | ts->clat_prio[ddir][i].ioprio = values->prios[i]; | |
95 | ||
96 | return 0; | |
97 | } | |
98 | ||
99 | static int fio_cmdprio_fill_bsprio(struct cmdprio_bsprio *bsprio, | |
100 | struct split_prio *entries, | |
101 | struct cmdprio_values *values, | |
102 | int implicit_cmdprio, int start, int end) | |
103 | { | |
104 | struct cmdprio_prio *prio; | |
105 | int i = end - start + 1; | |
106 | ||
107 | bsprio->prios = calloc(i, sizeof(*bsprio->prios)); | |
108 | if (!bsprio->prios) | |
109 | return 1; | |
110 | ||
111 | bsprio->bs = entries[start].bs; | |
112 | bsprio->nr_prios = 0; | |
113 | for (i = start; i <= end; i++) { | |
114 | prio = &bsprio->prios[bsprio->nr_prios]; | |
115 | prio->perc = entries[i].perc; | |
116 | if (entries[i].prio == -1) | |
117 | prio->prio = implicit_cmdprio; | |
118 | else | |
119 | prio->prio = entries[i].prio; | |
120 | assign_clat_prio_index(prio, values); | |
121 | bsprio->tot_perc += entries[i].perc; | |
122 | if (bsprio->tot_perc > 100) { | |
123 | log_err("fio: cmdprio_bssplit total percentage " | |
124 | "for bs: %"PRIu64" exceeds 100\n", | |
125 | bsprio->bs); | |
126 | free(bsprio->prios); | |
127 | return 1; | |
e27b9ff0 | 128 | } |
f0547200 NC |
129 | bsprio->nr_prios++; |
130 | } | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
135 | static int | |
136 | fio_cmdprio_generate_bsprio_desc(struct cmdprio_bsprio_desc *bsprio_desc, | |
137 | struct cmdprio_parse_result *parse_res, | |
138 | struct cmdprio_values *values, | |
139 | int implicit_cmdprio) | |
140 | { | |
141 | struct split_prio *entries = parse_res->entries; | |
142 | int nr_entries = parse_res->nr_entries; | |
143 | struct cmdprio_bsprio *bsprio; | |
144 | int i, start, count = 0; | |
145 | ||
146 | /* | |
147 | * The parsed result is sorted by blocksize, so count only the number | |
148 | * of different blocksizes, to know how many cmdprio_bsprio we need. | |
149 | */ | |
150 | for (i = 0; i < nr_entries; i++) { | |
151 | while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs) | |
152 | i++; | |
153 | count++; | |
154 | } | |
155 | ||
156 | /* | |
157 | * This allocation is not freed on error. Instead, the calling function | |
158 | * is responsible for calling fio_cmdprio_cleanup() on error. | |
159 | */ | |
160 | bsprio_desc->bsprios = calloc(count, sizeof(*bsprio_desc->bsprios)); | |
161 | if (!bsprio_desc->bsprios) | |
162 | return 1; | |
163 | ||
164 | start = 0; | |
165 | bsprio_desc->nr_bsprios = 0; | |
166 | for (i = 0; i < nr_entries; i++) { | |
167 | while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs) | |
168 | i++; | |
169 | bsprio = &bsprio_desc->bsprios[bsprio_desc->nr_bsprios]; | |
170 | /* | |
171 | * All parsed entries with the same blocksize get saved in the | |
172 | * same cmdprio_bsprio, to expedite the search in the hot path. | |
173 | */ | |
174 | if (fio_cmdprio_fill_bsprio(bsprio, entries, values, | |
175 | implicit_cmdprio, start, i)) | |
176 | return 1; | |
177 | ||
178 | start = i + 1; | |
179 | bsprio_desc->nr_bsprios++; | |
e27b9ff0 NC |
180 | } |
181 | ||
182 | return 0; | |
183 | } | |
184 | ||
f0547200 NC |
185 | static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg, |
186 | enum fio_ddir ddir, char *str, bool data) | |
187 | { | |
188 | struct cmdprio_parse_result *parse_res_arr = cb_arg; | |
189 | struct cmdprio_parse_result *parse_res = &parse_res_arr[ddir]; | |
190 | ||
191 | if (ddir == DDIR_TRIM) | |
192 | return 0; | |
193 | ||
194 | if (split_parse_prio_ddir(to, &parse_res->entries, | |
195 | &parse_res->nr_entries, str)) | |
196 | return 1; | |
197 | ||
198 | return 0; | |
199 | } | |
200 | ||
201 | static int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input, | |
202 | struct cmdprio_parse_result *parse_res) | |
e27b9ff0 NC |
203 | { |
204 | char *str, *p; | |
d6cbeab4 | 205 | int ret = 0; |
e27b9ff0 NC |
206 | |
207 | p = str = strdup(input); | |
208 | ||
209 | strip_blank_front(&str); | |
210 | strip_blank_end(str); | |
211 | ||
f0547200 | 212 | ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, parse_res, |
d6cbeab4 | 213 | false); |
e27b9ff0 NC |
214 | |
215 | free(p); | |
216 | return ret; | |
217 | } | |
218 | ||
f0547200 NC |
219 | /** |
220 | * fio_cmdprio_percentage - Returns the percentage of I/Os that should | |
221 | * use a cmdprio priority value (rather than the default context priority). | |
222 | * | |
223 | * For CMDPRIO_MODE_BSSPLIT, if the percentage is non-zero, we will also | |
224 | * return the matching bsprio, to avoid the same linear search elsewhere. | |
225 | * For CMDPRIO_MODE_PERC, we will never return a bsprio. | |
226 | */ | |
227 | static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u, | |
228 | struct cmdprio_bsprio **bsprio) | |
e27b9ff0 | 229 | { |
f0547200 | 230 | struct cmdprio_bsprio *bsprio_entry; |
e27b9ff0 | 231 | enum fio_ddir ddir = io_u->ddir; |
e27b9ff0 NC |
232 | int i; |
233 | ||
97f2d484 NC |
234 | switch (cmdprio->mode) { |
235 | case CMDPRIO_MODE_PERC: | |
f0547200 NC |
236 | *bsprio = NULL; |
237 | return cmdprio->perc_entry[ddir].perc; | |
97f2d484 | 238 | case CMDPRIO_MODE_BSSPLIT: |
f0547200 NC |
239 | for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) { |
240 | bsprio_entry = &cmdprio->bsprio_desc[ddir].bsprios[i]; | |
241 | if (bsprio_entry->bs == io_u->buflen) { | |
242 | *bsprio = bsprio_entry; | |
243 | return bsprio_entry->tot_perc; | |
244 | } | |
97f2d484 NC |
245 | } |
246 | break; | |
247 | default: | |
248 | /* | |
249 | * An I/O engine should never call this function if cmdprio | |
250 | * is not is use. | |
251 | */ | |
252 | assert(0); | |
e27b9ff0 NC |
253 | } |
254 | ||
f0547200 NC |
255 | /* |
256 | * This is totally fine, the given blocksize simply does not | |
257 | * have any (non-zero) cmdprio_bssplit entries defined. | |
258 | */ | |
259 | *bsprio = NULL; | |
e27b9ff0 NC |
260 | return 0; |
261 | } | |
262 | ||
127715b6 NC |
263 | /** |
264 | * fio_cmdprio_set_ioprio - Set an io_u ioprio according to cmdprio options | |
265 | * | |
266 | * Generates a random percentage value to determine if an io_u ioprio needs | |
267 | * to be set. If the random percentage value is within the user specified | |
268 | * percentage of I/Os that should use a cmdprio priority value (rather than | |
269 | * the default priority), then this function updates the io_u with an ioprio | |
79012fec DLM |
270 | * value as defined by the cmdprio/cmdprio_hint/cmdprio_class or |
271 | * cmdprio_bssplit options. | |
127715b6 NC |
272 | * |
273 | * Return true if the io_u ioprio was changed and false otherwise. | |
274 | */ | |
275 | bool fio_cmdprio_set_ioprio(struct thread_data *td, struct cmdprio *cmdprio, | |
276 | struct io_u *io_u) | |
277 | { | |
f0547200 NC |
278 | struct cmdprio_bsprio *bsprio; |
279 | unsigned int p, rand; | |
280 | uint32_t perc = 0; | |
281 | int i; | |
282 | ||
283 | p = fio_cmdprio_percentage(cmdprio, io_u, &bsprio); | |
284 | if (!p) | |
285 | return false; | |
286 | ||
287 | rand = rand_between(&td->prio_state, 0, 99); | |
288 | if (rand >= p) | |
289 | return false; | |
290 | ||
291 | switch (cmdprio->mode) { | |
292 | case CMDPRIO_MODE_PERC: | |
293 | io_u->ioprio = cmdprio->perc_entry[io_u->ddir].prio; | |
294 | io_u->clat_prio_index = | |
295 | cmdprio->perc_entry[io_u->ddir].clat_prio_index; | |
127715b6 | 296 | return true; |
f0547200 NC |
297 | case CMDPRIO_MODE_BSSPLIT: |
298 | assert(bsprio); | |
299 | for (i = 0; i < bsprio->nr_prios; i++) { | |
300 | struct cmdprio_prio *prio = &bsprio->prios[i]; | |
301 | ||
302 | perc += prio->perc; | |
303 | if (rand < perc) { | |
304 | io_u->ioprio = prio->prio; | |
305 | io_u->clat_prio_index = prio->clat_prio_index; | |
306 | return true; | |
307 | } | |
308 | } | |
309 | break; | |
310 | default: | |
311 | assert(0); | |
127715b6 NC |
312 | } |
313 | ||
f0547200 NC |
314 | /* When rand < p (total perc), we should always find a cmdprio_prio. */ |
315 | assert(0); | |
316 | return false; | |
317 | } | |
318 | ||
319 | static int fio_cmdprio_gen_perc(struct thread_data *td, struct cmdprio *cmdprio) | |
320 | { | |
321 | struct cmdprio_options *options = cmdprio->options; | |
322 | struct cmdprio_prio *prio; | |
f54b69c3 | 323 | struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {}; |
f0547200 NC |
324 | struct thread_stat *ts = &td->ts; |
325 | enum fio_ddir ddir; | |
326 | int ret; | |
327 | ||
328 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { | |
127715b6 | 329 | /* |
f0547200 NC |
330 | * Do not allocate a clat_prio array nor set the cmdprio struct |
331 | * if zero percent of the I/Os (for the ddir) should use a | |
332 | * cmdprio priority value, or when the ddir is not enabled. | |
127715b6 | 333 | */ |
f0547200 NC |
334 | if (!options->percentage[ddir] || |
335 | (ddir == DDIR_READ && !td_read(td)) || | |
336 | (ddir == DDIR_WRITE && !td_write(td))) | |
337 | continue; | |
338 | ||
339 | ret = init_cmdprio_values(&values[ddir], 1, ts); | |
340 | if (ret) | |
341 | goto err; | |
342 | ||
343 | prio = &cmdprio->perc_entry[ddir]; | |
344 | prio->perc = options->percentage[ddir]; | |
345 | prio->prio = ioprio_value(options->class[ddir], | |
79012fec DLM |
346 | options->level[ddir], |
347 | options->hint[ddir]); | |
f0547200 NC |
348 | assign_clat_prio_index(prio, &values[ddir]); |
349 | ||
350 | ret = init_ts_clat_prio(ts, ddir, &values[ddir]); | |
351 | if (ret) | |
352 | goto err; | |
353 | ||
354 | free(values[ddir].prios); | |
355 | values[ddir].prios = NULL; | |
356 | values[ddir].nr_prios = 0; | |
127715b6 NC |
357 | } |
358 | ||
f0547200 NC |
359 | return 0; |
360 | ||
361 | err: | |
362 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) | |
363 | free(values[ddir].prios); | |
364 | free_clat_prio_stats(ts); | |
365 | ||
366 | return ret; | |
127715b6 NC |
367 | } |
368 | ||
d6cbeab4 NC |
369 | static int fio_cmdprio_parse_and_gen_bssplit(struct thread_data *td, |
370 | struct cmdprio *cmdprio) | |
e27b9ff0 | 371 | { |
d6cbeab4 | 372 | struct cmdprio_options *options = cmdprio->options; |
f54b69c3 BVA |
373 | struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {}; |
374 | struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {}; | |
f0547200 NC |
375 | struct thread_stat *ts = &td->ts; |
376 | int ret, implicit_cmdprio; | |
377 | enum fio_ddir ddir; | |
378 | ||
379 | ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str, | |
380 | &parse_res[0]); | |
d6cbeab4 NC |
381 | if (ret) |
382 | goto err; | |
383 | ||
f0547200 NC |
384 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { |
385 | /* | |
386 | * Do not allocate a clat_prio array nor set the cmdprio structs | |
387 | * if there are no non-zero entries (for the ddir), or when the | |
388 | * ddir is not enabled. | |
389 | */ | |
390 | if (!parse_res[ddir].nr_entries || | |
391 | (ddir == DDIR_READ && !td_read(td)) || | |
392 | (ddir == DDIR_WRITE && !td_write(td))) { | |
393 | free(parse_res[ddir].entries); | |
394 | parse_res[ddir].entries = NULL; | |
395 | parse_res[ddir].nr_entries = 0; | |
396 | continue; | |
397 | } | |
398 | ||
399 | ret = init_cmdprio_values(&values[ddir], | |
400 | parse_res[ddir].nr_entries, ts); | |
401 | if (ret) | |
402 | goto err; | |
403 | ||
404 | implicit_cmdprio = ioprio_value(options->class[ddir], | |
79012fec DLM |
405 | options->level[ddir], |
406 | options->hint[ddir]); | |
f0547200 NC |
407 | |
408 | ret = fio_cmdprio_generate_bsprio_desc(&cmdprio->bsprio_desc[ddir], | |
409 | &parse_res[ddir], | |
410 | &values[ddir], | |
411 | implicit_cmdprio); | |
412 | if (ret) | |
413 | goto err; | |
414 | ||
415 | free(parse_res[ddir].entries); | |
416 | parse_res[ddir].entries = NULL; | |
417 | parse_res[ddir].nr_entries = 0; | |
418 | ||
419 | ret = init_ts_clat_prio(ts, ddir, &values[ddir]); | |
420 | if (ret) | |
421 | goto err; | |
422 | ||
423 | free(values[ddir].prios); | |
424 | values[ddir].prios = NULL; | |
425 | values[ddir].nr_prios = 0; | |
426 | } | |
427 | ||
d6cbeab4 NC |
428 | return 0; |
429 | ||
430 | err: | |
f0547200 NC |
431 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { |
432 | free(parse_res[ddir].entries); | |
433 | free(values[ddir].prios); | |
434 | } | |
435 | free_clat_prio_stats(ts); | |
d6cbeab4 NC |
436 | fio_cmdprio_cleanup(cmdprio); |
437 | ||
438 | return ret; | |
439 | } | |
440 | ||
441 | static int fio_cmdprio_parse_and_gen(struct thread_data *td, | |
442 | struct cmdprio *cmdprio) | |
443 | { | |
444 | struct cmdprio_options *options = cmdprio->options; | |
445 | int i, ret; | |
446 | ||
f0547200 NC |
447 | /* |
448 | * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class | |
449 | * is not set, default to RT priority class. | |
450 | */ | |
451 | for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) { | |
452 | /* | |
453 | * A cmdprio value is only used when fio_cmdprio_percentage() | |
454 | * returns non-zero, so it is safe to set a class even for a | |
455 | * DDIR that will never use it. | |
456 | */ | |
457 | if (!options->class[i]) | |
458 | options->class[i] = IOPRIO_CLASS_RT; | |
459 | } | |
460 | ||
d6cbeab4 NC |
461 | switch (cmdprio->mode) { |
462 | case CMDPRIO_MODE_BSSPLIT: | |
463 | ret = fio_cmdprio_parse_and_gen_bssplit(td, cmdprio); | |
464 | break; | |
465 | case CMDPRIO_MODE_PERC: | |
f0547200 | 466 | ret = fio_cmdprio_gen_perc(td, cmdprio); |
d6cbeab4 NC |
467 | break; |
468 | default: | |
469 | assert(0); | |
470 | return 1; | |
471 | } | |
e27b9ff0 | 472 | |
d6cbeab4 NC |
473 | return ret; |
474 | } | |
475 | ||
476 | void fio_cmdprio_cleanup(struct cmdprio *cmdprio) | |
477 | { | |
f0547200 NC |
478 | enum fio_ddir ddir; |
479 | int i; | |
d6cbeab4 NC |
480 | |
481 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { | |
f0547200 NC |
482 | for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) |
483 | free(cmdprio->bsprio_desc[ddir].bsprios[i].prios); | |
484 | free(cmdprio->bsprio_desc[ddir].bsprios); | |
485 | cmdprio->bsprio_desc[ddir].bsprios = NULL; | |
486 | cmdprio->bsprio_desc[ddir].nr_bsprios = 0; | |
d6cbeab4 NC |
487 | } |
488 | ||
489 | /* | |
490 | * options points to a cmdprio_options struct that is part of td->eo. | |
491 | * td->eo itself will be freed by free_ioengine(). | |
492 | */ | |
493 | cmdprio->options = NULL; | |
494 | } | |
495 | ||
496 | int fio_cmdprio_init(struct thread_data *td, struct cmdprio *cmdprio, | |
497 | struct cmdprio_options *options) | |
498 | { | |
499 | struct thread_options *to = &td->o; | |
500 | bool has_cmdprio_percentage = false; | |
501 | bool has_cmdprio_bssplit = false; | |
502 | int i; | |
503 | ||
504 | cmdprio->options = options; | |
505 | ||
506 | if (options->bssplit_str && strlen(options->bssplit_str)) | |
507 | has_cmdprio_bssplit = true; | |
508 | ||
509 | for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) { | |
510 | if (options->percentage[i]) | |
511 | has_cmdprio_percentage = true; | |
512 | } | |
513 | ||
e27b9ff0 NC |
514 | /* |
515 | * Check for option conflicts | |
516 | */ | |
517 | if (has_cmdprio_percentage && has_cmdprio_bssplit) { | |
518 | log_err("%s: cmdprio_percentage and cmdprio_bssplit options " | |
519 | "are mutually exclusive\n", | |
520 | to->name); | |
521 | return 1; | |
522 | } | |
523 | ||
97f2d484 NC |
524 | if (has_cmdprio_bssplit) |
525 | cmdprio->mode = CMDPRIO_MODE_BSSPLIT; | |
526 | else if (has_cmdprio_percentage) | |
527 | cmdprio->mode = CMDPRIO_MODE_PERC; | |
528 | else | |
529 | cmdprio->mode = CMDPRIO_MODE_NONE; | |
e27b9ff0 | 530 | |
d6cbeab4 NC |
531 | /* Nothing left to do if cmdprio is not used */ |
532 | if (cmdprio->mode == CMDPRIO_MODE_NONE) | |
533 | return 0; | |
534 | ||
535 | return fio_cmdprio_parse_and_gen(td, cmdprio); | |
e27b9ff0 | 536 | } |