Commit | Line | Data |
---|---|---|
e27b9ff0 NC |
1 | /* |
2 | * IO priority handling helper functions common to the libaio and io_uring | |
3 | * engines. | |
4 | */ | |
5 | ||
6 | #include "cmdprio.h" | |
7 | ||
f0547200 NC |
/*
 * Scratch result of parsing the cmdprio_bssplit option for one data
 * direction. The entries array is released once the matching
 * struct cmdprio_bsprio_desc has been generated and stored in
 * cmdprio->bsprio_desc.
 */
struct cmdprio_parse_result {
	struct split_prio *entries;	/* parsed entries for this ddir */
	int nr_entries;			/* number of elements in entries */
};
16 | ||
/*
 * Scratch list of the unique priority values seen for one data direction
 * during init. It is released once the matching clat_prio_stat array has
 * been stored in td->ts.clat_prio and every struct cmdprio_prio has been
 * given its clat_prio_index.
 */
struct cmdprio_values {
	unsigned int *prios;	/* unique priority values collected so far */
	int nr_prios;		/* number of valid elements in prios */
};
26 | ||
/*
 * Linear search of the first nr_prios elements of all_prios for prio.
 * Returns the index of the first match, or -1 when prio is not present.
 */
static int find_clat_prio_index(unsigned int *all_prios, int nr_prios,
				int32_t prio)
{
	int idx = 0;

	while (idx < nr_prios) {
		if (all_prios[idx] == prio)
			return idx;
		idx++;
	}

	return -1;
}
e27b9ff0 | 39 | |
f0547200 NC |
40 | /** |
41 | * assign_clat_prio_index - In order to avoid stat.c the need to loop through | |
42 | * all possible priorities each time add_clat_sample() / add_lat_sample() is | |
43 | * called, save which index to use in each cmdprio_prio. This will later be | |
44 | * propagated to the io_u, if the specific io_u was determined to use a cmdprio | |
45 | * priority value. | |
46 | */ | |
47 | static void assign_clat_prio_index(struct cmdprio_prio *prio, | |
48 | struct cmdprio_values *values) | |
49 | { | |
50 | int clat_prio_index = find_clat_prio_index(values->prios, | |
51 | values->nr_prios, | |
52 | prio->prio); | |
53 | if (clat_prio_index == -1) { | |
54 | clat_prio_index = values->nr_prios; | |
55 | values->prios[clat_prio_index] = prio->prio; | |
56 | values->nr_prios++; | |
57 | } | |
58 | prio->clat_prio_index = clat_prio_index; | |
59 | } | |
60 | ||
61 | /** | |
62 | * init_cmdprio_values - Allocate a temporary array that can hold all unique | |
63 | * priorities (per ddir), so that we can assign_clat_prio_index() for each | |
64 | * cmdprio_prio during setup. This temporary array is freed after setup. | |
65 | */ | |
66 | static int init_cmdprio_values(struct cmdprio_values *values, | |
67 | int max_unique_prios, struct thread_stat *ts) | |
68 | { | |
69 | values->prios = calloc(max_unique_prios + 1, | |
70 | sizeof(*values->prios)); | |
71 | if (!values->prios) | |
e27b9ff0 | 72 | return 1; |
e27b9ff0 | 73 | |
f0547200 NC |
74 | /* td->ioprio/ts->ioprio is always stored at index 0. */ |
75 | values->prios[0] = ts->ioprio; | |
76 | values->nr_prios++; | |
77 | ||
78 | return 0; | |
79 | } | |
80 | ||
81 | /** | |
82 | * init_ts_clat_prio - Allocates and fills a clat_prio_stat array which holds | |
83 | * all unique priorities (per ddir). | |
84 | */ | |
85 | static int init_ts_clat_prio(struct thread_stat *ts, enum fio_ddir ddir, | |
86 | struct cmdprio_values *values) | |
87 | { | |
88 | int i; | |
89 | ||
90 | if (alloc_clat_prio_stat_ddir(ts, ddir, values->nr_prios)) | |
e27b9ff0 NC |
91 | return 1; |
92 | ||
f0547200 NC |
93 | for (i = 0; i < values->nr_prios; i++) |
94 | ts->clat_prio[ddir][i].ioprio = values->prios[i]; | |
95 | ||
96 | return 0; | |
97 | } | |
98 | ||
99 | static int fio_cmdprio_fill_bsprio(struct cmdprio_bsprio *bsprio, | |
100 | struct split_prio *entries, | |
101 | struct cmdprio_values *values, | |
102 | int implicit_cmdprio, int start, int end) | |
103 | { | |
104 | struct cmdprio_prio *prio; | |
105 | int i = end - start + 1; | |
106 | ||
107 | bsprio->prios = calloc(i, sizeof(*bsprio->prios)); | |
108 | if (!bsprio->prios) | |
109 | return 1; | |
110 | ||
111 | bsprio->bs = entries[start].bs; | |
112 | bsprio->nr_prios = 0; | |
113 | for (i = start; i <= end; i++) { | |
114 | prio = &bsprio->prios[bsprio->nr_prios]; | |
115 | prio->perc = entries[i].perc; | |
116 | if (entries[i].prio == -1) | |
117 | prio->prio = implicit_cmdprio; | |
118 | else | |
119 | prio->prio = entries[i].prio; | |
120 | assign_clat_prio_index(prio, values); | |
121 | bsprio->tot_perc += entries[i].perc; | |
122 | if (bsprio->tot_perc > 100) { | |
123 | log_err("fio: cmdprio_bssplit total percentage " | |
124 | "for bs: %"PRIu64" exceeds 100\n", | |
125 | bsprio->bs); | |
126 | free(bsprio->prios); | |
127 | return 1; | |
e27b9ff0 | 128 | } |
f0547200 NC |
129 | bsprio->nr_prios++; |
130 | } | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
135 | static int | |
136 | fio_cmdprio_generate_bsprio_desc(struct cmdprio_bsprio_desc *bsprio_desc, | |
137 | struct cmdprio_parse_result *parse_res, | |
138 | struct cmdprio_values *values, | |
139 | int implicit_cmdprio) | |
140 | { | |
141 | struct split_prio *entries = parse_res->entries; | |
142 | int nr_entries = parse_res->nr_entries; | |
143 | struct cmdprio_bsprio *bsprio; | |
144 | int i, start, count = 0; | |
145 | ||
146 | /* | |
147 | * The parsed result is sorted by blocksize, so count only the number | |
148 | * of different blocksizes, to know how many cmdprio_bsprio we need. | |
149 | */ | |
150 | for (i = 0; i < nr_entries; i++) { | |
151 | while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs) | |
152 | i++; | |
153 | count++; | |
154 | } | |
155 | ||
156 | /* | |
157 | * This allocation is not freed on error. Instead, the calling function | |
158 | * is responsible for calling fio_cmdprio_cleanup() on error. | |
159 | */ | |
160 | bsprio_desc->bsprios = calloc(count, sizeof(*bsprio_desc->bsprios)); | |
161 | if (!bsprio_desc->bsprios) | |
162 | return 1; | |
163 | ||
164 | start = 0; | |
165 | bsprio_desc->nr_bsprios = 0; | |
166 | for (i = 0; i < nr_entries; i++) { | |
167 | while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs) | |
168 | i++; | |
169 | bsprio = &bsprio_desc->bsprios[bsprio_desc->nr_bsprios]; | |
170 | /* | |
171 | * All parsed entries with the same blocksize get saved in the | |
172 | * same cmdprio_bsprio, to expedite the search in the hot path. | |
173 | */ | |
174 | if (fio_cmdprio_fill_bsprio(bsprio, entries, values, | |
175 | implicit_cmdprio, start, i)) | |
176 | return 1; | |
177 | ||
178 | start = i + 1; | |
179 | bsprio_desc->nr_bsprios++; | |
e27b9ff0 NC |
180 | } |
181 | ||
182 | return 0; | |
183 | } | |
184 | ||
f0547200 NC |
185 | static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg, |
186 | enum fio_ddir ddir, char *str, bool data) | |
187 | { | |
188 | struct cmdprio_parse_result *parse_res_arr = cb_arg; | |
189 | struct cmdprio_parse_result *parse_res = &parse_res_arr[ddir]; | |
190 | ||
191 | if (ddir == DDIR_TRIM) | |
192 | return 0; | |
193 | ||
194 | if (split_parse_prio_ddir(to, &parse_res->entries, | |
195 | &parse_res->nr_entries, str)) | |
196 | return 1; | |
197 | ||
198 | return 0; | |
199 | } | |
200 | ||
201 | static int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input, | |
202 | struct cmdprio_parse_result *parse_res) | |
e27b9ff0 NC |
203 | { |
204 | char *str, *p; | |
d6cbeab4 | 205 | int ret = 0; |
e27b9ff0 NC |
206 | |
207 | p = str = strdup(input); | |
208 | ||
209 | strip_blank_front(&str); | |
210 | strip_blank_end(str); | |
211 | ||
f0547200 | 212 | ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, parse_res, |
d6cbeab4 | 213 | false); |
e27b9ff0 NC |
214 | |
215 | free(p); | |
216 | return ret; | |
217 | } | |
218 | ||
f0547200 NC |
219 | /** |
220 | * fio_cmdprio_percentage - Returns the percentage of I/Os that should | |
221 | * use a cmdprio priority value (rather than the default context priority). | |
222 | * | |
223 | * For CMDPRIO_MODE_BSSPLIT, if the percentage is non-zero, we will also | |
224 | * return the matching bsprio, to avoid the same linear search elsewhere. | |
225 | * For CMDPRIO_MODE_PERC, we will never return a bsprio. | |
226 | */ | |
227 | static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u, | |
228 | struct cmdprio_bsprio **bsprio) | |
e27b9ff0 | 229 | { |
f0547200 | 230 | struct cmdprio_bsprio *bsprio_entry; |
e27b9ff0 | 231 | enum fio_ddir ddir = io_u->ddir; |
e27b9ff0 NC |
232 | int i; |
233 | ||
97f2d484 NC |
234 | switch (cmdprio->mode) { |
235 | case CMDPRIO_MODE_PERC: | |
f0547200 NC |
236 | *bsprio = NULL; |
237 | return cmdprio->perc_entry[ddir].perc; | |
97f2d484 | 238 | case CMDPRIO_MODE_BSSPLIT: |
f0547200 NC |
239 | for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) { |
240 | bsprio_entry = &cmdprio->bsprio_desc[ddir].bsprios[i]; | |
241 | if (bsprio_entry->bs == io_u->buflen) { | |
242 | *bsprio = bsprio_entry; | |
243 | return bsprio_entry->tot_perc; | |
244 | } | |
97f2d484 NC |
245 | } |
246 | break; | |
247 | default: | |
248 | /* | |
249 | * An I/O engine should never call this function if cmdprio | |
250 | * is not is use. | |
251 | */ | |
252 | assert(0); | |
e27b9ff0 NC |
253 | } |
254 | ||
f0547200 NC |
255 | /* |
256 | * This is totally fine, the given blocksize simply does not | |
257 | * have any (non-zero) cmdprio_bssplit entries defined. | |
258 | */ | |
259 | *bsprio = NULL; | |
e27b9ff0 NC |
260 | return 0; |
261 | } | |
262 | ||
127715b6 NC |
263 | /** |
264 | * fio_cmdprio_set_ioprio - Set an io_u ioprio according to cmdprio options | |
265 | * | |
266 | * Generates a random percentage value to determine if an io_u ioprio needs | |
267 | * to be set. If the random percentage value is within the user specified | |
268 | * percentage of I/Os that should use a cmdprio priority value (rather than | |
269 | * the default priority), then this function updates the io_u with an ioprio | |
270 | * value as defined by the cmdprio/cmdprio_class or cmdprio_bssplit options. | |
271 | * | |
272 | * Return true if the io_u ioprio was changed and false otherwise. | |
273 | */ | |
274 | bool fio_cmdprio_set_ioprio(struct thread_data *td, struct cmdprio *cmdprio, | |
275 | struct io_u *io_u) | |
276 | { | |
f0547200 NC |
277 | struct cmdprio_bsprio *bsprio; |
278 | unsigned int p, rand; | |
279 | uint32_t perc = 0; | |
280 | int i; | |
281 | ||
282 | p = fio_cmdprio_percentage(cmdprio, io_u, &bsprio); | |
283 | if (!p) | |
284 | return false; | |
285 | ||
286 | rand = rand_between(&td->prio_state, 0, 99); | |
287 | if (rand >= p) | |
288 | return false; | |
289 | ||
290 | switch (cmdprio->mode) { | |
291 | case CMDPRIO_MODE_PERC: | |
292 | io_u->ioprio = cmdprio->perc_entry[io_u->ddir].prio; | |
293 | io_u->clat_prio_index = | |
294 | cmdprio->perc_entry[io_u->ddir].clat_prio_index; | |
127715b6 | 295 | return true; |
f0547200 NC |
296 | case CMDPRIO_MODE_BSSPLIT: |
297 | assert(bsprio); | |
298 | for (i = 0; i < bsprio->nr_prios; i++) { | |
299 | struct cmdprio_prio *prio = &bsprio->prios[i]; | |
300 | ||
301 | perc += prio->perc; | |
302 | if (rand < perc) { | |
303 | io_u->ioprio = prio->prio; | |
304 | io_u->clat_prio_index = prio->clat_prio_index; | |
305 | return true; | |
306 | } | |
307 | } | |
308 | break; | |
309 | default: | |
310 | assert(0); | |
127715b6 NC |
311 | } |
312 | ||
f0547200 NC |
313 | /* When rand < p (total perc), we should always find a cmdprio_prio. */ |
314 | assert(0); | |
315 | return false; | |
316 | } | |
317 | ||
318 | static int fio_cmdprio_gen_perc(struct thread_data *td, struct cmdprio *cmdprio) | |
319 | { | |
320 | struct cmdprio_options *options = cmdprio->options; | |
321 | struct cmdprio_prio *prio; | |
f54b69c3 | 322 | struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {}; |
f0547200 NC |
323 | struct thread_stat *ts = &td->ts; |
324 | enum fio_ddir ddir; | |
325 | int ret; | |
326 | ||
327 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { | |
127715b6 | 328 | /* |
f0547200 NC |
329 | * Do not allocate a clat_prio array nor set the cmdprio struct |
330 | * if zero percent of the I/Os (for the ddir) should use a | |
331 | * cmdprio priority value, or when the ddir is not enabled. | |
127715b6 | 332 | */ |
f0547200 NC |
333 | if (!options->percentage[ddir] || |
334 | (ddir == DDIR_READ && !td_read(td)) || | |
335 | (ddir == DDIR_WRITE && !td_write(td))) | |
336 | continue; | |
337 | ||
338 | ret = init_cmdprio_values(&values[ddir], 1, ts); | |
339 | if (ret) | |
340 | goto err; | |
341 | ||
342 | prio = &cmdprio->perc_entry[ddir]; | |
343 | prio->perc = options->percentage[ddir]; | |
344 | prio->prio = ioprio_value(options->class[ddir], | |
345 | options->level[ddir]); | |
346 | assign_clat_prio_index(prio, &values[ddir]); | |
347 | ||
348 | ret = init_ts_clat_prio(ts, ddir, &values[ddir]); | |
349 | if (ret) | |
350 | goto err; | |
351 | ||
352 | free(values[ddir].prios); | |
353 | values[ddir].prios = NULL; | |
354 | values[ddir].nr_prios = 0; | |
127715b6 NC |
355 | } |
356 | ||
f0547200 NC |
357 | return 0; |
358 | ||
359 | err: | |
360 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) | |
361 | free(values[ddir].prios); | |
362 | free_clat_prio_stats(ts); | |
363 | ||
364 | return ret; | |
127715b6 NC |
365 | } |
366 | ||
d6cbeab4 NC |
367 | static int fio_cmdprio_parse_and_gen_bssplit(struct thread_data *td, |
368 | struct cmdprio *cmdprio) | |
e27b9ff0 | 369 | { |
d6cbeab4 | 370 | struct cmdprio_options *options = cmdprio->options; |
f54b69c3 BVA |
371 | struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {}; |
372 | struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {}; | |
f0547200 NC |
373 | struct thread_stat *ts = &td->ts; |
374 | int ret, implicit_cmdprio; | |
375 | enum fio_ddir ddir; | |
376 | ||
377 | ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str, | |
378 | &parse_res[0]); | |
d6cbeab4 NC |
379 | if (ret) |
380 | goto err; | |
381 | ||
f0547200 NC |
382 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { |
383 | /* | |
384 | * Do not allocate a clat_prio array nor set the cmdprio structs | |
385 | * if there are no non-zero entries (for the ddir), or when the | |
386 | * ddir is not enabled. | |
387 | */ | |
388 | if (!parse_res[ddir].nr_entries || | |
389 | (ddir == DDIR_READ && !td_read(td)) || | |
390 | (ddir == DDIR_WRITE && !td_write(td))) { | |
391 | free(parse_res[ddir].entries); | |
392 | parse_res[ddir].entries = NULL; | |
393 | parse_res[ddir].nr_entries = 0; | |
394 | continue; | |
395 | } | |
396 | ||
397 | ret = init_cmdprio_values(&values[ddir], | |
398 | parse_res[ddir].nr_entries, ts); | |
399 | if (ret) | |
400 | goto err; | |
401 | ||
402 | implicit_cmdprio = ioprio_value(options->class[ddir], | |
403 | options->level[ddir]); | |
404 | ||
405 | ret = fio_cmdprio_generate_bsprio_desc(&cmdprio->bsprio_desc[ddir], | |
406 | &parse_res[ddir], | |
407 | &values[ddir], | |
408 | implicit_cmdprio); | |
409 | if (ret) | |
410 | goto err; | |
411 | ||
412 | free(parse_res[ddir].entries); | |
413 | parse_res[ddir].entries = NULL; | |
414 | parse_res[ddir].nr_entries = 0; | |
415 | ||
416 | ret = init_ts_clat_prio(ts, ddir, &values[ddir]); | |
417 | if (ret) | |
418 | goto err; | |
419 | ||
420 | free(values[ddir].prios); | |
421 | values[ddir].prios = NULL; | |
422 | values[ddir].nr_prios = 0; | |
423 | } | |
424 | ||
d6cbeab4 NC |
425 | return 0; |
426 | ||
427 | err: | |
f0547200 NC |
428 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { |
429 | free(parse_res[ddir].entries); | |
430 | free(values[ddir].prios); | |
431 | } | |
432 | free_clat_prio_stats(ts); | |
d6cbeab4 NC |
433 | fio_cmdprio_cleanup(cmdprio); |
434 | ||
435 | return ret; | |
436 | } | |
437 | ||
438 | static int fio_cmdprio_parse_and_gen(struct thread_data *td, | |
439 | struct cmdprio *cmdprio) | |
440 | { | |
441 | struct cmdprio_options *options = cmdprio->options; | |
442 | int i, ret; | |
443 | ||
f0547200 NC |
444 | /* |
445 | * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class | |
446 | * is not set, default to RT priority class. | |
447 | */ | |
448 | for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) { | |
449 | /* | |
450 | * A cmdprio value is only used when fio_cmdprio_percentage() | |
451 | * returns non-zero, so it is safe to set a class even for a | |
452 | * DDIR that will never use it. | |
453 | */ | |
454 | if (!options->class[i]) | |
455 | options->class[i] = IOPRIO_CLASS_RT; | |
456 | } | |
457 | ||
d6cbeab4 NC |
458 | switch (cmdprio->mode) { |
459 | case CMDPRIO_MODE_BSSPLIT: | |
460 | ret = fio_cmdprio_parse_and_gen_bssplit(td, cmdprio); | |
461 | break; | |
462 | case CMDPRIO_MODE_PERC: | |
f0547200 | 463 | ret = fio_cmdprio_gen_perc(td, cmdprio); |
d6cbeab4 NC |
464 | break; |
465 | default: | |
466 | assert(0); | |
467 | return 1; | |
468 | } | |
e27b9ff0 | 469 | |
d6cbeab4 NC |
470 | return ret; |
471 | } | |
472 | ||
473 | void fio_cmdprio_cleanup(struct cmdprio *cmdprio) | |
474 | { | |
f0547200 NC |
475 | enum fio_ddir ddir; |
476 | int i; | |
d6cbeab4 NC |
477 | |
478 | for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) { | |
f0547200 NC |
479 | for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) |
480 | free(cmdprio->bsprio_desc[ddir].bsprios[i].prios); | |
481 | free(cmdprio->bsprio_desc[ddir].bsprios); | |
482 | cmdprio->bsprio_desc[ddir].bsprios = NULL; | |
483 | cmdprio->bsprio_desc[ddir].nr_bsprios = 0; | |
d6cbeab4 NC |
484 | } |
485 | ||
486 | /* | |
487 | * options points to a cmdprio_options struct that is part of td->eo. | |
488 | * td->eo itself will be freed by free_ioengine(). | |
489 | */ | |
490 | cmdprio->options = NULL; | |
491 | } | |
492 | ||
493 | int fio_cmdprio_init(struct thread_data *td, struct cmdprio *cmdprio, | |
494 | struct cmdprio_options *options) | |
495 | { | |
496 | struct thread_options *to = &td->o; | |
497 | bool has_cmdprio_percentage = false; | |
498 | bool has_cmdprio_bssplit = false; | |
499 | int i; | |
500 | ||
501 | cmdprio->options = options; | |
502 | ||
503 | if (options->bssplit_str && strlen(options->bssplit_str)) | |
504 | has_cmdprio_bssplit = true; | |
505 | ||
506 | for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) { | |
507 | if (options->percentage[i]) | |
508 | has_cmdprio_percentage = true; | |
509 | } | |
510 | ||
e27b9ff0 NC |
511 | /* |
512 | * Check for option conflicts | |
513 | */ | |
514 | if (has_cmdprio_percentage && has_cmdprio_bssplit) { | |
515 | log_err("%s: cmdprio_percentage and cmdprio_bssplit options " | |
516 | "are mutually exclusive\n", | |
517 | to->name); | |
518 | return 1; | |
519 | } | |
520 | ||
97f2d484 NC |
521 | if (has_cmdprio_bssplit) |
522 | cmdprio->mode = CMDPRIO_MODE_BSSPLIT; | |
523 | else if (has_cmdprio_percentage) | |
524 | cmdprio->mode = CMDPRIO_MODE_PERC; | |
525 | else | |
526 | cmdprio->mode = CMDPRIO_MODE_NONE; | |
e27b9ff0 | 527 | |
d6cbeab4 NC |
528 | /* Nothing left to do if cmdprio is not used */ |
529 | if (cmdprio->mode == CMDPRIO_MODE_NONE) | |
530 | return 0; | |
531 | ||
532 | return fio_cmdprio_parse_and_gen(td, cmdprio); | |
e27b9ff0 | 533 | } |