Commit | Line | Data |
---|---|---|
51575029 JA |
1 | /* |
2 | * Rated submission helpers | |
3 | * | |
4 | * Copyright (C) 2015 Jens Axboe <axboe@kernel.dk> | |
5 | * | |
6 | */ | |
a38c70a8 | 7 | #include <assert.h> |
83276370 DP |
8 | #include <errno.h> |
9 | #include <pthread.h> | |
10 | ||
40511301 | 11 | #include "fio.h" |
618ee94c | 12 | #include "ioengines.h" |
40511301 | 13 | #include "lib/getrusage.h" |
24660963 | 14 | #include "rate-submit.h" |
40511301 | 15 | |
c06379a6 VF |
16 | static void check_overlap(struct io_u *io_u) |
17 | { | |
da8f124f | 18 | int res; |
c06379a6 | 19 | |
c76b661c BVA |
20 | /* |
21 | * Allow only one thread to check for overlap at a time to prevent two | |
22 | * threads from thinking the coast is clear and then submitting IOs | |
23 | * that overlap with each other. | |
24 | * | |
25 | * If an overlap is found, release the lock and re-acquire it before | |
26 | * checking again to give other threads a chance to make progress. | |
27 | * | |
28 | * If no overlap is found, release the lock when the io_u's | |
29 | * IO_U_F_FLIGHT flag is set so that this io_u can be checked by other | |
30 | * threads as they assess overlap. | |
31 | */ | |
a38c70a8 | 32 | res = pthread_mutex_lock(&overlap_check); |
83276370 DP |
33 | if (fio_unlikely(res != 0)) { |
34 | log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno)); | |
35 | abort(); | |
36 | } | |
c76b661c BVA |
37 | |
38 | retry: | |
da8f124f | 39 | for_each_td(td) { |
c76b661c BVA |
40 | if (td->runstate <= TD_SETTING_UP || |
41 | td->runstate >= TD_FINISHING || | |
42 | !td->o.serialize_overlap || | |
43 | td->o.io_submit_mode != IO_MODE_OFFLOAD) | |
44 | continue; | |
c06379a6 | 45 | |
c76b661c BVA |
46 | if (!in_flight_overlap(&td->io_u_all, io_u)) |
47 | continue; | |
48 | ||
a38c70a8 | 49 | res = pthread_mutex_unlock(&overlap_check); |
83276370 DP |
50 | if (fio_unlikely(res != 0)) { |
51 | log_err("failed to unlock overlap check mutex, err: %i:%s", errno, strerror(errno)); | |
52 | abort(); | |
53 | } | |
a38c70a8 | 54 | res = pthread_mutex_lock(&overlap_check); |
83276370 DP |
55 | if (fio_unlikely(res != 0)) { |
56 | log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno)); | |
57 | abort(); | |
58 | } | |
c76b661c | 59 | goto retry; |
da8f124f | 60 | } end_for_each(); |
c06379a6 VF |
61 | } |
62 | ||
155f2f02 JA |
63 | static int io_workqueue_fn(struct submit_worker *sw, |
64 | struct workqueue_work *work) | |
40511301 JA |
65 | { |
66 | struct io_u *io_u = container_of(work, struct io_u, work); | |
67 | const enum fio_ddir ddir = io_u->ddir; | |
b86ad8f1 | 68 | struct thread_data *td = sw->priv; |
d28174f0 | 69 | int ret, error; |
40511301 | 70 | |
c06379a6 VF |
71 | if (td->o.serialize_overlap) |
72 | check_overlap(io_u); | |
73 | ||
40511301 JA |
74 | dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid()); |
75 | ||
1651e431 | 76 | io_u_set(td, io_u, IO_U_F_NO_FILE_PUT); |
40511301 JA |
77 | |
78 | td->cur_depth++; | |
79 | ||
80 | do { | |
81 | ret = td_io_queue(td, io_u); | |
82 | if (ret != FIO_Q_BUSY) | |
83 | break; | |
84 | ret = io_u_queued_complete(td, 1); | |
85 | if (ret > 0) | |
86 | td->cur_depth -= ret; | |
d28174f0 JA |
87 | else if (ret < 0) |
88 | break; | |
1651e431 | 89 | io_u_clear(td, io_u, IO_U_F_FLIGHT); |
40511301 JA |
90 | } while (1); |
91 | ||
92 | dprint(FD_RATE, "io_u %p ret %d by %u\n", io_u, ret, gettid()); | |
93 | ||
d28174f0 | 94 | error = io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL); |
40511301 JA |
95 | |
96 | if (ret == FIO_Q_COMPLETED) | |
97 | td->cur_depth--; | |
98 | else if (ret == FIO_Q_QUEUED) { | |
99 | unsigned int min_evts; | |
100 | ||
101 | if (td->o.iodepth == 1) | |
102 | min_evts = 1; | |
103 | else | |
104 | min_evts = 0; | |
105 | ||
106 | ret = io_u_queued_complete(td, min_evts); | |
107 | if (ret > 0) | |
108 | td->cur_depth -= ret; | |
40511301 | 109 | } |
155f2f02 | 110 | |
d89ee9f4 BVA |
111 | if (error || td->error) { |
112 | pthread_mutex_lock(&td->io_u_lock); | |
d28174f0 | 113 | pthread_cond_signal(&td->parent->free_cond); |
d89ee9f4 BVA |
114 | pthread_mutex_unlock(&td->io_u_lock); |
115 | } | |
d28174f0 | 116 | |
155f2f02 | 117 | return 0; |
40511301 JA |
118 | } |
119 | ||
120 | static bool io_workqueue_pre_sleep_flush_fn(struct submit_worker *sw) | |
121 | { | |
b86ad8f1 | 122 | struct thread_data *td = sw->priv; |
40511301 | 123 | |
d28174f0 JA |
124 | if (td->error) |
125 | return false; | |
40511301 JA |
126 | if (td->io_u_queued || td->cur_depth || td->io_u_in_flight) |
127 | return true; | |
128 | ||
129 | return false; | |
130 | } | |
131 | ||
132 | static void io_workqueue_pre_sleep_fn(struct submit_worker *sw) | |
133 | { | |
b86ad8f1 | 134 | struct thread_data *td = sw->priv; |
40511301 JA |
135 | int ret; |
136 | ||
137 | ret = io_u_quiesce(td); | |
138 | if (ret > 0) | |
139 | td->cur_depth -= ret; | |
140 | } | |
141 | ||
142 | static int io_workqueue_alloc_fn(struct submit_worker *sw) | |
143 | { | |
144 | struct thread_data *td; | |
145 | ||
146 | td = calloc(1, sizeof(*td)); | |
b86ad8f1 | 147 | sw->priv = td; |
40511301 JA |
148 | return 0; |
149 | } | |
150 | ||
151 | static void io_workqueue_free_fn(struct submit_worker *sw) | |
152 | { | |
b86ad8f1 CB |
153 | free(sw->priv); |
154 | sw->priv = NULL; | |
40511301 JA |
155 | } |
156 | ||
157 | static int io_workqueue_init_worker_fn(struct submit_worker *sw) | |
158 | { | |
159 | struct thread_data *parent = sw->wq->td; | |
b86ad8f1 | 160 | struct thread_data *td = sw->priv; |
40511301 JA |
161 | |
162 | memcpy(&td->o, &parent->o, sizeof(td->o)); | |
163 | memcpy(&td->ts, &parent->ts, sizeof(td->ts)); | |
164 | td->o.uid = td->o.gid = -1U; | |
165 | dup_files(td, parent); | |
166 | td->eo = parent->eo; | |
167 | fio_options_mem_dupe(td); | |
c83579f0 | 168 | td->iolog_f = parent->iolog_f; |
40511301 JA |
169 | |
170 | if (ioengine_load(td)) | |
171 | goto err; | |
172 | ||
40511301 JA |
173 | td->pid = gettid(); |
174 | ||
175 | INIT_FLIST_HEAD(&td->io_log_list); | |
176 | INIT_FLIST_HEAD(&td->io_hist_list); | |
177 | INIT_FLIST_HEAD(&td->verify_list); | |
178 | INIT_FLIST_HEAD(&td->trim_list); | |
40511301 JA |
179 | td->io_hist_tree = RB_ROOT; |
180 | ||
181 | td->o.iodepth = 1; | |
182 | if (td_io_init(td)) | |
183 | goto err_io_init; | |
184 | ||
4c085cf2 VF |
185 | if (td->io_ops->post_init && td->io_ops->post_init(td)) |
186 | goto err_io_init; | |
187 | ||
d5b3cfd4 | 188 | set_epoch_time(td, td->o.log_unix_epoch | td->o.log_alternate_epoch, td->o.log_alternate_epoch_clock_id); |
40511301 JA |
189 | fio_getrusage(&td->ru_start); |
190 | clear_io_state(td, 1); | |
191 | ||
192 | td_set_runstate(td, TD_RUNNING); | |
b7aae4ba | 193 | td->flags |= TD_F_CHILD | TD_F_NEED_LOCK; |
40511301 JA |
194 | td->parent = parent; |
195 | return 0; | |
196 | ||
197 | err_io_init: | |
198 | close_ioengine(td); | |
199 | err: | |
200 | return 1; | |
201 | ||
202 | } | |
203 | ||
204 | static void io_workqueue_exit_worker_fn(struct submit_worker *sw, | |
205 | unsigned int *sum_cnt) | |
206 | { | |
b86ad8f1 | 207 | struct thread_data *td = sw->priv; |
40511301 JA |
208 | |
209 | (*sum_cnt)++; | |
691310e2 NC |
210 | |
211 | /* | |
212 | * io_workqueue_update_acct_fn() doesn't support per prio stats, and | |
213 | * even if it did, offload can't be used with all async IO engines. | |
214 | * If group reporting is set in the parent td, the group result | |
215 | * generated by __show_run_stats() can still contain multiple prios | |
216 | * from different offloaded jobs. | |
217 | */ | |
218 | sw->wq->td->ts.disable_prio_stat = 1; | |
016869be | 219 | sum_thread_stats(&sw->wq->td->ts, &td->ts); |
40511301 JA |
220 | |
221 | fio_options_free(td); | |
222 | close_and_free_files(td); | |
223 | if (td->io_ops) | |
224 | close_ioengine(td); | |
225 | td_set_runstate(td, TD_EXITED); | |
226 | } | |
227 | ||
/*
 * Move a counter from *src into *dst: add *src to *dst and reset *src to
 * zero. Nothing is touched when *src is already zero. With CONFIG_SFAA
 * the addition uses __sync_fetch_and_add(); otherwise it is a plain add.
 */
static void sum_val(uint64_t *dst, uint64_t *src)
{
	if (!*src)
		return;

#ifdef CONFIG_SFAA
	__sync_fetch_and_add(dst, *src);
#else
	*dst += *src;
#endif
	*src = 0;
}
245 | ||
/*
 * Release both mutexes taken by pthread_double_lock(). Compiles to a
 * no-op when CONFIG_SFAA is defined.
 */
static void pthread_double_unlock(pthread_mutex_t *lock1,
				  pthread_mutex_t *lock2)
{
#ifdef CONFIG_SFAA
	/* No locks are taken in this configuration. */
#else
	pthread_mutex_unlock(lock1);
	pthread_mutex_unlock(lock2);
#endif
}
254 | ||
/*
 * Acquire two mutexes in a globally consistent order (lowest address
 * first) so that two threads locking the same pair with the arguments
 * swapped cannot deadlock against each other. Compiles to a no-op when
 * CONFIG_SFAA is defined.
 *
 * The addresses are compared as uintptr_t values: a relational
 * comparison of pointers to unrelated objects is undefined in ISO C.
 */
static void pthread_double_lock(pthread_mutex_t *lock1, pthread_mutex_t *lock2)
{
#ifndef CONFIG_SFAA
	if ((uintptr_t) lock1 < (uintptr_t) lock2) {
		pthread_mutex_lock(lock1);
		pthread_mutex_lock(lock2);
	} else {
		pthread_mutex_lock(lock2);
		pthread_mutex_lock(lock1);
	}
#endif
}
267 | ||
268 | static void sum_ddir(struct thread_data *dst, struct thread_data *src, | |
269 | enum fio_ddir ddir) | |
270 | { | |
271 | pthread_double_lock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock); | |
272 | ||
273 | sum_val(&dst->io_bytes[ddir], &src->io_bytes[ddir]); | |
274 | sum_val(&dst->io_blocks[ddir], &src->io_blocks[ddir]); | |
275 | sum_val(&dst->this_io_blocks[ddir], &src->this_io_blocks[ddir]); | |
276 | sum_val(&dst->this_io_bytes[ddir], &src->this_io_bytes[ddir]); | |
277 | sum_val(&dst->bytes_done[ddir], &src->bytes_done[ddir]); | |
191d6634 SK |
278 | if (ddir == DDIR_READ) |
279 | sum_val(&dst->bytes_verified, &src->bytes_verified); | |
40511301 JA |
280 | |
281 | pthread_double_unlock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock); | |
282 | } | |
283 | ||
284 | static void io_workqueue_update_acct_fn(struct submit_worker *sw) | |
285 | { | |
b86ad8f1 | 286 | struct thread_data *src = sw->priv; |
40511301 JA |
287 | struct thread_data *dst = sw->wq->td; |
288 | ||
289 | if (td_read(src)) | |
290 | sum_ddir(dst, src, DDIR_READ); | |
291 | if (td_write(src)) | |
292 | sum_ddir(dst, src, DDIR_WRITE); | |
293 | if (td_trim(src)) | |
294 | sum_ddir(dst, src, DDIR_TRIM); | |
295 | ||
296 | } | |
297 | ||
103b174e | 298 | static struct workqueue_ops rated_wq_ops = { |
40511301 JA |
299 | .fn = io_workqueue_fn, |
300 | .pre_sleep_flush_fn = io_workqueue_pre_sleep_flush_fn, | |
301 | .pre_sleep_fn = io_workqueue_pre_sleep_fn, | |
302 | .update_acct_fn = io_workqueue_update_acct_fn, | |
303 | .alloc_worker_fn = io_workqueue_alloc_fn, | |
304 | .free_worker_fn = io_workqueue_free_fn, | |
305 | .init_worker_fn = io_workqueue_init_worker_fn, | |
306 | .exit_worker_fn = io_workqueue_exit_worker_fn, | |
307 | }; | |
103b174e | 308 | |
24660963 | 309 | int rate_submit_init(struct thread_data *td, struct sk_out *sk_out) |
103b174e JA |
310 | { |
311 | if (td->o.io_submit_mode != IO_MODE_OFFLOAD) | |
312 | return 0; | |
313 | ||
24660963 | 314 | return workqueue_init(td, &td->io_wq, &rated_wq_ops, td->o.iodepth, sk_out); |
103b174e JA |
315 | } |
316 | ||
317 | void rate_submit_exit(struct thread_data *td) | |
318 | { | |
319 | if (td->o.io_submit_mode != IO_MODE_OFFLOAD) | |
320 | return; | |
321 | ||
322 | workqueue_exit(&td->io_wq); | |
323 | } |