Commit | Line | Data |
---|---|---|
10756b2c BS |
1 | /* |
2 | * Copyright (c)2020 System Fabric Works, Inc. All Rights Reserved. | |
3 | * mailto:info@systemfabricworks.com | |
4 | * | |
5 | * License: GPLv2, see COPYING. | |
6 | * | |
7 | * libcufile engine | |
8 | * | |
9 | * fio I/O engine using the NVIDIA cuFile API. | |
10 | * | |
11 | */ | |
12 | ||
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <cufile.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <pthread.h>

#include "../fio.h"
#include "../lib/pow2.h"
#include "../optgroup.h"
#include "../lib/memalign.h"
28 | ||
29 | #define ALIGNED_4KB(v) (((v) & 0x0fff) == 0) | |
30 | ||
31 | #define LOGGED_BUFLEN_NOT_ALIGNED 0x01 | |
32 | #define LOGGED_GPU_OFFSET_NOT_ALIGNED 0x02 | |
33 | #define GPU_ID_SEP ":" | |
34 | ||
/* I/O path selected by the 'cuda_io' option */
enum {
	IO_CUFILE = 1,	/* GPU-direct I/O through the cuFile API */
	IO_POSIX = 2	/* pread()/pwrite() plus explicit cudaMemcpy() */
};
39 | ||
/* Per-job engine options and runtime state, reachable via td->eo */
struct libcufile_options {
	struct thread_data *td;
	char *gpu_ids;			/* colon-separated list of GPU ids,
					   one per job */
	void *cu_mem_ptr;		/* GPU memory */
	void *junk_buf;			/* buffer to simulate cudaMemcpy with
					   posix I/O write */
	int my_gpu_id;			/* GPU id to use for this job */
	unsigned int cuda_io;		/* Type of I/O to use with CUDA */
	size_t total_mem;		/* size for cu_mem_ptr and junk_buf */
	int logged;			/* bitmask of log messages that have
					   been output, prevent flood */
};
53 | ||
/* Per-file engine data (cufile mode only), stored via FILE_SET_ENG_DATA() */
struct fio_libcufile_data {
	CUfileDescr_t cf_descr;		/* descriptor wrapping the POSIX fd */
	CUfileHandle_t cf_handle;	/* handle passed to cuFileRead/Write */
};
58 | ||
/* Engine option table: 'gpu_dev_ids' (round-robin GPU assignment) and
   'cuda_io' (cufile vs. posix I/O path) */
static struct fio_option options[] = {
	{
		.name = "gpu_dev_ids",
		.lname = "libcufile engine gpu dev ids",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct libcufile_options, gpu_ids),
		.help = "GPU IDs, one per subjob, separated by " GPU_ID_SEP,
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_LIBCUFILE,
	},
	{
		.name = "cuda_io",
		.lname = "libcufile cuda io",
		.type = FIO_OPT_STR,
		.off1 = offsetof(struct libcufile_options, cuda_io),
		.help = "Type of I/O to use with CUDA",
		.def = "cufile",
		.posval = {
			{ .ival = "cufile",
			  .oval = IO_CUFILE,
			  .help = "libcufile nvidia-fs"
			},
			{ .ival = "posix",
			  .oval = IO_POSIX,
			  .help = "POSIX I/O"
			}
		},
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_LIBCUFILE,
	},
	{
		.name = NULL,
	},
};
93 | ||
/* Worker threads currently between init() and cleanup(); protected by
   running_lock. The first thread in opens the cuFile driver, the last
   one out closes it. */
static int running = 0;
static int cufile_initialized = 0;	/* set once cuFileDriverOpen() succeeds */
static pthread_mutex_t running_lock = PTHREAD_MUTEX_INITIALIZER;
97 | ||
/*
 * Evaluate CUDA runtime call 'fn' once; on failure log the call text,
 * line number and error name, and set 'rc' to -1, else set 'rc' to 0.
 * NOTE: declares a local 'res' inside the do/while block, so a caller
 * variable named 'res' would be shadowed.
 */
#define check_cudaruntimecall(fn, rc) \
	do { \
		cudaError_t res = fn; \
		if (res != cudaSuccess) { \
			const char *str = cudaGetErrorName(res); \
			log_err("cuda runtime api call failed %s:%d : err=%d:%s\n", \
				#fn, __LINE__, res, str); \
			rc = -1; \
		} else \
			rc = 0; \
	} while(0)
109 | ||
110 | static const char *fio_libcufile_get_cuda_error(CUfileError_t st) | |
111 | { | |
112 | if (IS_CUFILE_ERR(st.err)) | |
113 | return cufileop_status_error(st.err); | |
114 | return "unknown"; | |
115 | } | |
116 | ||
117 | /* | |
118 | * Assign GPU to subjob roundrobin, similar to how multiple | |
119 | * entries in 'directory' are handled by fio. | |
120 | */ | |
121 | static int fio_libcufile_find_gpu_id(struct thread_data *td) | |
122 | { | |
123 | struct libcufile_options *o = td->eo; | |
124 | int gpu_id = 0; | |
125 | ||
126 | if (o->gpu_ids != NULL) { | |
127 | char *gpu_ids, *pos, *cur; | |
128 | int i, id_count, gpu_idx; | |
129 | ||
130 | for (id_count = 0, cur = o->gpu_ids; cur != NULL; id_count++) { | |
131 | cur = strchr(cur, GPU_ID_SEP[0]); | |
132 | if (cur != NULL) | |
133 | cur++; | |
134 | } | |
135 | ||
136 | gpu_idx = td->subjob_number % id_count; | |
137 | ||
138 | pos = gpu_ids = strdup(o->gpu_ids); | |
139 | if (gpu_ids == NULL) { | |
140 | log_err("strdup(gpu_ids): err=%d\n", errno); | |
141 | return -1; | |
142 | } | |
143 | ||
144 | i = 0; | |
145 | while (pos != NULL && i <= gpu_idx) { | |
146 | i++; | |
147 | cur = strsep(&pos, GPU_ID_SEP); | |
148 | } | |
149 | ||
150 | if (cur) | |
151 | gpu_id = atoi(cur); | |
152 | ||
153 | free(gpu_ids); | |
154 | } | |
155 | ||
156 | return gpu_id; | |
157 | } | |
158 | ||
/*
 * Per-worker init. Under running_lock, the first worker in opens the
 * cuFile driver (cufile mode only); every worker then selects its GPU
 * round-robin and makes it the current CUDA device.
 * Returns 0 on success, 1 on failure.
 */
static int fio_libcufile_init(struct thread_data *td)
{
	struct libcufile_options *o = td->eo;
	CUfileError_t status;
	int initialized;
	int rc;

	pthread_mutex_lock(&running_lock);
	if (running == 0) {
		assert(cufile_initialized == 0);
		if (o->cuda_io == IO_CUFILE) {
			/* only open the driver if this is the first worker thread */
			status = cuFileDriverOpen();
			if (status.err != CU_FILE_SUCCESS)
				log_err("cuFileDriverOpen: err=%d:%s\n", status.err,
					fio_libcufile_get_cuda_error(status));
			else
				cufile_initialized = 1;
		}
	}
	running++;
	/* snapshot under the lock; the global may change once it's dropped */
	initialized = cufile_initialized;
	pthread_mutex_unlock(&running_lock);

	/* cufile mode is unusable if the driver failed to open */
	if (o->cuda_io == IO_CUFILE && !initialized)
		return 1;

	o->my_gpu_id = fio_libcufile_find_gpu_id(td);
	if (o->my_gpu_id < 0)
		return 1;

	dprint(FD_MEM, "Subjob %d uses GPU %d\n", td->subjob_number, o->my_gpu_id);
	check_cudaruntimecall(cudaSetDevice(o->my_gpu_id), rc);
	if (rc != 0)
		return 1;

	return 0;
}
197 | ||
198 | static inline int fio_libcufile_pre_write(struct thread_data *td, | |
199 | struct libcufile_options *o, | |
200 | struct io_u *io_u, | |
201 | size_t gpu_offset) | |
202 | { | |
203 | int rc = 0; | |
204 | ||
205 | if (o->cuda_io == IO_CUFILE) { | |
206 | if (td->o.verify) { | |
207 | /* | |
208 | Data is being verified, copy the io_u buffer to GPU memory. | |
209 | This isn't done in the non-verify case because the data would | |
210 | already be in GPU memory in a normal cuFile application. | |
211 | */ | |
212 | check_cudaruntimecall(cudaMemcpy(((char*) o->cu_mem_ptr) + gpu_offset, | |
213 | io_u->xfer_buf, | |
214 | io_u->xfer_buflen, | |
215 | cudaMemcpyHostToDevice), rc); | |
216 | if (rc != 0) { | |
217 | log_err("DDIR_WRITE cudaMemcpy H2D failed\n"); | |
218 | io_u->error = EIO; | |
219 | } | |
220 | } | |
221 | } else if (o->cuda_io == IO_POSIX) { | |
222 | ||
223 | /* | |
224 | POSIX I/O is being used, the data has to be copied out of the | |
225 | GPU into a CPU buffer. GPU memory doesn't contain the actual | |
226 | data to write, copy the data to the junk buffer. The purpose | |
227 | of this is to add the overhead of cudaMemcpy() that would be | |
228 | present in a POSIX I/O CUDA application. | |
229 | */ | |
230 | check_cudaruntimecall(cudaMemcpy(o->junk_buf + gpu_offset, | |
231 | ((char*) o->cu_mem_ptr) + gpu_offset, | |
232 | io_u->xfer_buflen, | |
233 | cudaMemcpyDeviceToHost), rc); | |
234 | if (rc != 0) { | |
235 | log_err("DDIR_WRITE cudaMemcpy D2H failed\n"); | |
236 | io_u->error = EIO; | |
237 | } | |
238 | } else { | |
239 | log_err("Illegal CUDA IO type: %d\n", o->cuda_io); | |
240 | assert(0); | |
241 | rc = EINVAL; | |
242 | } | |
243 | ||
244 | return rc; | |
245 | } | |
246 | ||
247 | static inline int fio_libcufile_post_read(struct thread_data *td, | |
248 | struct libcufile_options *o, | |
249 | struct io_u *io_u, | |
250 | size_t gpu_offset) | |
251 | { | |
252 | int rc = 0; | |
253 | ||
254 | if (o->cuda_io == IO_CUFILE) { | |
255 | if (td->o.verify) { | |
256 | /* Copy GPU memory to CPU buffer for verify */ | |
257 | check_cudaruntimecall(cudaMemcpy(io_u->xfer_buf, | |
258 | ((char*) o->cu_mem_ptr) + gpu_offset, | |
259 | io_u->xfer_buflen, | |
260 | cudaMemcpyDeviceToHost), rc); | |
261 | if (rc != 0) { | |
262 | log_err("DDIR_READ cudaMemcpy D2H failed\n"); | |
263 | io_u->error = EIO; | |
264 | } | |
265 | } | |
266 | } else if (o->cuda_io == IO_POSIX) { | |
267 | /* POSIX I/O read, copy the CPU buffer to GPU memory */ | |
268 | check_cudaruntimecall(cudaMemcpy(((char*) o->cu_mem_ptr) + gpu_offset, | |
269 | io_u->xfer_buf, | |
270 | io_u->xfer_buflen, | |
271 | cudaMemcpyHostToDevice), rc); | |
272 | if (rc != 0) { | |
273 | log_err("DDIR_READ cudaMemcpy H2D failed\n"); | |
274 | io_u->error = EIO; | |
275 | } | |
276 | } else { | |
277 | log_err("Illegal CUDA IO type: %d\n", o->cuda_io); | |
278 | assert(0); | |
279 | rc = EINVAL; | |
280 | } | |
281 | ||
282 | return rc; | |
283 | } | |
284 | ||
285 | static enum fio_q_status fio_libcufile_queue(struct thread_data *td, | |
286 | struct io_u *io_u) | |
287 | { | |
288 | struct libcufile_options *o = td->eo; | |
289 | struct fio_libcufile_data *fcd = FILE_ENG_DATA(io_u->file); | |
290 | unsigned long long io_offset; | |
291 | ssize_t sz; | |
292 | ssize_t remaining; | |
293 | size_t xfered; | |
294 | size_t gpu_offset; | |
295 | int rc; | |
296 | ||
297 | if (o->cuda_io == IO_CUFILE && fcd == NULL) { | |
298 | io_u->error = EINVAL; | |
299 | td_verror(td, EINVAL, "xfer"); | |
300 | return FIO_Q_COMPLETED; | |
301 | } | |
302 | ||
303 | fio_ro_check(td, io_u); | |
304 | ||
305 | switch(io_u->ddir) { | |
306 | case DDIR_SYNC: | |
307 | rc = fsync(io_u->file->fd); | |
308 | if (rc != 0) { | |
309 | io_u->error = errno; | |
310 | log_err("fsync: err=%d\n", errno); | |
311 | } | |
312 | break; | |
313 | ||
314 | case DDIR_DATASYNC: | |
315 | rc = fdatasync(io_u->file->fd); | |
316 | if (rc != 0) { | |
317 | io_u->error = errno; | |
318 | log_err("fdatasync: err=%d\n", errno); | |
319 | } | |
320 | break; | |
321 | ||
322 | case DDIR_READ: | |
323 | case DDIR_WRITE: | |
324 | /* | |
325 | There may be a better way to calculate gpu_offset. The intent is | |
326 | that gpu_offset equals the the difference between io_u->xfer_buf and | |
327 | the page-aligned base address for io_u buffers. | |
328 | */ | |
329 | gpu_offset = io_u->index * io_u->xfer_buflen; | |
330 | io_offset = io_u->offset; | |
331 | remaining = io_u->xfer_buflen; | |
332 | ||
333 | xfered = 0; | |
334 | sz = 0; | |
335 | ||
336 | assert(gpu_offset + io_u->xfer_buflen <= o->total_mem); | |
337 | ||
338 | if (o->cuda_io == IO_CUFILE) { | |
339 | if (!(ALIGNED_4KB(io_u->xfer_buflen) || | |
340 | (o->logged & LOGGED_BUFLEN_NOT_ALIGNED))) { | |
341 | log_err("buflen not 4KB-aligned: %llu\n", io_u->xfer_buflen); | |
342 | o->logged |= LOGGED_BUFLEN_NOT_ALIGNED; | |
343 | } | |
344 | ||
345 | if (!(ALIGNED_4KB(gpu_offset) || | |
346 | (o->logged & LOGGED_GPU_OFFSET_NOT_ALIGNED))) { | |
347 | log_err("gpu_offset not 4KB-aligned: %lu\n", gpu_offset); | |
348 | o->logged |= LOGGED_GPU_OFFSET_NOT_ALIGNED; | |
349 | } | |
350 | } | |
351 | ||
352 | if (io_u->ddir == DDIR_WRITE) | |
353 | rc = fio_libcufile_pre_write(td, o, io_u, gpu_offset); | |
354 | ||
355 | if (io_u->error != 0) | |
356 | break; | |
357 | ||
358 | while (remaining > 0) { | |
359 | assert(gpu_offset + xfered <= o->total_mem); | |
360 | if (io_u->ddir == DDIR_READ) { | |
361 | if (o->cuda_io == IO_CUFILE) { | |
362 | sz = cuFileRead(fcd->cf_handle, o->cu_mem_ptr, remaining, | |
363 | io_offset + xfered, gpu_offset + xfered); | |
364 | if (sz == -1) { | |
365 | io_u->error = errno; | |
366 | log_err("cuFileRead: err=%d\n", errno); | |
367 | } else if (sz < 0) { | |
368 | io_u->error = EIO; | |
369 | log_err("cuFileRead: err=%ld:%s\n", sz, | |
370 | cufileop_status_error(-sz)); | |
371 | } | |
372 | } else if (o->cuda_io == IO_POSIX) { | |
373 | sz = pread(io_u->file->fd, ((char*) io_u->xfer_buf) + xfered, | |
374 | remaining, io_offset + xfered); | |
375 | if (sz < 0) { | |
376 | io_u->error = errno; | |
377 | log_err("pread: err=%d\n", errno); | |
378 | } | |
379 | } else { | |
380 | log_err("Illegal CUDA IO type: %d\n", o->cuda_io); | |
381 | io_u->error = -1; | |
382 | assert(0); | |
383 | } | |
384 | } else if (io_u->ddir == DDIR_WRITE) { | |
385 | if (o->cuda_io == IO_CUFILE) { | |
386 | sz = cuFileWrite(fcd->cf_handle, o->cu_mem_ptr, remaining, | |
387 | io_offset + xfered, gpu_offset + xfered); | |
388 | if (sz == -1) { | |
389 | io_u->error = errno; | |
390 | log_err("cuFileWrite: err=%d\n", errno); | |
391 | } else if (sz < 0) { | |
392 | io_u->error = EIO; | |
393 | log_err("cuFileWrite: err=%ld:%s\n", sz, | |
394 | cufileop_status_error(-sz)); | |
395 | } | |
396 | } else if (o->cuda_io == IO_POSIX) { | |
397 | sz = pwrite(io_u->file->fd, | |
398 | ((char*) io_u->xfer_buf) + xfered, | |
399 | remaining, io_offset + xfered); | |
400 | if (sz < 0) { | |
401 | io_u->error = errno; | |
402 | log_err("pwrite: err=%d\n", errno); | |
403 | } | |
404 | } else { | |
405 | log_err("Illegal CUDA IO type: %d\n", o->cuda_io); | |
406 | io_u->error = -1; | |
407 | assert(0); | |
408 | } | |
409 | } else { | |
410 | log_err("not DDIR_READ or DDIR_WRITE: %d\n", io_u->ddir); | |
411 | io_u->error = -1; | |
412 | assert(0); | |
413 | break; | |
414 | } | |
415 | ||
416 | if (io_u->error != 0) | |
417 | break; | |
418 | ||
419 | remaining -= sz; | |
420 | xfered += sz; | |
421 | ||
422 | if (remaining != 0) | |
423 | log_info("Incomplete %s: %ld bytes remaining\n", | |
424 | io_u->ddir == DDIR_READ? "read" : "write", remaining); | |
425 | } | |
426 | ||
427 | if (io_u->error != 0) | |
428 | break; | |
429 | ||
430 | if (io_u->ddir == DDIR_READ) | |
431 | rc = fio_libcufile_post_read(td, o, io_u, gpu_offset); | |
432 | break; | |
433 | ||
434 | default: | |
435 | io_u->error = EINVAL; | |
436 | break; | |
437 | } | |
438 | ||
439 | if (io_u->error != 0) { | |
440 | log_err("IO failed\n"); | |
441 | td_verror(td, io_u->error, "xfer"); | |
442 | } | |
443 | ||
444 | return FIO_Q_COMPLETED; | |
445 | } | |
446 | ||
447 | static int fio_libcufile_open_file(struct thread_data *td, struct fio_file *f) | |
448 | { | |
449 | struct libcufile_options *o = td->eo; | |
450 | struct fio_libcufile_data *fcd = NULL; | |
451 | int rc; | |
452 | CUfileError_t status; | |
453 | ||
454 | rc = generic_open_file(td, f); | |
455 | if (rc) | |
456 | return rc; | |
457 | ||
458 | if (o->cuda_io == IO_CUFILE) { | |
459 | fcd = calloc(1, sizeof(*fcd)); | |
460 | if (fcd == NULL) { | |
461 | rc = ENOMEM; | |
462 | goto exit_err; | |
463 | } | |
464 | ||
465 | fcd->cf_descr.handle.fd = f->fd; | |
466 | fcd->cf_descr.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD; | |
467 | status = cuFileHandleRegister(&fcd->cf_handle, &fcd->cf_descr); | |
468 | if (status.err != CU_FILE_SUCCESS) { | |
469 | log_err("cufile register: err=%d:%s\n", status.err, | |
470 | fio_libcufile_get_cuda_error(status)); | |
471 | rc = EINVAL; | |
472 | goto exit_err; | |
473 | } | |
474 | } | |
475 | ||
476 | FILE_SET_ENG_DATA(f, fcd); | |
477 | return 0; | |
478 | ||
479 | exit_err: | |
480 | if (fcd) { | |
481 | free(fcd); | |
482 | fcd = NULL; | |
483 | } | |
484 | if (f) { | |
485 | int rc2 = generic_close_file(td, f); | |
486 | if (rc2) | |
487 | log_err("generic_close_file: err=%d\n", rc2); | |
488 | } | |
489 | return rc; | |
490 | } | |
491 | ||
492 | static int fio_libcufile_close_file(struct thread_data *td, struct fio_file *f) | |
493 | { | |
494 | struct fio_libcufile_data *fcd = FILE_ENG_DATA(f); | |
495 | int rc; | |
496 | ||
497 | if (fcd != NULL) { | |
498 | cuFileHandleDeregister(fcd->cf_handle); | |
499 | FILE_SET_ENG_DATA(f, NULL); | |
500 | free(fcd); | |
501 | } | |
502 | ||
503 | rc = generic_close_file(td, f); | |
504 | ||
505 | return rc; | |
506 | } | |
507 | ||
/*
 * Allocate I/O memory: the host buffer fio uses (td->orig_buffer), a
 * GPU buffer of the same size, and in posix mode an extra host "junk"
 * buffer used as the cudaMemcpy target on writes. In cufile mode the
 * GPU buffer is registered with cuFileBufRegister().
 * Returns 0 on success, 1 on failure with partial allocations released.
 */
static int fio_libcufile_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct libcufile_options *o = td->eo;
	int rc;
	CUfileError_t status;

	o->total_mem = total_mem;
	o->logged = 0;		/* reset the once-per-job warning flags */
	o->cu_mem_ptr = NULL;
	o->junk_buf = NULL;
	td->orig_buffer = calloc(1, total_mem);
	if (!td->orig_buffer) {
		log_err("orig_buffer calloc failed: err=%d\n", errno);
		goto exit_error;
	}

	if (o->cuda_io == IO_POSIX) {
		o->junk_buf = calloc(1, total_mem);
		if (o->junk_buf == NULL) {
			log_err("junk_buf calloc failed: err=%d\n", errno);
			goto exit_error;
		}
	}

	dprint(FD_MEM, "Alloc %zu for GPU %d\n", total_mem, o->my_gpu_id);
	check_cudaruntimecall(cudaMalloc(&o->cu_mem_ptr, total_mem), rc);
	if (rc != 0)
		goto exit_error;
	/* fill with a recognizable pattern */
	check_cudaruntimecall(cudaMemset(o->cu_mem_ptr, 0xab, total_mem), rc);
	if (rc != 0)
		goto exit_error;

	if (o->cuda_io == IO_CUFILE) {
		status = cuFileBufRegister(o->cu_mem_ptr, total_mem, 0);
		if (status.err != CU_FILE_SUCCESS) {
			log_err("cuFileBufRegister: err=%d:%s\n", status.err,
				fio_libcufile_get_cuda_error(status));
			goto exit_error;
		}
	}

	return 0;

exit_error:
	/* release whatever was allocated before the failure */
	if (td->orig_buffer) {
		free(td->orig_buffer);
		td->orig_buffer = NULL;
	}
	if (o->junk_buf) {
		free(o->junk_buf);
		o->junk_buf = NULL;
	}
	if (o->cu_mem_ptr) {
		cudaFree(o->cu_mem_ptr);
		o->cu_mem_ptr = NULL;
	}
	return 1;
}
566 | ||
567 | static void fio_libcufile_iomem_free(struct thread_data *td) | |
568 | { | |
569 | struct libcufile_options *o = td->eo; | |
570 | ||
571 | if (o->junk_buf) { | |
572 | free(o->junk_buf); | |
573 | o->junk_buf = NULL; | |
574 | } | |
575 | if (o->cu_mem_ptr) { | |
576 | if (o->cuda_io == IO_CUFILE) | |
577 | cuFileBufDeregister(o->cu_mem_ptr); | |
578 | cudaFree(o->cu_mem_ptr); | |
579 | o->cu_mem_ptr = NULL; | |
580 | } | |
581 | if (td->orig_buffer) { | |
582 | free(td->orig_buffer); | |
583 | td->orig_buffer = NULL; | |
584 | } | |
585 | } | |
586 | ||
/*
 * Per-worker teardown, mirroring fio_libcufile_init(): decrements the
 * shared worker count under running_lock and lets the last worker out
 * close the cuFile driver if it was opened.
 */
static void fio_libcufile_cleanup(struct thread_data *td)
{
	struct libcufile_options *o = td->eo;

	pthread_mutex_lock(&running_lock);
	running--;
	assert(running >= 0);
	if (running == 0) {
		/* only close the driver if initialized and
		   this is the last worker thread */
		if (o->cuda_io == IO_CUFILE && cufile_initialized)
			cuFileDriverClose();
		cufile_initialized = 0;
	}
	pthread_mutex_unlock(&running_lock);
}
603 | ||
/* Engine ops table. FIO_SYNCIO: queue() completes every io_u inline,
   so no event/getevents hooks are needed. */
FIO_STATIC struct ioengine_ops ioengine = {
	.name = "libcufile",
	.version = FIO_IOOPS_VERSION,
	.init = fio_libcufile_init,
	.queue = fio_libcufile_queue,
	.open_file = fio_libcufile_open_file,
	.close_file = fio_libcufile_close_file,
	.iomem_alloc = fio_libcufile_iomem_alloc,
	.iomem_free = fio_libcufile_iomem_free,
	.cleanup = fio_libcufile_cleanup,
	.flags = FIO_SYNCIO,
	.options = options,
	.option_struct_size = sizeof(struct libcufile_options)
};
618 | ||
/* Constructor: register the engine with fio when the object is loaded */
void fio_init fio_libcufile_register(void)
{
	register_ioengine(&ioengine);
}
623 | ||
/* Destructor: unregister the engine on unload */
void fio_exit fio_libcufile_unregister(void)
{
	unregister_ioengine(&ioengine);
}