docs: update for new data placement options
[fio.git] / filesetup.c
1 #include <unistd.h>
2 #include <fcntl.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <dirent.h>
6 #include <libgen.h>
7 #include <sys/stat.h>
8
9 #include "fio.h"
10 #include "smalloc.h"
11 #include "filehash.h"
12 #include "options.h"
13 #include "os/os.h"
14 #include "hash.h"
15 #include "lib/axmap.h"
16 #include "rwlock.h"
17 #include "zbd.h"
18
19 #ifdef CONFIG_LINUX_FALLOCATE
20 #include <linux/falloc.h>
21 #endif
22
23 static FLIST_HEAD(filename_list);     /* list head; entries are struct file_name (below) */
24
25 /*
26  * List entry for filename_list: pairs a list link with a file name string.
27  */
28 struct file_name {
29         struct flist_head list;         /* list linkage */
30         char *filename;                 /* name string; ownership is not visible in this chunk */
31 };
32
33 static inline void clear_error(struct thread_data *td)
34 {
35         td->error = 0;
36         td->verror[0] = '\0';
37 }
38
39 static int native_fallocate(struct thread_data *td, struct fio_file *f)
40 {
41         bool success;
42
43         success = fio_fallocate(f, 0, f->real_file_size);
44         dprint(FD_FILE, "native fallocate of file %s size %llu was "
45                         "%ssuccessful\n", f->file_name,
46                         (unsigned long long) f->real_file_size,
47                         !success ? "un": "");
48
49         if (success)
50                 return false;
51
52         if (errno == ENOSYS)
53                 dprint(FD_FILE, "native fallocate is not implemented\n");
54
55         return true;
56 }
57
58 static void fallocate_file(struct thread_data *td, struct fio_file *f)
59 {
60         if (td->o.fill_device)
61                 return;
62
63         switch (td->o.fallocate_mode) {
64         case FIO_FALLOCATE_NATIVE:
65                 native_fallocate(td, f);
66                 break;
67         case FIO_FALLOCATE_NONE:
68                 break;
69 #ifdef CONFIG_POSIX_FALLOCATE
70         case FIO_FALLOCATE_POSIX: {
71                 int r;
72
73                 dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
74                                  f->file_name,
75                                  (unsigned long long) f->real_file_size);
76
77                 r = posix_fallocate(f->fd, 0, f->real_file_size);
78                 if (r > 0)
79                         log_err("fio: posix_fallocate fails: %s\n", strerror(r));
80                 break;
81                 }
82 #endif /* CONFIG_POSIX_FALLOCATE */
83 #ifdef CONFIG_LINUX_FALLOCATE
84         case FIO_FALLOCATE_KEEP_SIZE: {
85                 int r;
86
87                 dprint(FD_FILE, "fallocate(FALLOC_FL_KEEP_SIZE) "
88                                 "file %s size %llu\n", f->file_name,
89                                 (unsigned long long) f->real_file_size);
90
91                 r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0, f->real_file_size);
92                 if (r != 0)
93                         td_verror(td, errno, "fallocate");
94
95                 break;
96                 }
97 #endif /* CONFIG_LINUX_FALLOCATE */
98         case FIO_FALLOCATE_TRUNCATE: {
99                 int r;
100
101                 dprint(FD_FILE, "ftruncate file %s size %llu\n",
102                                 f->file_name,
103                                 (unsigned long long) f->real_file_size);
104                 r = ftruncate(f->fd, f->real_file_size);
105                 if (r != 0)
106                         td_verror(td, errno, "ftruncate");
107
108                 break;
109         }
110         default:
111                 log_err("fio: unknown fallocate mode: %d\n", td->o.fallocate_mode);
112                 assert(0);
113         }
114 }
115
116 /*
117  * Leaves f->fd open on success, caller must close
118  */
119 static int extend_file(struct thread_data *td, struct fio_file *f)
120 {
121         int new_layout = 0, unlink_file = 0, flags;
122         unsigned long long left;
123         unsigned long long bs;
124         char *b = NULL;
125
126         if (read_only) {
127                 log_err("fio: refusing extend of file due to read-only\n");
128                 return 0;
129         }
130
131         /*
132          * check if we need to lay the file out complete again. fio
133          * does that for operations involving reads, or for writes
134          * where overwrite is set
135          */
136         if (td_read(td) ||
137            (td_write(td) && td->o.overwrite && !td->o.file_append) ||
138             (td_write(td) && td_ioengine_flagged(td, FIO_NOEXTEND)))
139                 new_layout = 1;
140         if (td_write(td) && !td->o.overwrite && !td->o.file_append)
141                 unlink_file = 1;
142
143         if (unlink_file || new_layout) {
144                 int ret;
145
146                 dprint(FD_FILE, "layout unlink %s\n", f->file_name);
147
148                 ret = td_io_unlink_file(td, f);
149                 if (ret != 0 && ret != ENOENT) {
150                         td_verror(td, errno, "unlink");
151                         return 1;
152                 }
153         }
154
155         flags = O_WRONLY;
156         if (td->o.allow_create)
157                 flags |= O_CREAT;
158         if (new_layout)
159                 flags |= O_TRUNC;
160
161 #ifdef WIN32
162         flags |= _O_BINARY;
163 #endif
164
165         dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
166         f->fd = open(f->file_name, flags, 0644);
167         if (f->fd < 0) {
168                 int err = errno;
169
170                 if (err == ENOENT && !td->o.allow_create)
171                         log_err("fio: file creation disallowed by "
172                                         "allow_file_create=0\n");
173                 else
174                         td_verror(td, err, "open");
175                 return 1;
176         }
177
178         fallocate_file(td, f);
179
180         /*
181          * If our jobs don't require regular files initially, we're done.
182          */
183         if (!new_layout)
184                 goto done;
185
186         /*
187          * The size will be -1ULL when fill_device is used, so don't truncate
188          * or fallocate this file, just write it
189          */
190         if (!td->o.fill_device) {
191                 dprint(FD_FILE, "truncate file %s, size %llu\n", f->file_name,
192                                         (unsigned long long) f->real_file_size);
193                 if (ftruncate(f->fd, f->real_file_size) == -1) {
194                         if (errno != EFBIG) {
195                                 td_verror(td, errno, "ftruncate");
196                                 goto err;
197                         }
198                 }
199         }
200
201         left = f->real_file_size;
202         bs = td->o.max_bs[DDIR_WRITE];
203         if (bs > left)
204                 bs = left;
205
206         b = malloc(bs);
207         if (!b) {
208                 td_verror(td, errno, "malloc");
209                 goto err;
210         }
211
212         while (left && !td->terminate) {
213                 ssize_t r;
214
215                 if (bs > left)
216                         bs = left;
217
218                 fill_io_buffer(td, b, bs, bs);
219
220                 r = write(f->fd, b, bs);
221
222                 if (r > 0) {
223                         left -= r;
224                         continue;
225                 } else {
226                         if (r < 0) {
227                                 int __e = errno;
228
229                                 if (__e == ENOSPC) {
230                                         if (td->o.fill_device)
231                                                 break;
232                                         log_info("fio: ENOSPC on laying out "
233                                                  "file, stopping\n");
234                                 }
235                                 td_verror(td, errno, "write");
236                         } else
237                                 td_verror(td, EIO, "write");
238
239                         goto err;
240                 }
241         }
242
243         if (td->terminate) {
244                 dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
245                 td_io_unlink_file(td, f);
246         } else if (td->o.create_fsync) {
247                 if (fsync(f->fd) < 0) {
248                         td_verror(td, errno, "fsync");
249                         goto err;
250                 }
251         }
252         if (td->o.fill_device && !td_write(td)) {
253                 fio_file_clear_size_known(f);
254                 if (td_io_get_file_size(td, f))
255                         goto err;
256                 if (f->io_size > f->real_file_size)
257                         f->io_size = f->real_file_size;
258         }
259
260         free(b);
261 done:
262         return 0;
263 err:
264         close(f->fd);
265         f->fd = -1;
266         if (b)
267                 free(b);
268         return 1;
269 }
270
271 static bool pre_read_file(struct thread_data *td, struct fio_file *f)
272 {
273         int r, did_open = 0, old_runstate;
274         unsigned long long left;
275         unsigned long long bs;
276         bool ret = true;
277         char *b;
278
279         if (td_ioengine_flagged(td, FIO_PIPEIO) ||
280             td_ioengine_flagged(td, FIO_NOIO))
281                 return true;
282
283         if (f->filetype == FIO_TYPE_CHAR)
284                 return true;
285
286         if (!fio_file_open(f)) {
287                 if (td->io_ops->open_file(td, f)) {
288                         log_err("fio: cannot pre-read, failed to open file\n");
289                         return false;
290                 }
291                 did_open = 1;
292         }
293
294         old_runstate = td_bump_runstate(td, TD_PRE_READING);
295
296         left = f->io_size;
297         bs = td->o.max_bs[DDIR_READ];
298         if (bs > left)
299                 bs = left;
300
301         b = malloc(bs);
302         if (!b) {
303                 td_verror(td, errno, "malloc");
304                 ret = false;
305                 goto error;
306         }
307         memset(b, 0, bs);
308
309         if (lseek(f->fd, f->file_offset, SEEK_SET) < 0) {
310                 td_verror(td, errno, "lseek");
311                 log_err("fio: failed to lseek pre-read file\n");
312                 ret = false;
313                 goto error;
314         }
315
316         while (left && !td->terminate) {
317                 if (bs > left)
318                         bs = left;
319
320                 r = read(f->fd, b, bs);
321
322                 if (r == (int) bs) {
323                         left -= bs;
324                         continue;
325                 } else {
326                         td_verror(td, EIO, "pre_read");
327                         break;
328                 }
329         }
330
331 error:
332         td_restore_runstate(td, old_runstate);
333
334         if (did_open)
335                 td->io_ops->close_file(td, f);
336
337         free(b);
338         return ret;
339 }
340
341 /*
342  * Generic function to prepopulate regular file with data.
343  * Useful if you want to make sure I/O engine has data to read.
344  * Leaves f->fd open on success, caller must close.
345  */
346 int generic_prepopulate_file(struct thread_data *td, struct fio_file *f)
347 {
348         int flags;
349         unsigned long long left, bs;
350         char *b = NULL;
351
352         /* generic function for regular files only */
353         assert(f->filetype == FIO_TYPE_FILE);
354
355         if (read_only) {
356                 log_err("fio: refusing to write a file due to read-only\n");
357                 return 0;
358         }
359
360         flags = O_WRONLY;
361         if (td->o.allow_create)
362                 flags |= O_CREAT;
363
364 #ifdef WIN32
365         flags |= _O_BINARY;
366 #endif
367
368         dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
369         f->fd = open(f->file_name, flags, 0644);
370         if (f->fd < 0) {
371                 int err = errno;
372
373                 if (err == ENOENT && !td->o.allow_create)
374                         log_err("fio: file creation disallowed by "
375                                         "allow_file_create=0\n");
376                 else
377                         td_verror(td, err, "open");
378                 return 1;
379         }
380
381         left = f->real_file_size;
382         bs = td->o.max_bs[DDIR_WRITE];
383         if (bs > left)
384                 bs = left;
385
386         b = malloc(bs);
387         if (!b) {
388                 td_verror(td, errno, "malloc");
389                 goto err;
390         }
391
392         while (left && !td->terminate) {
393                 ssize_t r;
394
395                 if (bs > left)
396                         bs = left;
397
398                 fill_io_buffer(td, b, bs, bs);
399
400                 r = write(f->fd, b, bs);
401
402                 if (r > 0) {
403                         left -= r;
404                 } else {
405                         td_verror(td, errno, "write");
406                         goto err;
407                 }
408         }
409
410         if (td->terminate) {
411                 dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
412                 td_io_unlink_file(td, f);
413         } else if (td->o.create_fsync) {
414                 if (fsync(f->fd) < 0) {
415                         td_verror(td, errno, "fsync");
416                         goto err;
417                 }
418         }
419
420         free(b);
421         return 0;
422 err:
423         close(f->fd);
424         f->fd = -1;
425         if (b)
426                 free(b);
427         return 1;
428 }
429
430 unsigned long long get_rand_file_size(struct thread_data *td)
431 {
432         unsigned long long ret, sized;
433         uint64_t frand_max;
434         uint64_t r;
435
436         frand_max = rand_max(&td->file_size_state);
437         r = __rand(&td->file_size_state);
438         sized = td->o.file_size_high - td->o.file_size_low;
439         ret = (unsigned long long) ((double) sized * (r / (frand_max + 1.0)));
440         ret += td->o.file_size_low;
441         ret -= (ret % td->o.rw_min_bs);
442         return ret;
443 }
444
445 static int file_size(struct thread_data *td, struct fio_file *f)
446 {
447         struct stat st;
448
449         if (stat(f->file_name, &st) == -1) {
450                 td_verror(td, errno, "fstat");
451                 return 1;
452         }
453
454         f->real_file_size = st.st_size;
455         return 0;
456 }
457
458 static int bdev_size(struct thread_data *td, struct fio_file *f)
459 {
460         unsigned long long bytes = 0;
461         int r;
462
463         if (td->io_ops->open_file(td, f)) {
464                 log_err("fio: failed opening blockdev %s for size check\n",
465                         f->file_name);
466                 return 1;
467         }
468
469         r = blockdev_size(f, &bytes);
470         if (r) {
471                 td_verror(td, r, "blockdev_size");
472                 goto err;
473         }
474
475         if (!bytes) {
476                 log_err("%s: zero sized block device?\n", f->file_name);
477                 goto err;
478         }
479
480         f->real_file_size = bytes;
481         td->io_ops->close_file(td, f);
482         return 0;
483 err:
484         td->io_ops->close_file(td, f);
485         return 1;
486 }
487
488 static int char_size(struct thread_data *td, struct fio_file *f)
489 {
490 #ifdef FIO_HAVE_CHARDEV_SIZE
491         unsigned long long bytes = 0;
492         int r;
493
494         if (td->io_ops->open_file(td, f)) {
495                 log_err("fio: failed opening chardev %s for size check\n",
496                         f->file_name);
497                 return 1;
498         }
499
500         r = chardev_size(f, &bytes);
501         if (r) {
502                 td_verror(td, r, "chardev_size");
503                 goto err;
504         }
505
506         if (!bytes) {
507                 log_err("%s: zero sized char device?\n", f->file_name);
508                 goto err;
509         }
510
511         f->real_file_size = bytes;
512         td->io_ops->close_file(td, f);
513         return 0;
514 err:
515         td->io_ops->close_file(td, f);
516         return 1;
517 #else
518         f->real_file_size = -1ULL;
519         return 0;
520 #endif
521 }
522
523 static int get_file_size(struct thread_data *td, struct fio_file *f)
524 {
525         int ret = 0;
526
527         if (fio_file_size_known(f))
528                 return 0;
529
530         if (f->filetype == FIO_TYPE_FILE)
531                 ret = file_size(td, f);
532         else if (f->filetype == FIO_TYPE_BLOCK)
533                 ret = bdev_size(td, f);
534         else if (f->filetype == FIO_TYPE_CHAR)
535                 ret = char_size(td, f);
536         else {
537                 f->real_file_size = -1;
538                 log_info("%s: failed to get file size of %s\n", td->o.name,
539                                         f->file_name);
540                 return 1; /* avoid offset extends end error message */
541         }
542
543         /*
544          * Leave ->real_file_size with 0 since it could be expectation
545          * of initial setup for regular files.
546          */
547         if (ret)
548                 return ret;
549
550         /*
551          * ->file_offset normally hasn't been initialized yet, so this
552          * is basically always false unless ->real_file_size is -1, but
553          * if ->real_file_size is -1 this message doesn't make sense.
554          * As a result, this message is basically useless.
555          */
556         if (f->file_offset > f->real_file_size) {
557                 log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
558                                         (unsigned long long) f->file_offset,
559                                         (unsigned long long) f->real_file_size);
560                 return 1;
561         }
562
563         fio_file_set_size_known(f);
564         return 0;
565 }
566
567 static int __file_invalidate_cache(struct thread_data *td, struct fio_file *f,
568                                    unsigned long long off,
569                                    unsigned long long len)
570 {
571         int errval = 0, ret = 0;
572
573 #ifdef CONFIG_ESX
574         return 0;
575 #endif
576
577         if (len == -1ULL)
578                 len = f->io_size;
579         if (off == -1ULL)
580                 off = f->file_offset;
581
582         if (len == -1ULL || off == -1ULL)
583                 return 0;
584
585         if (td->io_ops->invalidate) {
586                 dprint(FD_IO, "invalidate %s cache %s\n", td->io_ops->name,
587                         f->file_name);
588                 ret = td->io_ops->invalidate(td, f);
589                 if (ret < 0)
590                         errval = -ret;
591         } else if (td_ioengine_flagged(td, FIO_DISKLESSIO)) {
592                 dprint(FD_IO, "invalidate not supported by ioengine %s\n",
593                        td->io_ops->name);
594         } else if (f->filetype == FIO_TYPE_FILE) {
595                 dprint(FD_IO, "declare unneeded cache %s: %llu/%llu\n",
596                         f->file_name, off, len);
597                 ret = posix_fadvise(f->fd, off, len, POSIX_FADV_DONTNEED);
598                 if (ret)
599                         errval = ret;
600         } else if (f->filetype == FIO_TYPE_BLOCK) {
601                 int retry_count = 0;
602
603                 dprint(FD_IO, "drop page cache %s\n", f->file_name);
604                 ret = blockdev_invalidate_cache(f);
605                 while (ret < 0 && errno == EAGAIN && retry_count++ < 25) {
606                         /*
607                          * Linux multipath devices reject ioctl while
608                          * the maps are being updated. That window can
609                          * last tens of milliseconds; we'll try up to
610                          * a quarter of a second.
611                          */
612                         usleep(10000);
613                         ret = blockdev_invalidate_cache(f);
614                 }
615                 if (ret < 0 && errno == EACCES && geteuid()) {
616                         if (!fio_did_warn(FIO_WARN_ROOT_FLUSH)) {
617                                 log_err("fio: only root may flush block "
618                                         "devices. Cache flush bypassed!\n");
619                         }
620                 }
621                 if (ret < 0)
622                         errval = errno;
623         } else if (f->filetype == FIO_TYPE_CHAR ||
624                    f->filetype == FIO_TYPE_PIPE) {
625                 dprint(FD_IO, "invalidate not supported %s\n", f->file_name);
626         }
627
628         /*
629          * Cache flushing isn't a fatal condition, and we know it will
630          * happen on some platforms where we don't have the proper
631          * function to flush eg block device caches. So just warn and
632          * continue on our way.
633          */
634         if (errval)
635                 log_info("fio: cache invalidation of %s failed: %s\n",
636                          f->file_name, strerror(errval));
637
638         return 0;
639
640 }
641
/*
 * Invalidate cached data for f over its default offset/length (both
 * passed as -1ULL). A file that is not open is left alone. Returns 0 in
 * that case, otherwise the result of __file_invalidate_cache().
 */
int file_invalidate_cache(struct thread_data *td, struct fio_file *f)
{
        int ret = 0;

        if (fio_file_open(f))
                ret = __file_invalidate_cache(td, f, -1ULL, -1ULL);

        return ret;
}
649
650 int generic_close_file(struct thread_data fio_unused *td, struct fio_file *f)
651 {
652         int ret = 0;
653
654         dprint(FD_FILE, "fd close %s\n", f->file_name);
655
656         remove_file_hash(f);
657
658         if (close(f->fd) < 0)
659                 ret = errno;
660
661         f->fd = -1;
662
663         if (f->shadow_fd != -1) {
664                 close(f->shadow_fd);
665                 f->shadow_fd = -1;
666         }
667
668         f->engine_pos = 0;
669         return ret;
670 }
671
672 int file_lookup_open(struct fio_file *f, int flags)
673 {
674         struct fio_file *__f;
675         int from_hash;
676
677         __f = lookup_file_hash(f->file_name);
678         if (__f) {
679                 dprint(FD_FILE, "found file in hash %s\n", f->file_name);
680                 f->lock = __f->lock;
681                 from_hash = 1;
682         } else {
683                 dprint(FD_FILE, "file not found in hash %s\n", f->file_name);
684                 from_hash = 0;
685         }
686
687 #ifdef WIN32
688         flags |= _O_BINARY;
689 #endif
690
691         f->fd = open(f->file_name, flags, 0600);
692         return from_hash;
693 }
694
695 static int file_close_shadow_fds(struct thread_data *td)
696 {
697         struct fio_file *f;
698         int num_closed = 0;
699         unsigned int i;
700
701         for_each_file(td, f, i) {
702                 if (f->shadow_fd == -1)
703                         continue;
704
705                 close(f->shadow_fd);
706                 f->shadow_fd = -1;
707                 num_closed++;
708         }
709
710         return num_closed;
711 }
712
713 int generic_open_file(struct thread_data *td, struct fio_file *f)
714 {
715         int is_std = 0;
716         int flags = 0;
717         int from_hash = 0;
718
719         dprint(FD_FILE, "fd open %s\n", f->file_name);
720
721         if (!strcmp(f->file_name, "-")) {
722                 if (td_rw(td)) {
723                         log_err("fio: can't read/write to stdin/out\n");
724                         return 1;
725                 }
726                 is_std = 1;
727
728                 /*
729                  * move output logging to stderr, if we are writing to stdout
730                  */
731                 if (td_write(td))
732                         f_out = stderr;
733         }
734
735         if (td_trim(td))
736                 goto skip_flags;
737         if (td->o.odirect)
738                 flags |= OS_O_DIRECT;
739         if (td->o.oatomic) {
740                 if (!FIO_O_ATOMIC) {
741                         td_verror(td, EINVAL, "OS does not support atomic IO");
742                         return 1;
743                 }
744                 flags |= OS_O_DIRECT | FIO_O_ATOMIC;
745         }
746         flags |= td->o.sync_io;
747         if (td->o.create_on_open && td->o.allow_create)
748                 flags |= O_CREAT;
749 skip_flags:
750         if (f->filetype != FIO_TYPE_FILE)
751                 flags |= FIO_O_NOATIME;
752
753 open_again:
754         if (td_write(td)) {
755                 if (!read_only)
756                         flags |= O_RDWR;
757
758                 if (f->filetype == FIO_TYPE_FILE && td->o.allow_create)
759                         flags |= O_CREAT;
760
761                 if (is_std)
762                         f->fd = dup(STDOUT_FILENO);
763                 else
764                         from_hash = file_lookup_open(f, flags);
765         } else if (td_read(td)) {
766                 if (f->filetype == FIO_TYPE_CHAR && !read_only)
767                         flags |= O_RDWR;
768                 else
769                         flags |= O_RDONLY;
770
771                 if (is_std)
772                         f->fd = dup(STDIN_FILENO);
773                 else
774                         from_hash = file_lookup_open(f, flags);
775         } else if (td_trim(td)) {
776                 assert(!td_rw(td)); /* should have matched above */
777                 if (!read_only)
778                         flags |= O_RDWR;
779                 from_hash = file_lookup_open(f, flags);
780         }
781
782         if (f->fd == -1) {
783                 char buf[FIO_VERROR_SIZE];
784                 int __e = errno;
785
786                 if (__e == EPERM && (flags & FIO_O_NOATIME)) {
787                         flags &= ~FIO_O_NOATIME;
788                         goto open_again;
789                 }
790                 if (__e == EMFILE && file_close_shadow_fds(td))
791                         goto open_again;
792
793                 snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
794
795                 if (__e == EINVAL && (flags & OS_O_DIRECT)) {
796                         log_err("fio: looks like your file system does not " \
797                                 "support direct=1/buffered=0\n");
798                 }
799
800                 td_verror(td, __e, buf);
801                 return 1;
802         }
803
804         if (!from_hash && f->fd != -1) {
805                 if (add_file_hash(f)) {
806                         int fio_unused ret;
807
808                         /*
809                          * Stash away descriptor for later close. This is to
810                          * work-around a "feature" on Linux, where a close of
811                          * an fd that has been opened for write will trigger
812                          * udev to call blkid to check partitions, fs id, etc.
813                          * That pollutes the device cache, which can slow down
814                          * unbuffered accesses.
815                          */
816                         if (f->shadow_fd == -1)
817                                 f->shadow_fd = f->fd;
818                         else {
819                                 /*
820                                  * OK to ignore, we haven't done anything
821                                  * with it
822                                  */
823                                 ret = generic_close_file(td, f);
824                         }
825                         goto open_again;
826                 }
827         }
828
829         return 0;
830 }
831
/*
 * Public entry point that forwards to the file-local get_file_size().
 * It serves as the default .get_file_size implementation for the
 * majority of I/O engines.
 */
int generic_get_file_size(struct thread_data *td, struct fio_file *f)
{
        const int ret = get_file_size(td, f);

        return ret;
}
840
841 /*
842  * open/close all files, so that ->real_file_size gets set
843  */
844 static int get_file_sizes(struct thread_data *td)
845 {
846         struct fio_file *f;
847         unsigned int i;
848         int err = 0;
849
850         for_each_file(td, f, i) {
851                 dprint(FD_FILE, "get file size for %p/%d/%s\n", f, i,
852                                                                 f->file_name);
853
854                 if (td_io_get_file_size(td, f)) {
855                         if (td->error != ENOENT) {
856                                 log_err("%s\n", td->verror);
857                                 err = 1;
858                                 break;
859                         }
860                         clear_error(td);
861                 }
862
863                 /*
864                  * There are corner cases where we end up with -1 for
865                  * ->real_file_size due to unsupported file type, etc.
866                  * We then just set to size option value divided by number
867                  * of files, similar to the way file ->io_size is set.
868                  * stat(2) failure doesn't set ->real_file_size to -1.
869                  */
870                 if (f->real_file_size == -1ULL && td->o.size)
871                         f->real_file_size = td->o.size / td->o.nr_files;
872         }
873
874         return err;
875 }
876
877 struct fio_mount {                      /* one entry per distinct st_dev found by get_fs_free_counts() */
878         struct flist_head list;         /* linkage on that function's local list */
879         const char *base;               /* basename() result computed from __base */
880         char __base[256];               /* private copy of the path that was stat()ed */
881         unsigned int key;               /* st_dev of that path, used to spot duplicates */
882 };
883
884 /*
885  * Get free number of bytes for each file on each unique mount.
886  */
887 static unsigned long long get_fs_free_counts(struct thread_data *td)
888 {
889         struct flist_head *n, *tmp;
890         unsigned long long ret = 0;
891         struct fio_mount *fm;
892         FLIST_HEAD(list);
893         struct fio_file *f;
894         unsigned int i;
895
896         for_each_file(td, f, i) {
897                 struct stat sb;
898                 char buf[256];
899
900                 if (f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_CHAR) {
901                         if (f->real_file_size != -1ULL)
902                                 ret += f->real_file_size;
903                         continue;
904                 } else if (f->filetype != FIO_TYPE_FILE)
905                         continue;
906
907                 snprintf(buf, FIO_ARRAY_SIZE(buf), "%s", f->file_name);
908
909                 if (stat(buf, &sb) < 0) {
910                         if (errno != ENOENT)
911                                 break;
912                         strcpy(buf, ".");
913                         if (stat(buf, &sb) < 0)
914                                 break;
915                 }
916
917                 fm = NULL;
918                 flist_for_each(n, &list) {
919                         fm = flist_entry(n, struct fio_mount, list);
920                         if (fm->key == sb.st_dev)
921                                 break;
922
923                         fm = NULL;
924                 }
925
926                 if (fm)
927                         continue;
928
929                 fm = calloc(1, sizeof(*fm));
930                 snprintf(fm->__base, FIO_ARRAY_SIZE(fm->__base), "%s", buf);
931                 fm->base = basename(fm->__base);
932                 fm->key = sb.st_dev;
933                 flist_add(&fm->list, &list);
934         }
935
936         flist_for_each_safe(n, tmp, &list) {
937                 unsigned long long sz;
938
939                 fm = flist_entry(n, struct fio_mount, list);
940                 flist_del(&fm->list);
941
942                 sz = get_fs_free_size(fm->base);
943                 if (sz && sz != -1ULL)
944                         ret += sz;
945
946                 free(fm);
947         }
948
949         return ret;
950 }
951
952 uint64_t get_start_offset(struct thread_data *td, struct fio_file *f)
953 {
954         bool align = false;
955         struct thread_options *o = &td->o;
956         unsigned long long align_bs;
957         unsigned long long offset;
958         unsigned long long increment;
959
960         if (o->file_append && f->filetype == FIO_TYPE_FILE)
961                 return f->real_file_size;
962
963         if (o->offset_increment_percent) {
964                 assert(!o->offset_increment);
965                 increment = o->offset_increment_percent * f->real_file_size / 100;
966                 align = true;
967         } else
968                 increment = o->offset_increment;
969
970         if (o->start_offset_percent > 0) {
971                 /* calculate the raw offset */
972                 offset = (f->real_file_size * o->start_offset_percent / 100) +
973                         (td->subjob_number * increment);
974
975                 align = true;
976         } else {
977                 /* start_offset_percent not set */
978                 offset = o->start_offset +
979                                 td->subjob_number * increment;
980         }
981
982         if (align) {
983                 /*
984                  * if offset_align is provided, use it
985                  */
986                 if (fio_option_is_set(o, start_offset_align)) {
987                         align_bs = o->start_offset_align;
988                 } else {
989                         /* else take the minimum block size */
990                         align_bs = td_min_bs(td);
991                 }
992
993                 /*
994                  * block align the offset at the next available boundary at
995                  * ceiling(offset / align_bs) * align_bs
996                  */
997                 offset = (offset / align_bs + (offset % align_bs != 0)) * align_bs;
998         }
999
1000         return offset;
1001 }
1002
1003 /*
1004  * Find longest path component that exists and return its length
1005  */
1006 int longest_existing_path(char *path) {
1007         char buf[PATH_MAX];
1008         bool done;
1009         char *buf_pos;
1010         int offset;
1011 #ifdef WIN32
1012         DWORD dwAttr;
1013 #else
1014         struct stat sb;
1015 #endif
1016
1017         sprintf(buf, "%s", path);
1018         done = false;
1019         while (!done) {
1020                 buf_pos = strrchr(buf, FIO_OS_PATH_SEPARATOR);
1021                 if (!buf_pos) {
1022                         done = true;
1023                         offset = 0;
1024                         break;
1025                 }
1026
1027                 *(buf_pos + 1) = '\0';
1028
1029 #ifdef WIN32
1030                 dwAttr = GetFileAttributesA(buf);
1031                 if (dwAttr != INVALID_FILE_ATTRIBUTES) {
1032                         done = true;
1033                 }
1034 #else
1035                 if (stat(buf, &sb) == 0)
1036                         done = true;
1037 #endif
1038                 if (done)
1039                         offset = buf_pos - buf;
1040                 else
1041                         *buf_pos = '\0';
1042         }
1043
1044         return offset;
1045 }
1046
1047 static bool create_work_dirs(struct thread_data *td, const char *fname)
1048 {
1049         char path[PATH_MAX];
1050         char *start, *end;
1051         int offset;
1052
1053         snprintf(path, PATH_MAX, "%s", fname);
1054         start = path;
1055
1056         offset = longest_existing_path(path);
1057         end = start + offset;
1058         while ((end = strchr(end, FIO_OS_PATH_SEPARATOR)) != NULL) {
1059                 if (end == start) {
1060                         end++;
1061                         continue;
1062                 }
1063                 *end = '\0';
1064                 errno = 0;
1065                 if (fio_mkdir(path, 0700) && errno != EEXIST) {
1066                         log_err("fio: failed to create dir (%s): %s\n",
1067                                 start, strerror(errno));
1068                         return false;
1069                 }
1070                 *end = FIO_OS_PATH_SEPARATOR;
1071                 end++;
1072         }
1073         td->flags |= TD_F_DIRS_CREATED;
1074         return true;
1075 }
1076
1077 /*
1078  * Open the files and setup files sizes, creating files if necessary.
      *
      * In order, this function:
      *  - creates missing parent directories for file names containing a
      *    path separator (skipped for FIO_DISKLESSIO engines),
      *  - obtains file sizes through the engine's setup() hook if it has
      *    one, otherwise through get_file_sizes(),
      *  - jumps straight to the epilogue when read_iolog_file is set,
      *  - derives ->file_offset and ->io_size for every file and flags
      *    regular files that have to grow,
      *  - sizes the block error histogram when block_error_hist is set,
      *  - extends the flagged files, then runs the engine's
      *    prepopulate_file() hook if it has one,
      *  - sets td->total_io_size from io_size (or size) times loops,
      *  - runs zbd_setup_files() when zone_mode is ZONE_MODE_ZBD.
      *
      * The run state is bumped to TD_SETTING_UP on entry and restored before
      * returning. Returns 0 on success and 1 on any failure.
1079  */
1080 int setup_files(struct thread_data *td)
1081 {
1082         unsigned long long total_size, extend_size;
1083         struct thread_options *o = &td->o;
1084         struct fio_file *f;
1085         unsigned int i, nr_fs_extra = 0;
1086         int err = 0, need_extend;
1087         int old_state;
1088         const unsigned long long bs = td_min_bs(td);
1089         uint64_t fs = 0;
1090
1091         dprint(FD_FILE, "setup files\n");
1092
1093         old_state = td_bump_runstate(td, TD_SETTING_UP);
1094
             /*
              * Create parent directories for file names that contain a path
              * separator. create_work_dirs() sets TD_F_DIRS_CREATED on
              * success, so the remaining files skip the call.
              */
1095         for_each_file(td, f, i) {
1096                 if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
1097                     strchr(f->file_name, FIO_OS_PATH_SEPARATOR) &&
1098                     !(td->flags & TD_F_DIRS_CREATED) &&
1099                     !create_work_dirs(td, f->file_name))
1100                         goto err_out;
1101         }
1102
1103         /*
1104          * Find out physical size of files or devices for this thread,
1105          * before we determine I/O size and range of our targets.
1106          * If ioengine defines a setup() method, it's responsible for
1107          * opening the files and setting f->real_file_size to indicate
1108          * the valid range for that file.
1109          */
1110         if (td->io_ops->setup)
1111                 err = td->io_ops->setup(td);
1112         else
1113                 err = get_file_sizes(td);
1114
1115         if (err)
1116                 goto err_out;
1117
             /* With an iolog to read back, skip all size computation below. */
1118         if (o->read_iolog_file)
1119                 goto done;
1120
1121         /*
1122          * check sizes. if the files/devices do not exist and the size
1123          * isn't passed to fio, abort.
1124          */
1125         total_size = 0;
1126         for_each_file(td, f, i) {
1127                 f->fileno = i;
1128                 if (f->real_file_size == -1ULL)
1129                         total_size = -1ULL;
1130                 else
1131                         total_size += f->real_file_size;
1132         }
1133
1134         if (o->fill_device)
1135                 td->fill_device_size = get_fs_free_counts(td);
1136
1137         /*
1138          * device/file sizes are zero and no size given, punt
1139          */
1140         if ((!total_size || total_size == -1ULL) && !o->size &&
1141             !td_ioengine_flagged(td, FIO_NOIO) && !o->fill_device &&
1142             !(o->nr_files && (o->file_size_low || o->file_size_high))) {
1143                 log_err("%s: you need to specify size=\n", o->name);
1144                 td_verror(td, EINVAL, "total_file_size");
1145                 goto err_out;
1146         }
1147
1148         /*
1149          * Calculate per-file size and potential extra size for the
1150          * first files, if needed (i.e. if we don't have a fixed size).
1151          */
1152         if (!o->file_size_low && o->nr_files) {
1153                 uint64_t all_fs;
1154
1155                 fs = o->size / o->nr_files;
1156                 all_fs = fs * o->nr_files;
1157
                     /*
                      * nr_fs_extra counts how many files receive one extra
                      * minimum-size block to absorb the division remainder.
                      */
1158                 if (all_fs < o->size)
1159                         nr_fs_extra = (o->size - all_fs) / bs;
1160         }
1161
1162         /*
1163          * now file sizes are known, so we can set ->io_size. if size= is
1164          * not given, ->io_size is just equal to ->real_file_size. if size
1165          * is given, ->io_size is size / nr_files.
1166          */
1167         extend_size = total_size = 0;
1168         need_extend = 0;
1169         for_each_file(td, f, i) {
1170                 f->file_offset = get_start_offset(td, f);
1171
1172                 /*
1173                  * Update ->io_size depending on options specified.
1174                  * ->file_size_low being 0 means filesize option isn't set.
1175                  * Non zero ->file_size_low equals ->file_size_high means
1176                  * filesize option is set in a fixed size format.
1177                  * Non zero ->file_size_low not equals ->file_size_high means
1178                  * filesize option is set in a range format.
1179                  */
1180                 if (!o->file_size_low) {
1181                         /*
1182                          * no file size or range given, file size is equal to
1183                          * total size divided by number of files. If the size
1184                          * doesn't divide nicely with the min blocksize,
1185                          * make the first files bigger.
1186                          */
1187                         f->io_size = fs;
1188                         if (nr_fs_extra) {
1189                                 nr_fs_extra--;
1190                                 f->io_size += bs;
1191                         }
1192
1193                         /*
1194                          * We normally don't come here for regular files, but
1195                          * if the result is 0 for a regular file, set it to the
1196                          * real file size. This could be size of the existing
1197                          * one if it already exists, but otherwise will be set
1198                          * to 0. A new file won't be created because
1199                          * ->io_size + ->file_offset equals ->real_file_size.
1200                          */
1201                         if (!f->io_size) {
1202                                 if (f->file_offset > f->real_file_size)
1203                                         goto err_offset;
1204                                 f->io_size = f->real_file_size - f->file_offset;
1205                                 if (!f->io_size)
1206                                         log_info("fio: file %s may be ignored\n",
1207                                                 f->file_name);
1208                         }
1209                 } else if (f->real_file_size < o->file_size_low ||
1210                            f->real_file_size > o->file_size_high) {
1211                         if (f->file_offset > o->file_size_low)
1212                                 goto err_offset;
1213                         /*
1214                          * file size given. if it's fixed, use that. if it's a
1215                          * range, generate a random size in-between.
1216                          */
1217                         if (o->file_size_low == o->file_size_high)
1218                                 f->io_size = o->file_size_low - f->file_offset;
1219                         else {
1220                                 f->io_size = get_rand_file_size(td)
1221                                                 - f->file_offset;
1222                         }
1223                 } else
1224                         f->io_size = f->real_file_size - f->file_offset;
1225
1226                 if (f->io_size == -1ULL)
1227                         total_size = -1ULL;
1228                 else {
1229                         uint64_t io_size;
1230
                             /*
                              * size_percent scales the file size (io_size plus
                              * offset); the result is capped at the original
                              * io_size and trimmed to a multiple of the minimum
                              * block size.
                              */
1231                         if (o->size_percent && o->size_percent != 100) {
1232                                 uint64_t file_size;
1233
1234                                 file_size = f->io_size + f->file_offset;
1235                                 f->io_size = (file_size *
1236                                               o->size_percent) / 100;
1237                                 if (f->io_size > (file_size - f->file_offset))
1238                                         f->io_size = file_size - f->file_offset;
1239
1240                                 f->io_size -= (f->io_size % td_min_bs(td));
1241                         }
1242
                             /*
                              * io_size_percent only affects the running total,
                              * not f->io_size itself.
                              */
1243                         io_size = f->io_size;
1244                         if (o->io_size_percent && o->io_size_percent != 100) {
1245                                 io_size *= o->io_size_percent;
1246                                 io_size /= 100;
1247                         }
1248
1249                         total_size += io_size;
1250                 }
1251
                     /*
                      * A regular file whose I/O range ends past its current
                      * size is either flagged for extension below or has
                      * ->real_file_size adjusted right here.
                      */
1252                 if (f->filetype == FIO_TYPE_FILE &&
1253                     (f->io_size + f->file_offset) > f->real_file_size) {
1254                         if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
1255                             !o->create_on_open) {
1256                                 need_extend++;
1257                                 extend_size += (f->io_size + f->file_offset);
1258                                 fio_file_set_extend(f);
1259                         } else if (!td_ioengine_flagged(td, FIO_DISKLESSIO) ||
1260                                    (td_ioengine_flagged(td, FIO_DISKLESSIO) &&
1261                                     td_ioengine_flagged(td, FIO_FAKEIO)))
1262                                 f->real_file_size = f->io_size + f->file_offset;
1263                 }
1264         }
1265
1266         if (td->o.block_error_hist) {
1267                 int len;
1268
1269                 assert(td->o.nr_files == 1);    /* checked in fixup_options */
1270                 f = td->files[0];
                     /*
                      * NOTE(review): a zero bs[DDIR_TRIM] would divide by zero
                      * here - presumably rejected earlier; confirm.
                      */
1271                 len = f->io_size / td->o.bs[DDIR_TRIM];
1272                 if (len > MAX_NR_BLOCK_INFOS || len <= 0) {
1273                         log_err("fio: cannot calculate block histogram with "
1274                                 "%d trim blocks, maximum %d\n",
1275                                 len, MAX_NR_BLOCK_INFOS);
1276                         td_verror(td, EINVAL, "block_error_hist");
1277                         goto err_out;
1278                 }
1279
1280                 td->ts.nr_block_infos = len;
1281                 for (i = 0; i < len; i++)
1282                         td->ts.block_infos[i] =
1283                                 BLOCK_INFO(0, BLOCK_STATE_UNINIT);
1284         } else
1285                 td->ts.nr_block_infos = 0;
1286
             /*
              * size= defaults to the computed total and is clamped to it when
              * the total is non-zero and smaller.
              */
1287         if (!o->size || (total_size && o->size > total_size))
1288                 o->size = total_size;
1289
1290         if (o->size < td_min_bs(td)) {
1291                 log_err("fio: blocksize is larger than data set range\n");
1292                 goto err_out;
1293         }
1294
1295         /*
1296          * See if we need to extend some files, typically needed when our
1297          * target regular files don't exist yet, but our jobs require them
1298          * initially due to read I/Os.
1299          */
1300         if (need_extend) {
1301                 temp_stall_ts = 1;
1302                 if (output_format & FIO_OUTPUT_NORMAL) {
1303                         log_info("%s: Laying out IO file%s (%u file%s / %s%lluMiB)\n",
1304                                  o->name,
1305                                  need_extend > 1 ? "s" : "",
1306                                  need_extend,
1307                                  need_extend > 1 ? "s" : "",
1308                                  need_extend > 1 ? "total " : "",
1309                                  extend_size >> 20);
1310                 }
1311
1312                 for_each_file(td, f, i) {
1313                         unsigned long long old_len = -1ULL, extend_len = -1ULL;
1314
1315                         if (!fio_file_extend(f))
1316                                 continue;
1317
1318                         assert(f->filetype == FIO_TYPE_FILE);
1319                         fio_file_clear_extend(f);
                             /*
                              * With fill_device set, old_len and extend_len keep
                              * their -1ULL initial values.
                              */
1320                         if (!o->fill_device) {
1321                                 old_len = f->real_file_size;
1322                                 extend_len = f->io_size + f->file_offset -
1323                                                 old_len;
1324                         }
1325                         f->real_file_size = (f->io_size + f->file_offset);
1326                         err = extend_file(td, f);
1327                         if (err)
1328                                 break;
1329
1330                         err = __file_invalidate_cache(td, f, old_len,
1331                                                                 extend_len);
1332
1333                         /*
1334                          * Shut up static checker
1335                          */
1336                         if (f->fd != -1)
1337                                 close(f->fd);
1338
1339                         f->fd = -1;
1340                         if (err)
1341                                 break;
1342                 }
1343                 temp_stall_ts = 0;
1344         }
1345
1346         if (err)
1347                 goto err_out;
1348
1349         /*
1350          * Prepopulate files with data. It might be expected to read some
1351          * "real" data instead of zero'ed files (if no writes to file occurred
1352          * prior to a read job). Engine has to provide a way to do that.
1353          */
1354         if (td->io_ops->prepopulate_file) {
1355                 temp_stall_ts = 1;
1356
1357                 for_each_file(td, f, i) {
1358                         if (output_format & FIO_OUTPUT_NORMAL) {
1359                                 log_info("%s: Prepopulating IO file (%s)\n",
1360                                                         o->name, f->file_name);
1361                         }
1362
1363                         err = td->io_ops->prepopulate_file(td, f);
1364                         if (err)
1365                                 break;
1366
1367                         err = __file_invalidate_cache(td, f, f->file_offset,
1368                                                                 f->io_size);
1369
1370                         /*
1371                          * Shut up static checker
1372                          */
1373                         if (f->fd != -1)
1374                                 close(f->fd);
1375
1376                         f->fd = -1;
1377                         if (err)
1378                                 break;
1379                 }
1380                 temp_stall_ts = 0;
1381         }
1382
1383         if (err)
1384                 goto err_out;
1385
1386         /*
1387          * iolog already set the total io size, if we read back
1388          * stored entries.
1389          */
1390         if (!o->read_iolog_file) {
1391                 if (o->io_size)
1392                         td->total_io_size = o->io_size * o->loops;
1393                 else
1394                         td->total_io_size = o->size * o->loops;
1395         }
1396
1397 done:
1398         if (o->create_only)
1399                 td->done = 1;
1400
1401         td_restore_runstate(td, old_state);
1402
             /*
              * NOTE(review): a failure here reaches err_out after the run state
              * was already restored just above, so td_restore_runstate() runs a
              * second time with the same old_state.
              */
1403         if (td->o.zone_mode == ZONE_MODE_ZBD) {
1404                 err = zbd_setup_files(td);
1405                 if (err)
1406                         goto err_out;
1407         }
1408         return 0;
1409
1410 err_offset:
1411         log_err("%s: you need to specify valid offset=\n", o->name);
1412 err_out:
1413         td_restore_runstate(td, old_state);
1414         return 1;
1415 }
1416
1417 bool pre_read_files(struct thread_data *td)
1418 {
1419         struct fio_file *f;
1420         unsigned int i;
1421
1422         dprint(FD_FILE, "pre_read files\n");
1423
1424         for_each_file(td, f, i) {
1425                 if (!pre_read_file(td, f))
1426                         return false;
1427         }
1428
1429         return true;
1430 }
1431
1432 static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
1433 {
1434         unsigned int range_size, seed;
1435         uint64_t nranges;
1436         uint64_t fsize;
1437
1438         range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
1439         fsize = min(f->real_file_size, f->io_size);
1440
1441         nranges = (fsize + range_size - 1ULL) / range_size;
1442
1443         seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
1444         if (!td->o.rand_repeatable)
1445                 seed = td->rand_seeds[4];
1446
1447         if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
1448                 zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, td->o.random_center.u.f, seed);
1449         else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
1450                 pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, td->o.random_center.u.f, seed);
1451         else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS)
1452                 gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, td->o.random_center.u.f, seed);
1453 }
1454
1455 static bool init_rand_distribution(struct thread_data *td)
1456 {
1457         struct fio_file *f;
1458         unsigned int i;
1459         int state;
1460
1461         if (td->o.random_distribution == FIO_RAND_DIST_RANDOM ||
1462             td->o.random_distribution == FIO_RAND_DIST_ZONED ||
1463             td->o.random_distribution == FIO_RAND_DIST_ZONED_ABS)
1464                 return false;
1465
1466         state = td_bump_runstate(td, TD_SETTING_UP);
1467
1468         for_each_file(td, f, i)
1469                 __init_rand_distribution(td, f);
1470
1471         td_restore_runstate(td, state);
1472         return true;
1473 }
1474
1475 /*
1476  * Check if the number of blocks exceeds the randomness capability of
1477  * the selected generator. Tausworthe is 32-bit, the others are fullly
1478  * 64-bit capable.
1479  */
1480 static int check_rand_gen_limits(struct thread_data *td, struct fio_file *f,
1481                                  uint64_t blocks)
1482 {
1483         if (blocks <= FRAND32_MAX)
1484                 return 0;
1485         if (td->o.random_generator != FIO_RAND_GEN_TAUSWORTHE)
1486                 return 0;
1487
1488         /*
1489          * If the user hasn't specified a random generator, switch
1490          * to tausworthe64 with informational warning. If the user did
1491          * specify one, just warn.
1492          */
1493         log_info("fio: file %s exceeds 32-bit tausworthe random generator.\n",
1494                         f->file_name);
1495
1496         if (!fio_option_is_set(&td->o, random_generator)) {
1497                 log_info("fio: Switching to tausworthe64. Use the "
1498                          "random_generator= option to get rid of this "
1499                          "warning.\n");
1500                 td->o.random_generator = FIO_RAND_GEN_TAUSWORTHE64;
1501                 return 0;
1502         }
1503
1504         /*
1505          * Just make this information to avoid breaking scripts.
1506          */
1507         log_info("fio: Use the random_generator= option to switch to lfsr or "
1508                          "tausworthe64.\n");
1509         return 0;
1510 }
1511
1512 bool init_random_map(struct thread_data *td)
1513 {
1514         unsigned long long blocks;
1515         struct fio_file *f;
1516         unsigned int i;
1517
1518         if (init_rand_distribution(td))
1519                 return true;
1520         if (!td_random(td))
1521                 return true;
1522
1523         for_each_file(td, f, i) {
1524                 uint64_t fsize = min(f->real_file_size, f->io_size);
1525
1526                 if (td->o.zone_mode == ZONE_MODE_STRIDED)
1527                         fsize = td->o.zone_range;
1528
1529                 blocks = fsize / (unsigned long long) td->o.rw_min_bs;
1530
1531                 if (check_rand_gen_limits(td, f, blocks))
1532                         return false;
1533
1534                 if (td->o.random_generator == FIO_RAND_GEN_LFSR) {
1535                         uint64_t seed;
1536
1537                         seed = td->rand_seeds[FIO_RAND_BLOCK_OFF];
1538
1539                         if (!lfsr_init(&f->lfsr, blocks, seed, 0)) {
1540                                 fio_file_set_lfsr(f);
1541                                 continue;
1542                         } else {
1543                                 log_err("fio: failed initializing LFSR\n");
1544                                 return false;
1545                         }
1546                 } else if (!td->o.norandommap) {
1547                         f->io_axmap = axmap_new(blocks);
1548                         if (f->io_axmap) {
1549                                 fio_file_set_axmap(f);
1550                                 continue;
1551                         }
1552                 } else if (td->o.norandommap)
1553                         continue;
1554
1555                 if (!td->o.softrandommap) {
1556                         log_err("fio: failed allocating random map. If running"
1557                                 " a large number of jobs, try the 'norandommap'"
1558                                 " option or set 'softrandommap'. Or give"
1559                                 " a larger --alloc-size to fio.\n");
1560                         return false;
1561                 }
1562
1563                 log_info("fio: file %s failed allocating random map. Running "
1564                          "job without.\n", f->file_name);
1565         }
1566
1567         return true;
1568 }
1569
1570 void close_files(struct thread_data *td)
1571 {
1572         struct fio_file *f;
1573         unsigned int i;
1574
1575         for_each_file(td, f, i) {
1576                 if (fio_file_open(f))
1577                         td_io_close_file(td, f);
1578         }
1579 }
1580
1581 void fio_file_free(struct fio_file *f)
1582 {
1583         if (fio_file_axmap(f))
1584                 axmap_free(f->io_axmap);
1585         if (!fio_file_smalloc(f)) {
1586                 free(f->file_name);
1587                 free(f);
1588         } else {
1589                 sfree(f->file_name);
1590                 sfree(f);
1591         }
1592 }
1593
1594 void close_and_free_files(struct thread_data *td)
1595 {
1596         struct fio_file *f;
1597         unsigned int i;
1598
1599         dprint(FD_FILE, "close files\n");
1600
1601         for_each_file(td, f, i) {
1602                 if (td->o.unlink && f->filetype == FIO_TYPE_FILE) {
1603                         dprint(FD_FILE, "free unlink %s\n", f->file_name);
1604                         td_io_unlink_file(td, f);
1605                 }
1606
1607                 if (fio_file_open(f))
1608                         td_io_close_file(td, f);
1609
1610                 remove_file_hash(f);
1611
1612                 if (td->o.unlink && f->filetype == FIO_TYPE_FILE) {
1613                         dprint(FD_FILE, "free unlink %s\n", f->file_name);
1614                         td_io_unlink_file(td, f);
1615                 }
1616
1617                 zbd_close_file(f);
1618                 fio_file_free(f);
1619         }
1620
1621         td->o.filename = NULL;
1622         free(td->files);
1623         free(td->file_locks);
1624         td->files_index = 0;
1625         td->files = NULL;
1626         td->file_locks = NULL;
1627         td->o.file_lock_mode = FILE_LOCK_NONE;
1628         td->o.nr_files = 0;
1629 }
1630
1631 static void get_file_type(struct fio_file *f)
1632 {
1633         struct stat sb;
1634
1635         if (!strcmp(f->file_name, "-"))
1636                 f->filetype = FIO_TYPE_PIPE;
1637         else
1638                 f->filetype = FIO_TYPE_FILE;
1639
1640 #ifdef WIN32
1641         /* \\.\ is the device namespace in Windows, where every file is
1642          * a block device */
1643         if (strncmp(f->file_name, "\\\\.\\", 4) == 0)
1644                 f->filetype = FIO_TYPE_BLOCK;
1645 #endif
1646
1647         if (!stat(f->file_name, &sb)) {
1648                 if (S_ISBLK(sb.st_mode))
1649                         f->filetype = FIO_TYPE_BLOCK;
1650                 else if (S_ISCHR(sb.st_mode))
1651                         f->filetype = FIO_TYPE_CHAR;
1652                 else if (S_ISFIFO(sb.st_mode))
1653                         f->filetype = FIO_TYPE_PIPE;
1654         }
1655 }
1656
1657 static bool __is_already_allocated(const char *fname, bool set)
1658 {
1659         struct flist_head *entry;
1660         bool ret;
1661
1662         ret = file_bloom_exists(fname, set);
1663         if (!ret)
1664                 return ret;
1665
1666         flist_for_each(entry, &filename_list) {
1667                 struct file_name *fn;
1668
1669                 fn = flist_entry(entry, struct file_name, list);
1670
1671                 if (!strcmp(fn->filename, fname))
1672                         return true;
1673         }
1674
1675         return false;
1676 }
1677
/*
 * Locked lookup: returns true when fname has already been recorded on
 * filename_list. The bloom filter is queried with set == false.
 */
static bool is_already_allocated(const char *fname)
{
	bool found;

	fio_file_hash_lock();
	found = __is_already_allocated(fname, false);
	fio_file_hash_unlock();

	return found;
}
1688
1689 static void set_already_allocated(const char *fname)
1690 {
1691         struct file_name *fn;
1692
1693         fn = malloc(sizeof(struct file_name));
1694         fn->filename = strdup(fname);
1695
1696         fio_file_hash_lock();
1697         if (!__is_already_allocated(fname, true)) {
1698                 flist_add_tail(&fn->list, &filename_list);
1699                 fn = NULL;
1700         }
1701         fio_file_hash_unlock();
1702
1703         if (fn) {
1704                 free(fn->filename);
1705                 free(fn);
1706         }
1707 }
1708
1709 static void free_already_allocated(void)
1710 {
1711         struct flist_head *entry, *tmp;
1712         struct file_name *fn;
1713
1714         if (flist_empty(&filename_list))
1715                 return;
1716
1717         fio_file_hash_lock();
1718         flist_for_each_safe(entry, tmp, &filename_list) {
1719                 fn = flist_entry(entry, struct file_name, list);
1720                 free(fn->filename);
1721                 flist_del(&fn->list);
1722                 free(fn);
1723         }
1724
1725         fio_file_hash_unlock();
1726 }
1727
1728 static struct fio_file *alloc_new_file(struct thread_data *td)
1729 {
1730         struct fio_file *f;
1731
1732         if (td_ioengine_flagged(td, FIO_NOFILEHASH))
1733                 f = calloc(1, sizeof(*f));
1734         else
1735                 f = scalloc(1, sizeof(*f));
1736         if (!f) {
1737                 assert(0);
1738                 return NULL;
1739         }
1740
1741         f->fd = -1;
1742         f->shadow_fd = -1;
1743         fio_file_reset(td, f);
1744         if (!td_ioengine_flagged(td, FIO_NOFILEHASH))
1745                 fio_file_set_smalloc(f);
1746         return f;
1747 }
1748
/*
 * Returns true when filename can be lstat'ed and is not a regular file.
 * On WIN32 a regular file whose name starts with \\.\ also counts as
 * "not regular". Returns false when lstat() fails.
 */
bool exists_and_not_regfile(const char *filename)
{
	struct stat sb;
	bool regular;

	if (lstat(filename, &sb) == -1)
		return false;

	regular = S_ISREG(sb.st_mode);

#ifdef WIN32
	/* \\.\ is the device namespace in Windows, where every file
	 * is a device node */
	if (regular && strncmp(filename, "\\\\.\\", 4) == 0)
		regular = false;
#endif

	return !regular;
}
1768
1769 int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
1770 {
1771         int cur_files = td->files_index;
1772         char file_name[PATH_MAX];
1773         struct fio_file *f;
1774         int len = 0;
1775
1776         dprint(FD_FILE, "add file %s\n", fname);
1777
1778         if (td->o.directory)
1779                 len = set_name_idx(file_name, PATH_MAX, td->o.directory, numjob,
1780                                         td->o.unique_filename);
1781
1782         sprintf(file_name + len, "%s", fname);
1783
1784         /* clean cloned siblings using existing files */
1785         if (numjob && is_already_allocated(file_name) &&
1786             !exists_and_not_regfile(fname))
1787                 return 0;
1788
1789         f = alloc_new_file(td);
1790
1791         if (td->files_size <= td->files_index) {
1792                 unsigned int new_size = td->o.nr_files + 1;
1793
1794                 dprint(FD_FILE, "resize file array to %d files\n", new_size);
1795
1796                 td->files = realloc(td->files, new_size * sizeof(f));
1797                 if (td->files == NULL) {
1798                         log_err("fio: realloc OOM\n");
1799                         assert(0);
1800                 }
1801                 if (td->o.file_lock_mode != FILE_LOCK_NONE) {
1802                         td->file_locks = realloc(td->file_locks, new_size);
1803                         if (!td->file_locks) {
1804                                 log_err("fio: realloc OOM\n");
1805                                 assert(0);
1806                         }
1807                         td->file_locks[cur_files] = FILE_LOCK_NONE;
1808                 }
1809                 td->files_size = new_size;
1810         }
1811         td->files[cur_files] = f;
1812         f->fileno = cur_files;
1813
1814         /*
1815          * init function, io engine may not be loaded yet
1816          */
1817         if (td->io_ops && td_ioengine_flagged(td, FIO_DISKLESSIO))
1818                 f->real_file_size = -1ULL;
1819
1820         if (td_ioengine_flagged(td, FIO_NOFILEHASH))
1821                 f->file_name = strdup(file_name);
1822         else
1823                 f->file_name = smalloc_strdup(file_name);
1824
1825         /* can't handle smalloc failure from here */
1826         assert(f->file_name);
1827
1828         get_file_type(f);
1829
1830         switch (td->o.file_lock_mode) {
1831         case FILE_LOCK_NONE:
1832                 break;
1833         case FILE_LOCK_READWRITE:
1834                 f->rwlock = fio_rwlock_init();
1835                 break;
1836         case FILE_LOCK_EXCLUSIVE:
1837                 f->lock = fio_sem_init(FIO_SEM_UNLOCKED);
1838                 break;
1839         default:
1840                 log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
1841                 assert(0);
1842         }
1843
1844         td->files_index++;
1845
1846         if (td->o.numjobs > 1)
1847                 set_already_allocated(file_name);
1848
1849         if (inc)
1850                 td->o.nr_files++;
1851
1852         dprint(FD_FILE, "file %p \"%s\" added at %d\n", f, f->file_name,
1853                                                         cur_files);
1854
1855         return cur_files;
1856 }
1857
1858 int add_file_exclusive(struct thread_data *td, const char *fname)
1859 {
1860         struct fio_file *f;
1861         unsigned int i;
1862
1863         for_each_file(td, f, i) {
1864                 if (!strcmp(f->file_name, fname))
1865                         return i;
1866         }
1867
1868         return add_file(td, fname, 0, 1);
1869 }
1870
1871 void get_file(struct fio_file *f)
1872 {
1873         dprint(FD_FILE, "get file %s, ref=%d\n", f->file_name, f->references);
1874         assert(fio_file_open(f));
1875         f->references++;
1876 }
1877
1878 int put_file(struct thread_data *td, struct fio_file *f)
1879 {
1880         int f_ret = 0, ret = 0;
1881
1882         dprint(FD_FILE, "put file %s, ref=%d\n", f->file_name, f->references);
1883
1884         if (!fio_file_open(f)) {
1885                 assert(f->fd == -1);
1886                 return 0;
1887         }
1888
1889         assert(f->references);
1890         if (--f->references)
1891                 return 0;
1892
1893         disk_util_dec(f->du);
1894
1895         if (td->o.file_lock_mode != FILE_LOCK_NONE)
1896                 unlock_file_all(td, f);
1897
1898         if (should_fsync(td) && td->o.fsync_on_close) {
1899                 f_ret = fsync(f->fd);
1900                 if (f_ret < 0)
1901                         f_ret = errno;
1902         }
1903
1904         if (td->io_ops->close_file)
1905                 ret = td->io_ops->close_file(td, f);
1906
1907         if (!ret)
1908                 ret = f_ret;
1909
1910         td->nr_open_files--;
1911         fio_file_clear_closing(f);
1912         fio_file_clear_open(f);
1913         assert(f->fd == -1);
1914         return ret;
1915 }
1916
1917 void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
1918 {
1919         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1920                 return;
1921
1922         if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
1923                 if (ddir == DDIR_READ)
1924                         fio_rwlock_read(f->rwlock);
1925                 else
1926                         fio_rwlock_write(f->rwlock);
1927         } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1928                 fio_sem_down(f->lock);
1929
1930         td->file_locks[f->fileno] = td->o.file_lock_mode;
1931 }
1932
1933 void unlock_file(struct thread_data *td, struct fio_file *f)
1934 {
1935         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1936                 return;
1937
1938         if (td->o.file_lock_mode == FILE_LOCK_READWRITE)
1939                 fio_rwlock_unlock(f->rwlock);
1940         else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1941                 fio_sem_up(f->lock);
1942
1943         td->file_locks[f->fileno] = FILE_LOCK_NONE;
1944 }
1945
1946 void unlock_file_all(struct thread_data *td, struct fio_file *f)
1947 {
1948         if (td->o.file_lock_mode == FILE_LOCK_NONE || !td->file_locks)
1949                 return;
1950         if (td->file_locks[f->fileno] != FILE_LOCK_NONE)
1951                 unlock_file(td, f);
1952 }
1953
1954 static bool recurse_dir(struct thread_data *td, const char *dirname)
1955 {
1956         struct dirent *dir;
1957         bool ret = false;
1958         DIR *D;
1959
1960         D = opendir(dirname);
1961         if (!D) {
1962                 char buf[FIO_VERROR_SIZE];
1963
1964                 snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname);
1965                 td_verror(td, errno, buf);
1966                 return true;
1967         }
1968
1969         while ((dir = readdir(D)) != NULL) {
1970                 char full_path[PATH_MAX];
1971                 struct stat sb;
1972
1973                 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
1974                         continue;
1975
1976                 sprintf(full_path, "%s%c%s", dirname, FIO_OS_PATH_SEPARATOR, dir->d_name);
1977
1978                 if (lstat(full_path, &sb) == -1) {
1979                         if (errno != ENOENT) {
1980                                 td_verror(td, errno, "stat");
1981                                 ret = true;
1982                                 break;
1983                         }
1984                 }
1985
1986                 if (S_ISREG(sb.st_mode)) {
1987                         add_file(td, full_path, 0, 1);
1988                         continue;
1989                 }
1990                 if (!S_ISDIR(sb.st_mode))
1991                         continue;
1992
1993                 ret = recurse_dir(td, full_path);
1994                 if (ret)
1995                         break;
1996         }
1997
1998         closedir(D);
1999         return ret;
2000 }
2001
2002 int add_dir_files(struct thread_data *td, const char *path)
2003 {
2004         int ret = recurse_dir(td, path);
2005
2006         if (!ret)
2007                 log_info("fio: opendir added %d files\n", td->o.nr_files);
2008
2009         return ret;
2010 }
2011
2012 void dup_files(struct thread_data *td, struct thread_data *org)
2013 {
2014         struct fio_file *f;
2015         unsigned int i;
2016
2017         dprint(FD_FILE, "dup files: %d\n", org->files_index);
2018
2019         if (!org->files)
2020                 return;
2021
2022         td->files = malloc(org->files_index * sizeof(f));
2023
2024         if (td->o.file_lock_mode != FILE_LOCK_NONE)
2025                 td->file_locks = malloc(org->files_index);
2026
2027         for_each_file(org, f, i) {
2028                 struct fio_file *__f;
2029
2030                 __f = alloc_new_file(td);
2031
2032                 if (f->file_name) {
2033                         if (td_ioengine_flagged(td, FIO_NOFILEHASH))
2034                                 __f->file_name = strdup(f->file_name);
2035                         else
2036                                 __f->file_name = smalloc_strdup(f->file_name);
2037
2038                         /* can't handle smalloc failure from here */
2039                         assert(__f->file_name);
2040                         __f->filetype = f->filetype;
2041                 }
2042
2043                 if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
2044                         __f->lock = f->lock;
2045                 else if (td->o.file_lock_mode == FILE_LOCK_READWRITE)
2046                         __f->rwlock = f->rwlock;
2047
2048                 td->files[i] = __f;
2049         }
2050 }
2051
2052 /*
2053  * Returns the index that matches the filename, or -1 if not there
2054  */
2055 int get_fileno(struct thread_data *td, const char *fname)
2056 {
2057         struct fio_file *f;
2058         unsigned int i;
2059
2060         for_each_file(td, f, i)
2061                 if (!strcmp(f->file_name, fname))
2062                         return i;
2063
2064         return -1;
2065 }
2066
2067 /*
2068  * For log usage, where we add/open/close files automatically
2069  */
2070 void free_release_files(struct thread_data *td)
2071 {
2072         close_files(td);
2073         td->o.nr_files = 0;
2074         td->o.open_files = 0;
2075         td->files_index = 0;
2076 }
2077
2078 void fio_file_reset(struct thread_data *td, struct fio_file *f)
2079 {
2080         int i;
2081
2082         for (i = 0; i < DDIR_RWDIR_CNT; i++) {
2083                 f->last_pos[i] = f->file_offset;
2084                 f->last_start[i] = -1ULL;
2085         }
2086
2087         if (fio_file_axmap(f))
2088                 axmap_reset(f->io_axmap);
2089         else if (fio_file_lfsr(f))
2090                 lfsr_reset(&f->lfsr, td->rand_seeds[FIO_RAND_BLOCK_OFF]);
2091
2092         zbd_file_reset(td, f);
2093 }
2094
2095 bool fio_files_done(struct thread_data *td)
2096 {
2097         struct fio_file *f;
2098         unsigned int i;
2099
2100         for_each_file(td, f, i)
2101                 if (!fio_file_done(f))
2102                         return false;
2103
2104         return true;
2105 }
2106
/*
 * Free memory that is only needed during the initialization phase;
 * currently that is the list released by free_already_allocated().
 */
void filesetup_mem_free(void)
{
        free_already_allocated();
}
2112
/*
 * This function is for platforms which support direct I/O but not
 * O_DIRECT. Returns 0 on success. On failure, or when the platform does
 * not define FIO_OS_DIRECTIO, an error is logged and -1 is returned.
 */
int fio_set_directio(struct thread_data *td, struct fio_file *f)
{
#ifndef FIO_OS_DIRECTIO
        log_err("fio: direct IO is not supported on this host operating system\n");
        return -1;
#else
        int ret = fio_set_odirect(f);

        if (!ret)
                return 0;

        td_verror(td, ret, "fio_set_directio");
#if defined(__sun__)
        /* ENOTTY suggests RAW device or ZFS */
        if (ret == ENOTTY)
                log_err("fio: doing directIO to RAW devices or ZFS not supported\n");
        else
                log_err("fio: the file system does not seem to support direct IO\n");
#else
        log_err("fio: the file system does not seem to support direct IO\n");
#endif
        return -1;
#endif
}