Add missing fio_mutex_up() on return
[fio.git] / filesetup.c
1 #include <unistd.h>
2 #include <fcntl.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <dirent.h>
6 #include <libgen.h>
7 #include <sys/stat.h>
8 #include <sys/mman.h>
9 #include <sys/types.h>
10
11 #include "fio.h"
12 #include "smalloc.h"
13 #include "filehash.h"
14 #include "options.h"
15 #include "os/os.h"
16 #include "hash.h"
17 #include "lib/axmap.h"
18
19 #ifdef CONFIG_LINUX_FALLOCATE
20 #include <linux/falloc.h>
21 #endif
22
23 static int root_warn;
24
25 static FLIST_HEAD(filename_list);
26
27 static inline void clear_error(struct thread_data *td)
28 {
29         td->error = 0;
30         td->verror[0] = '\0';
31 }
32
33 /*
34  * Leaves f->fd open on success, caller must close
35  */
36 static int extend_file(struct thread_data *td, struct fio_file *f)
37 {
38         int r, new_layout = 0, unlink_file = 0, flags;
39         unsigned long long left;
40         unsigned int bs;
41         char *b = NULL;
42
43         if (read_only) {
44                 log_err("fio: refusing extend of file due to read-only\n");
45                 return 0;
46         }
47
48         /*
49          * check if we need to lay the file out complete again. fio
50          * does that for operations involving reads, or for writes
51          * where overwrite is set
52          */
53         if (td_read(td) ||
54            (td_write(td) && td->o.overwrite && !td->o.file_append) ||
55             (td_write(td) && td->io_ops->flags & FIO_NOEXTEND))
56                 new_layout = 1;
57         if (td_write(td) && !td->o.overwrite && !td->o.file_append)
58                 unlink_file = 1;
59
60         if (unlink_file || new_layout) {
61                 dprint(FD_FILE, "layout unlink %s\n", f->file_name);
62                 if ((td_io_unlink_file(td, f) < 0) && (errno != ENOENT)) {
63                         td_verror(td, errno, "unlink");
64                         return 1;
65                 }
66         }
67
68         flags = O_WRONLY;
69         if (td->o.allow_create)
70                 flags |= O_CREAT;
71         if (new_layout)
72                 flags |= O_TRUNC;
73
74 #ifdef WIN32
75         flags |= _O_BINARY;
76 #endif
77
78         dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
79         f->fd = open(f->file_name, flags, 0644);
80         if (f->fd < 0) {
81                 int err = errno;
82
83                 if (err == ENOENT && !td->o.allow_create)
84                         log_err("fio: file creation disallowed by "
85                                         "allow_file_create=0\n");
86                 else
87                         td_verror(td, err, "open");
88                 return 1;
89         }
90
91 #ifdef CONFIG_POSIX_FALLOCATE
92         if (!td->o.fill_device) {
93                 switch (td->o.fallocate_mode) {
94                 case FIO_FALLOCATE_NONE:
95                         break;
96                 case FIO_FALLOCATE_POSIX:
97                         dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
98                                  f->file_name,
99                                  (unsigned long long) f->real_file_size);
100
101                         r = posix_fallocate(f->fd, 0, f->real_file_size);
102                         if (r > 0) {
103                                 log_err("fio: posix_fallocate fails: %s\n",
104                                                 strerror(r));
105                         }
106                         break;
107 #ifdef CONFIG_LINUX_FALLOCATE
108                 case FIO_FALLOCATE_KEEP_SIZE:
109                         dprint(FD_FILE,
110                                 "fallocate(FALLOC_FL_KEEP_SIZE) "
111                                 "file %s size %llu\n", f->file_name,
112                                 (unsigned long long) f->real_file_size);
113
114                         r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0,
115                                         f->real_file_size);
116                         if (r != 0)
117                                 td_verror(td, errno, "fallocate");
118
119                         break;
120 #endif /* CONFIG_LINUX_FALLOCATE */
121                 default:
122                         log_err("fio: unknown fallocate mode: %d\n",
123                                 td->o.fallocate_mode);
124                         assert(0);
125                 }
126         }
127 #endif /* CONFIG_POSIX_FALLOCATE */
128
129         if (!new_layout)
130                 goto done;
131
132         /*
133          * The size will be -1ULL when fill_device is used, so don't truncate
134          * or fallocate this file, just write it
135          */
136         if (!td->o.fill_device) {
137                 dprint(FD_FILE, "truncate file %s, size %llu\n", f->file_name,
138                                         (unsigned long long) f->real_file_size);
139                 if (ftruncate(f->fd, f->real_file_size) == -1) {
140                         if (errno != EFBIG) {
141                                 td_verror(td, errno, "ftruncate");
142                                 goto err;
143                         }
144                 }
145         }
146
147         b = malloc(td->o.max_bs[DDIR_WRITE]);
148
149         left = f->real_file_size;
150         while (left && !td->terminate) {
151                 bs = td->o.max_bs[DDIR_WRITE];
152                 if (bs > left)
153                         bs = left;
154
155                 fill_io_buffer(td, b, bs, bs);
156
157                 r = write(f->fd, b, bs);
158
159                 if (r > 0) {
160                         left -= r;
161                         continue;
162                 } else {
163                         if (r < 0) {
164                                 int __e = errno;
165
166                                 if (__e == ENOSPC) {
167                                         if (td->o.fill_device)
168                                                 break;
169                                         log_info("fio: ENOSPC on laying out "
170                                                  "file, stopping\n");
171                                         break;
172                                 }
173                                 td_verror(td, errno, "write");
174                         } else
175                                 td_verror(td, EIO, "write");
176
177                         break;
178                 }
179         }
180
181         if (td->terminate) {
182                 dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
183                 td_io_unlink_file(td, f);
184         } else if (td->o.create_fsync) {
185                 if (fsync(f->fd) < 0) {
186                         td_verror(td, errno, "fsync");
187                         goto err;
188                 }
189         }
190         if (td->o.fill_device && !td_write(td)) {
191                 fio_file_clear_size_known(f);
192                 if (td_io_get_file_size(td, f))
193                         goto err;
194                 if (f->io_size > f->real_file_size)
195                         f->io_size = f->real_file_size;
196         }
197
198         free(b);
199 done:
200         return 0;
201 err:
202         close(f->fd);
203         f->fd = -1;
204         if (b)
205                 free(b);
206         return 1;
207 }
208
209 static int pre_read_file(struct thread_data *td, struct fio_file *f)
210 {
211         int ret = 0, r, did_open = 0, old_runstate;
212         unsigned long long left;
213         unsigned int bs;
214         char *b;
215
216         if (td->io_ops->flags & FIO_PIPEIO)
217                 return 0;
218
219         if (!fio_file_open(f)) {
220                 if (td->io_ops->open_file(td, f)) {
221                         log_err("fio: cannot pre-read, failed to open file\n");
222                         return 1;
223                 }
224                 did_open = 1;
225         }
226
227         old_runstate = td_bump_runstate(td, TD_PRE_READING);
228
229         bs = td->o.max_bs[DDIR_READ];
230         b = malloc(bs);
231         memset(b, 0, bs);
232
233         if (lseek(f->fd, f->file_offset, SEEK_SET) < 0) {
234                 td_verror(td, errno, "lseek");
235                 log_err("fio: failed to lseek pre-read file\n");
236                 ret = 1;
237                 goto error;
238         }
239
240         left = f->io_size;
241
242         while (left && !td->terminate) {
243                 if (bs > left)
244                         bs = left;
245
246                 r = read(f->fd, b, bs);
247
248                 if (r == (int) bs) {
249                         left -= bs;
250                         continue;
251                 } else {
252                         td_verror(td, EIO, "pre_read");
253                         break;
254                 }
255         }
256
257 error:
258         td_restore_runstate(td, old_runstate);
259
260         if (did_open)
261                 td->io_ops->close_file(td, f);
262
263         free(b);
264         return ret;
265 }
266
267 static unsigned long long get_rand_file_size(struct thread_data *td)
268 {
269         unsigned long long ret, sized;
270         unsigned long r;
271
272         r = __rand(&td->file_size_state);
273         sized = td->o.file_size_high - td->o.file_size_low;
274         ret = (unsigned long long) ((double) sized * (r / (FRAND_MAX + 1.0)));
275         ret += td->o.file_size_low;
276         ret -= (ret % td->o.rw_min_bs);
277         return ret;
278 }
279
280 static int file_size(struct thread_data *td, struct fio_file *f)
281 {
282         struct stat st;
283
284         if (stat(f->file_name, &st) == -1) {
285                 td_verror(td, errno, "fstat");
286                 return 1;
287         }
288
289         f->real_file_size = st.st_size;
290         return 0;
291 }
292
293 static int bdev_size(struct thread_data *td, struct fio_file *f)
294 {
295         unsigned long long bytes = 0;
296         int r;
297
298         if (td->io_ops->open_file(td, f)) {
299                 log_err("fio: failed opening blockdev %s for size check\n",
300                         f->file_name);
301                 return 1;
302         }
303
304         r = blockdev_size(f, &bytes);
305         if (r) {
306                 td_verror(td, r, "blockdev_size");
307                 goto err;
308         }
309
310         if (!bytes) {
311                 log_err("%s: zero sized block device?\n", f->file_name);
312                 goto err;
313         }
314
315         f->real_file_size = bytes;
316         td->io_ops->close_file(td, f);
317         return 0;
318 err:
319         td->io_ops->close_file(td, f);
320         return 1;
321 }
322
323 static int char_size(struct thread_data *td, struct fio_file *f)
324 {
325 #ifdef FIO_HAVE_CHARDEV_SIZE
326         unsigned long long bytes = 0;
327         int r;
328
329         if (td->io_ops->open_file(td, f)) {
330                 log_err("fio: failed opening blockdev %s for size check\n",
331                         f->file_name);
332                 return 1;
333         }
334
335         r = chardev_size(f, &bytes);
336         if (r) {
337                 td_verror(td, r, "chardev_size");
338                 goto err;
339         }
340
341         if (!bytes) {
342                 log_err("%s: zero sized char device?\n", f->file_name);
343                 goto err;
344         }
345
346         f->real_file_size = bytes;
347         td->io_ops->close_file(td, f);
348         return 0;
349 err:
350         td->io_ops->close_file(td, f);
351         return 1;
352 #else
353         f->real_file_size = -1ULL;
354         return 0;
355 #endif
356 }
357
358 static int get_file_size(struct thread_data *td, struct fio_file *f)
359 {
360         int ret = 0;
361
362         if (fio_file_size_known(f))
363                 return 0;
364
365         if (f->filetype == FIO_TYPE_FILE)
366                 ret = file_size(td, f);
367         else if (f->filetype == FIO_TYPE_BD)
368                 ret = bdev_size(td, f);
369         else if (f->filetype == FIO_TYPE_CHAR)
370                 ret = char_size(td, f);
371         else
372                 f->real_file_size = -1;
373
374         if (ret)
375                 return ret;
376
377         if (f->file_offset > f->real_file_size) {
378                 log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
379                                         (unsigned long long) f->file_offset,
380                                         (unsigned long long) f->real_file_size);
381                 return 1;
382         }
383
384         fio_file_set_size_known(f);
385         return 0;
386 }
387
388 static int __file_invalidate_cache(struct thread_data *td, struct fio_file *f,
389                                    unsigned long long off,
390                                    unsigned long long len)
391 {
392         int ret = 0;
393
394 #ifdef CONFIG_ESX
395         return 0;
396 #endif
397
398         if (len == -1ULL)
399                 len = f->io_size;
400         if (off == -1ULL)
401                 off = f->file_offset;
402
403         if (len == -1ULL || off == -1ULL)
404                 return 0;
405
406         dprint(FD_IO, "invalidate cache %s: %llu/%llu\n", f->file_name, off,
407                                                                 len);
408
409         if (td->io_ops->invalidate)
410                 ret = td->io_ops->invalidate(td, f);
411         else if (f->filetype == FIO_TYPE_FILE)
412                 ret = posix_fadvise(f->fd, off, len, POSIX_FADV_DONTNEED);
413         else if (f->filetype == FIO_TYPE_BD) {
414                 ret = blockdev_invalidate_cache(f);
415                 if (ret < 0 && errno == EACCES && geteuid()) {
416                         if (!root_warn) {
417                                 log_err("fio: only root may flush block "
418                                         "devices. Cache flush bypassed!\n");
419                                 root_warn = 1;
420                         }
421                         ret = 0;
422                 }
423         } else if (f->filetype == FIO_TYPE_CHAR || f->filetype == FIO_TYPE_PIPE)
424                 ret = 0;
425
426         /*
427          * Cache flushing isn't a fatal condition, and we know it will
428          * happen on some platforms where we don't have the proper
429          * function to flush eg block device caches. So just warn and
430          * continue on our way.
431          */
432         if (ret) {
433                 log_info("fio: cache invalidation of %s failed: %s\n", f->file_name, strerror(errno));
434                 ret = 0;
435         }
436
437         return 0;
438
439 }
440
/*
 * Invalidate cached data for the default range of 'f' (offset and
 * length both passed as -1ULL). Does nothing and returns 0 if the file
 * is not open.
 */
int file_invalidate_cache(struct thread_data *td, struct fio_file *f)
{
	if (fio_file_open(f))
		return __file_invalidate_cache(td, f, -1ULL, -1ULL);

	return 0;
}
448
449 int generic_close_file(struct thread_data fio_unused *td, struct fio_file *f)
450 {
451         int ret = 0;
452
453         dprint(FD_FILE, "fd close %s\n", f->file_name);
454
455         remove_file_hash(f);
456
457         if (close(f->fd) < 0)
458                 ret = errno;
459
460         f->fd = -1;
461
462         if (f->shadow_fd != -1) {
463                 close(f->shadow_fd);
464                 f->shadow_fd = -1;
465         }
466
467         f->engine_data = 0;
468         return ret;
469 }
470
471 int file_lookup_open(struct fio_file *f, int flags)
472 {
473         struct fio_file *__f;
474         int from_hash;
475
476         __f = lookup_file_hash(f->file_name);
477         if (__f) {
478                 dprint(FD_FILE, "found file in hash %s\n", f->file_name);
479                 /*
480                  * racy, need the __f->lock locked
481                  */
482                 f->lock = __f->lock;
483                 from_hash = 1;
484         } else {
485                 dprint(FD_FILE, "file not found in hash %s\n", f->file_name);
486                 from_hash = 0;
487         }
488
489 #ifdef WIN32
490         flags |= _O_BINARY;
491 #endif
492
493         f->fd = open(f->file_name, flags, 0600);
494         return from_hash;
495 }
496
497 static int file_close_shadow_fds(struct thread_data *td)
498 {
499         struct fio_file *f;
500         int num_closed = 0;
501         unsigned int i;
502
503         for_each_file(td, f, i) {
504                 if (f->shadow_fd == -1)
505                         continue;
506
507                 close(f->shadow_fd);
508                 f->shadow_fd = -1;
509                 num_closed++;
510         }
511
512         return num_closed;
513 }
514
515 int generic_open_file(struct thread_data *td, struct fio_file *f)
516 {
517         int is_std = 0;
518         int flags = 0;
519         int from_hash = 0;
520
521         dprint(FD_FILE, "fd open %s\n", f->file_name);
522
523         if (!strcmp(f->file_name, "-")) {
524                 if (td_rw(td)) {
525                         log_err("fio: can't read/write to stdin/out\n");
526                         return 1;
527                 }
528                 is_std = 1;
529
530                 /*
531                  * move output logging to stderr, if we are writing to stdout
532                  */
533                 if (td_write(td))
534                         f_out = stderr;
535         }
536
537         if (td_trim(td))
538                 goto skip_flags;
539         if (td->o.odirect)
540                 flags |= OS_O_DIRECT;
541         if (td->o.oatomic) {
542                 if (!FIO_O_ATOMIC) {
543                         td_verror(td, EINVAL, "OS does not support atomic IO");
544                         return 1;
545                 }
546                 flags |= OS_O_DIRECT | FIO_O_ATOMIC;
547         }
548         if (td->o.sync_io)
549                 flags |= O_SYNC;
550         if (td->o.create_on_open && td->o.allow_create)
551                 flags |= O_CREAT;
552 skip_flags:
553         if (f->filetype != FIO_TYPE_FILE)
554                 flags |= FIO_O_NOATIME;
555
556 open_again:
557         if (td_write(td)) {
558                 if (!read_only)
559                         flags |= O_RDWR;
560
561                 if (f->filetype == FIO_TYPE_FILE && td->o.allow_create)
562                         flags |= O_CREAT;
563
564                 if (is_std)
565                         f->fd = dup(STDOUT_FILENO);
566                 else
567                         from_hash = file_lookup_open(f, flags);
568         } else if (td_read(td)) {
569                 if (f->filetype == FIO_TYPE_CHAR && !read_only)
570                         flags |= O_RDWR;
571                 else
572                         flags |= O_RDONLY;
573
574                 if (is_std)
575                         f->fd = dup(STDIN_FILENO);
576                 else
577                         from_hash = file_lookup_open(f, flags);
578         } else { //td trim
579                 flags |= O_RDWR;
580                 from_hash = file_lookup_open(f, flags);
581         }
582
583         if (f->fd == -1) {
584                 char buf[FIO_VERROR_SIZE];
585                 int __e = errno;
586
587                 if (__e == EPERM && (flags & FIO_O_NOATIME)) {
588                         flags &= ~FIO_O_NOATIME;
589                         goto open_again;
590                 }
591                 if (__e == EMFILE && file_close_shadow_fds(td))
592                         goto open_again;
593
594                 snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
595
596                 if (__e == EINVAL && (flags & OS_O_DIRECT)) {
597                         log_err("fio: looks like your file system does not " \
598                                 "support direct=1/buffered=0\n");
599                 }
600
601                 td_verror(td, __e, buf);
602                 return 1;
603         }
604
605         if (!from_hash && f->fd != -1) {
606                 if (add_file_hash(f)) {
607                         int fio_unused ret;
608
609                         /*
610                          * Stash away descriptor for later close. This is to
611                          * work-around a "feature" on Linux, where a close of
612                          * an fd that has been opened for write will trigger
613                          * udev to call blkid to check partitions, fs id, etc.
614                          * That pollutes the device cache, which can slow down
615                          * unbuffered accesses.
616                          */
617                         if (f->shadow_fd == -1)
618                                 f->shadow_fd = f->fd;
619                         else {
620                                 /*
621                                  * OK to ignore, we haven't done anything
622                                  * with it
623                                  */
624                                 ret = generic_close_file(td, f);
625                         }
626                         goto open_again;
627                 }
628         }
629
630         return 0;
631 }
632
/*
 * Public entry point for the default size lookup; forwards to
 * get_file_size() and returns its result unchanged.
 */
int generic_get_file_size(struct thread_data *td, struct fio_file *f)
{
	const int ret = get_file_size(td, f);

	return ret;
}
637
638 /*
639  * open/close all files, so that ->real_file_size gets set
640  */
641 static int get_file_sizes(struct thread_data *td)
642 {
643         struct fio_file *f;
644         unsigned int i;
645         int err = 0;
646
647         for_each_file(td, f, i) {
648                 dprint(FD_FILE, "get file size for %p/%d/%p\n", f, i,
649                                                                 f->file_name);
650
651                 if (td_io_get_file_size(td, f)) {
652                         if (td->error != ENOENT) {
653                                 log_err("%s\n", td->verror);
654                                 err = 1;
655                                 break;
656                         }
657                         clear_error(td);
658                 }
659
660                 if (f->real_file_size == -1ULL && td->o.size)
661                         f->real_file_size = td->o.size / td->o.nr_files;
662         }
663
664         return err;
665 }
666
667 struct fio_mount {
668         struct flist_head list;
669         const char *base;
670         char __base[256];
671         unsigned int key;
672 };
673
674 /*
675  * Get free number of bytes for each file on each unique mount.
676  */
677 static unsigned long long get_fs_free_counts(struct thread_data *td)
678 {
679         struct flist_head *n, *tmp;
680         unsigned long long ret = 0;
681         struct fio_mount *fm;
682         FLIST_HEAD(list);
683         struct fio_file *f;
684         unsigned int i;
685
686         for_each_file(td, f, i) {
687                 struct stat sb;
688                 char buf[256];
689
690                 if (f->filetype == FIO_TYPE_BD || f->filetype == FIO_TYPE_CHAR) {
691                         if (f->real_file_size != -1ULL)
692                                 ret += f->real_file_size;
693                         continue;
694                 } else if (f->filetype != FIO_TYPE_FILE)
695                         continue;
696
697                 buf[255] = '\0';
698                 strncpy(buf, f->file_name, 255);
699
700                 if (stat(buf, &sb) < 0) {
701                         if (errno != ENOENT)
702                                 break;
703                         strcpy(buf, ".");
704                         if (stat(buf, &sb) < 0)
705                                 break;
706                 }
707
708                 fm = NULL;
709                 flist_for_each(n, &list) {
710                         fm = flist_entry(n, struct fio_mount, list);
711                         if (fm->key == sb.st_dev)
712                                 break;
713
714                         fm = NULL;
715                 }
716
717                 if (fm)
718                         continue;
719
720                 fm = calloc(1, sizeof(*fm));
721                 strncpy(fm->__base, buf, sizeof(fm->__base) - 1);
722                 fm->base = basename(fm->__base);
723                 fm->key = sb.st_dev;
724                 flist_add(&fm->list, &list);
725         }
726
727         flist_for_each_safe(n, tmp, &list) {
728                 unsigned long long sz;
729
730                 fm = flist_entry(n, struct fio_mount, list);
731                 flist_del(&fm->list);
732
733                 sz = get_fs_size(fm->base);
734                 if (sz && sz != -1ULL)
735                         ret += sz;
736
737                 free(fm);
738         }
739
740         return ret;
741 }
742
743 uint64_t get_start_offset(struct thread_data *td, struct fio_file *f)
744 {
745         struct thread_options *o = &td->o;
746
747         if (o->file_append && f->filetype == FIO_TYPE_FILE)
748                 return f->real_file_size;
749
750         return td->o.start_offset +
751                 td->subjob_number * td->o.offset_increment;
752 }
753
754 /*
755  * Open the files and setup files sizes, creating files if necessary.
756  */
757 int setup_files(struct thread_data *td)
758 {
759         unsigned long long total_size, extend_size;
760         struct thread_options *o = &td->o;
761         struct fio_file *f;
762         unsigned int i, nr_fs_extra = 0;
763         int err = 0, need_extend;
764         int old_state;
765         const unsigned int bs = td_min_bs(td);
766         uint64_t fs = 0;
767
768         dprint(FD_FILE, "setup files\n");
769
770         old_state = td_bump_runstate(td, TD_SETTING_UP);
771
772         if (o->read_iolog_file)
773                 goto done;
774
775         /*
776          * if ioengine defines a setup() method, it's responsible for
777          * opening the files and setting f->real_file_size to indicate
778          * the valid range for that file.
779          */
780         if (td->io_ops->setup)
781                 err = td->io_ops->setup(td);
782         else
783                 err = get_file_sizes(td);
784
785         if (err)
786                 goto err_out;
787
788         /*
789          * check sizes. if the files/devices do not exist and the size
790          * isn't passed to fio, abort.
791          */
792         total_size = 0;
793         for_each_file(td, f, i) {
794                 if (f->real_file_size == -1ULL)
795                         total_size = -1ULL;
796                 else
797                         total_size += f->real_file_size;
798         }
799
800         if (o->fill_device)
801                 td->fill_device_size = get_fs_free_counts(td);
802
803         /*
804          * device/file sizes are zero and no size given, punt
805          */
806         if ((!total_size || total_size == -1ULL) && !o->size &&
807             !(td->io_ops->flags & FIO_NOIO) && !o->fill_device &&
808             !(o->nr_files && (o->file_size_low || o->file_size_high))) {
809                 log_err("%s: you need to specify size=\n", o->name);
810                 td_verror(td, EINVAL, "total_file_size");
811                 goto err_out;
812         }
813
814         /*
815          * Calculate per-file size and potential extra size for the
816          * first files, if needed.
817          */
818         if (!o->file_size_low && o->nr_files) {
819                 uint64_t all_fs;
820
821                 fs = o->size / o->nr_files;
822                 all_fs = fs * o->nr_files;
823
824                 if (all_fs < o->size)
825                         nr_fs_extra = (o->size - all_fs) / bs;
826         }
827
828         /*
829          * now file sizes are known, so we can set ->io_size. if size= is
830          * not given, ->io_size is just equal to ->real_file_size. if size
831          * is given, ->io_size is size / nr_files.
832          */
833         extend_size = total_size = 0;
834         need_extend = 0;
835         for_each_file(td, f, i) {
836                 f->file_offset = get_start_offset(td, f);
837
838                 if (!o->file_size_low) {
839                         /*
840                          * no file size range given, file size is equal to
841                          * total size divided by number of files. If that is
842                          * zero, set it to the real file size. If the size
843                          * doesn't divide nicely with the min blocksize,
844                          * make the first files bigger.
845                          */
846                         f->io_size = fs;
847                         if (nr_fs_extra) {
848                                 nr_fs_extra--;
849                                 f->io_size += bs;
850                         }
851
852                         if (!f->io_size)
853                                 f->io_size = f->real_file_size - f->file_offset;
854                 } else if (f->real_file_size < o->file_size_low ||
855                            f->real_file_size > o->file_size_high) {
856                         if (f->file_offset > o->file_size_low)
857                                 goto err_offset;
858                         /*
859                          * file size given. if it's fixed, use that. if it's a
860                          * range, generate a random size in-between.
861                          */
862                         if (o->file_size_low == o->file_size_high)
863                                 f->io_size = o->file_size_low - f->file_offset;
864                         else {
865                                 f->io_size = get_rand_file_size(td)
866                                                 - f->file_offset;
867                         }
868                 } else
869                         f->io_size = f->real_file_size - f->file_offset;
870
871                 if (f->io_size == -1ULL)
872                         total_size = -1ULL;
873                 else {
874                         if (o->size_percent)
875                                 f->io_size = (f->io_size * o->size_percent) / 100;
876                         total_size += f->io_size;
877                 }
878
879                 if (f->filetype == FIO_TYPE_FILE &&
880                     (f->io_size + f->file_offset) > f->real_file_size &&
881                     !(td->io_ops->flags & FIO_DISKLESSIO)) {
882                         if (!o->create_on_open) {
883                                 need_extend++;
884                                 extend_size += (f->io_size + f->file_offset);
885                         } else
886                                 f->real_file_size = f->io_size + f->file_offset;
887                         fio_file_set_extend(f);
888                 }
889         }
890
891         if (td->o.block_error_hist) {
892                 int len;
893
894                 assert(td->o.nr_files == 1);    /* checked in fixup_options */
895                 f = td->files[0];
896                 len = f->io_size / td->o.bs[DDIR_TRIM];
897                 if (len > MAX_NR_BLOCK_INFOS || len <= 0) {
898                         log_err("fio: cannot calculate block histogram with "
899                                 "%d trim blocks, maximum %d\n",
900                                 len, MAX_NR_BLOCK_INFOS);
901                         td_verror(td, EINVAL, "block_error_hist");
902                         goto err_out;
903                 }
904
905                 td->ts.nr_block_infos = len;
906                 for (int i = 0; i < len; i++)
907                         td->ts.block_infos[i] =
908                                 BLOCK_INFO(0, BLOCK_STATE_UNINIT);
909         } else
910                 td->ts.nr_block_infos = 0;
911
912         if (!o->size || (total_size && o->size > total_size))
913                 o->size = total_size;
914
915         if (o->size < td_min_bs(td)) {
916                 log_err("fio: blocksize too large for data set\n");
917                 goto err_out;
918         }
919
920         /*
921          * See if we need to extend some files
922          */
923         if (need_extend) {
924                 temp_stall_ts = 1;
925                 if (output_format == FIO_OUTPUT_NORMAL)
926                         log_info("%s: Laying out IO file(s) (%u file(s) /"
927                                  " %lluMB)\n", o->name, need_extend,
928                                         extend_size >> 20);
929
930                 for_each_file(td, f, i) {
931                         unsigned long long old_len = -1ULL, extend_len = -1ULL;
932
933                         if (!fio_file_extend(f))
934                                 continue;
935
936                         assert(f->filetype == FIO_TYPE_FILE);
937                         fio_file_clear_extend(f);
938                         if (!o->fill_device) {
939                                 old_len = f->real_file_size;
940                                 extend_len = f->io_size + f->file_offset -
941                                                 old_len;
942                         }
943                         f->real_file_size = (f->io_size + f->file_offset);
944                         err = extend_file(td, f);
945                         if (err)
946                                 break;
947
948                         err = __file_invalidate_cache(td, f, old_len,
949                                                                 extend_len);
950
951                         /*
952                          * Shut up static checker
953                          */
954                         if (f->fd != -1)
955                                 close(f->fd);
956
957                         f->fd = -1;
958                         if (err)
959                                 break;
960                 }
961                 temp_stall_ts = 0;
962         }
963
964         if (err)
965                 goto err_out;
966
967         if (!o->zone_size)
968                 o->zone_size = o->size;
969
970         /*
971          * iolog already set the total io size, if we read back
972          * stored entries.
973          */
974         if (!o->read_iolog_file) {
975                 if (o->io_limit)
976                         td->total_io_size = o->io_limit * o->loops;
977                 else
978                         td->total_io_size = o->size * o->loops;
979         }
980
981 done:
982         if (o->create_only)
983                 td->done = 1;
984
985         td_restore_runstate(td, old_state);
986         return 0;
987 err_offset:
988         log_err("%s: you need to specify valid offset=\n", o->name);
989 err_out:
990         td_restore_runstate(td, old_state);
991         return 1;
992 }
993
994 int pre_read_files(struct thread_data *td)
995 {
996         struct fio_file *f;
997         unsigned int i;
998
999         dprint(FD_FILE, "pre_read files\n");
1000
1001         for_each_file(td, f, i) {
1002                 pre_read_file(td, f);
1003         }
1004
1005         return 1;
1006 }
1007
1008 static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
1009 {
1010         unsigned int range_size, seed;
1011         unsigned long nranges;
1012         uint64_t fsize;
1013
1014         range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
1015         fsize = min(f->real_file_size, f->io_size);
1016
1017         nranges = (fsize + range_size - 1) / range_size;
1018
1019         seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
1020         if (!td->o.rand_repeatable)
1021                 seed = td->rand_seeds[4];
1022
1023         if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
1024                 zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed);
1025         else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
1026                 pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed);
1027         else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS)
1028                 gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, seed);
1029
1030         return 1;
1031 }
1032
1033 static int init_rand_distribution(struct thread_data *td)
1034 {
1035         struct fio_file *f;
1036         unsigned int i;
1037         int state;
1038
1039         if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
1040                 return 0;
1041
1042         state = td_bump_runstate(td, TD_SETTING_UP);
1043
1044         for_each_file(td, f, i)
1045                 __init_rand_distribution(td, f);
1046
1047         td_restore_runstate(td, state);
1048
1049         return 1;
1050 }
1051
1052 int init_random_map(struct thread_data *td)
1053 {
1054         unsigned long long blocks;
1055         struct fio_file *f;
1056         unsigned int i;
1057
1058         if (init_rand_distribution(td))
1059                 return 0;
1060         if (!td_random(td))
1061                 return 0;
1062
1063         for_each_file(td, f, i) {
1064                 uint64_t fsize = min(f->real_file_size, f->io_size);
1065
1066                 blocks = fsize / (unsigned long long) td->o.rw_min_bs;
1067
1068                 if (td->o.random_generator == FIO_RAND_GEN_LFSR) {
1069                         unsigned long seed;
1070
1071                         seed = td->rand_seeds[FIO_RAND_BLOCK_OFF];
1072
1073                         if (!lfsr_init(&f->lfsr, blocks, seed, 0)) {
1074                                 fio_file_set_lfsr(f);
1075                                 continue;
1076                         }
1077                 } else if (!td->o.norandommap) {
1078                         f->io_axmap = axmap_new(blocks);
1079                         if (f->io_axmap) {
1080                                 fio_file_set_axmap(f);
1081                                 continue;
1082                         }
1083                 } else if (td->o.norandommap)
1084                         continue;
1085
1086                 if (!td->o.softrandommap) {
1087                         log_err("fio: failed allocating random map. If running"
1088                                 " a large number of jobs, try the 'norandommap'"
1089                                 " option or set 'softrandommap'. Or give"
1090                                 " a larger --alloc-size to fio.\n");
1091                         return 1;
1092                 }
1093
1094                 log_info("fio: file %s failed allocating random map. Running "
1095                          "job without.\n", f->file_name);
1096         }
1097
1098         return 0;
1099 }
1100
1101 void close_files(struct thread_data *td)
1102 {
1103         struct fio_file *f;
1104         unsigned int i;
1105
1106         for_each_file(td, f, i) {
1107                 if (fio_file_open(f))
1108                         td_io_close_file(td, f);
1109         }
1110 }
1111
1112 void close_and_free_files(struct thread_data *td)
1113 {
1114         struct fio_file *f;
1115         unsigned int i;
1116
1117         dprint(FD_FILE, "close files\n");
1118
1119         for_each_file(td, f, i) {
1120                 if (td->o.unlink && f->filetype == FIO_TYPE_FILE) {
1121                         dprint(FD_FILE, "free unlink %s\n", f->file_name);
1122                         td_io_unlink_file(td, f);
1123                 }
1124
1125                 if (fio_file_open(f))
1126                         td_io_close_file(td, f);
1127
1128                 remove_file_hash(f);
1129
1130                 if (td->o.unlink && f->filetype == FIO_TYPE_FILE) {
1131                         dprint(FD_FILE, "free unlink %s\n", f->file_name);
1132                         td_io_unlink_file(td, f);
1133                 }
1134
1135                 sfree(f->file_name);
1136                 f->file_name = NULL;
1137                 if (fio_file_axmap(f)) {
1138                         axmap_free(f->io_axmap);
1139                         f->io_axmap = NULL;
1140                 }
1141                 sfree(f);
1142         }
1143
1144         td->o.filename = NULL;
1145         free(td->files);
1146         free(td->file_locks);
1147         td->files_index = 0;
1148         td->files = NULL;
1149         td->file_locks = NULL;
1150         td->o.file_lock_mode = FILE_LOCK_NONE;
1151         td->o.nr_files = 0;
1152 }
1153
1154 static void get_file_type(struct fio_file *f)
1155 {
1156         struct stat sb;
1157
1158         if (!strcmp(f->file_name, "-"))
1159                 f->filetype = FIO_TYPE_PIPE;
1160         else
1161                 f->filetype = FIO_TYPE_FILE;
1162
1163         /* \\.\ is the device namespace in Windows, where every file is
1164          * a block device */
1165         if (strncmp(f->file_name, "\\\\.\\", 4) == 0)
1166                 f->filetype = FIO_TYPE_BD;
1167
1168         if (!stat(f->file_name, &sb)) {
1169                 if (S_ISBLK(sb.st_mode))
1170                         f->filetype = FIO_TYPE_BD;
1171                 else if (S_ISCHR(sb.st_mode))
1172                         f->filetype = FIO_TYPE_CHAR;
1173                 else if (S_ISFIFO(sb.st_mode))
1174                         f->filetype = FIO_TYPE_PIPE;
1175         }
1176 }
1177
1178 static int __is_already_allocated(const char *fname)
1179 {
1180         struct flist_head *entry;
1181         char *filename;
1182
1183         if (flist_empty(&filename_list))
1184                 return 0;
1185
1186         flist_for_each(entry, &filename_list) {
1187                 filename = flist_entry(entry, struct file_name, list)->filename;
1188
1189                 if (strcmp(filename, fname) == 0)
1190                         return 1;
1191         }
1192
1193         return 0;
1194 }
1195
/*
 * Same lookup as __is_already_allocated(), performed with the file
 * hash lock held.
 */
static int is_already_allocated(const char *fname)
{
        int found;

        fio_file_hash_lock();
        found = __is_already_allocated(fname);
        fio_file_hash_unlock();

        return found;
}
1205
1206 static void set_already_allocated(const char *fname)
1207 {
1208         struct file_name *fn;
1209
1210         fn = malloc(sizeof(struct file_name));
1211         fn->filename = strdup(fname);
1212
1213         fio_file_hash_lock();
1214         if (!__is_already_allocated(fname)) {
1215                 flist_add_tail(&fn->list, &filename_list);
1216                 fn = NULL;
1217         }
1218         fio_file_hash_unlock();
1219
1220         if (fn) {
1221                 free(fn->filename);
1222                 free(fn);
1223         }
1224 }
1225
1226
1227 static void free_already_allocated(void)
1228 {
1229         struct flist_head *entry, *tmp;
1230         struct file_name *fn;
1231
1232         if (flist_empty(&filename_list))
1233                 return;
1234
1235         fio_file_hash_lock();
1236         flist_for_each_safe(entry, tmp, &filename_list) {
1237                 fn = flist_entry(entry, struct file_name, list);
1238                 free(fn->filename);
1239                 flist_del(&fn->list);
1240                 free(fn);
1241         }
1242
1243         fio_file_hash_unlock();
1244 }
1245
1246 static struct fio_file *alloc_new_file(struct thread_data *td)
1247 {
1248         struct fio_file *f;
1249
1250         f = smalloc(sizeof(*f));
1251         if (!f) {
1252                 log_err("fio: smalloc OOM\n");
1253                 assert(0);
1254                 return NULL;
1255         }
1256
1257         f->fd = -1;
1258         f->shadow_fd = -1;
1259         fio_file_reset(td, f);
1260         return f;
1261 }
1262
1263 int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
1264 {
1265         int cur_files = td->files_index;
1266         char file_name[PATH_MAX];
1267         struct fio_file *f;
1268         int len = 0;
1269
1270         dprint(FD_FILE, "add file %s\n", fname);
1271
1272         if (td->o.directory)
1273                 len = set_name_idx(file_name, PATH_MAX, td->o.directory, numjob);
1274
1275         sprintf(file_name + len, "%s", fname);
1276
1277         /* clean cloned siblings using existing files */
1278         if (numjob && is_already_allocated(file_name))
1279                 return 0;
1280
1281         f = alloc_new_file(td);
1282
1283         if (td->files_size <= td->files_index) {
1284                 unsigned int new_size = td->o.nr_files + 1;
1285
1286                 dprint(FD_FILE, "resize file array to %d files\n", new_size);
1287
1288                 td->files = realloc(td->files, new_size * sizeof(f));
1289                 if (td->files == NULL) {
1290                         log_err("fio: realloc OOM\n");
1291                         assert(0);
1292                 }
1293                 if (td->o.file_lock_mode != FILE_LOCK_NONE) {
1294                         td->file_locks = realloc(td->file_locks, new_size);
1295                         if (!td->file_locks) {
1296                                 log_err("fio: realloc OOM\n");
1297                                 assert(0);
1298                         }
1299                         td->file_locks[cur_files] = FILE_LOCK_NONE;
1300                 }
1301                 td->files_size = new_size;
1302         }
1303         td->files[cur_files] = f;
1304         f->fileno = cur_files;
1305
1306         /*
1307          * init function, io engine may not be loaded yet
1308          */
1309         if (td->io_ops && (td->io_ops->flags & FIO_DISKLESSIO))
1310                 f->real_file_size = -1ULL;
1311
1312         f->file_name = smalloc_strdup(file_name);
1313         if (!f->file_name) {
1314                 log_err("fio: smalloc OOM\n");
1315                 assert(0);
1316         }
1317
1318         get_file_type(f);
1319
1320         switch (td->o.file_lock_mode) {
1321         case FILE_LOCK_NONE:
1322                 break;
1323         case FILE_LOCK_READWRITE:
1324                 f->rwlock = fio_rwlock_init();
1325                 break;
1326         case FILE_LOCK_EXCLUSIVE:
1327                 f->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
1328                 break;
1329         default:
1330                 log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
1331                 assert(0);
1332         }
1333
1334         td->files_index++;
1335         if (f->filetype == FIO_TYPE_FILE)
1336                 td->nr_normal_files++;
1337
1338         set_already_allocated(file_name);
1339
1340         if (inc)
1341                 td->o.nr_files++;
1342
1343         dprint(FD_FILE, "file %p \"%s\" added at %d\n", f, f->file_name,
1344                                                         cur_files);
1345
1346         return cur_files;
1347 }
1348
1349 int add_file_exclusive(struct thread_data *td, const char *fname)
1350 {
1351         struct fio_file *f;
1352         unsigned int i;
1353
1354         for_each_file(td, f, i) {
1355                 if (!strcmp(f->file_name, fname))
1356                         return i;
1357         }
1358
1359         return add_file(td, fname, 0, 1);
1360 }
1361
1362 void get_file(struct fio_file *f)
1363 {
1364         dprint(FD_FILE, "get file %s, ref=%d\n", f->file_name, f->references);
1365         assert(fio_file_open(f));
1366         f->references++;
1367 }
1368
1369 int put_file(struct thread_data *td, struct fio_file *f)
1370 {
1371         int f_ret = 0, ret = 0;
1372
1373         dprint(FD_FILE, "put file %s, ref=%d\n", f->file_name, f->references);
1374
1375         if (!fio_file_open(f)) {
1376                 assert(f->fd == -1);
1377                 return 0;
1378         }
1379
1380         assert(f->references);
1381         if (--f->references)
1382                 return 0;
1383
1384         if (should_fsync(td) && td->o.fsync_on_close) {
1385                 f_ret = fsync(f->fd);
1386                 if (f_ret < 0)
1387                         f_ret = errno;
1388         }
1389
1390         if (td->io_ops->close_file)
1391                 ret = td->io_ops->close_file(td, f);
1392
1393         if (!ret)
1394                 ret = f_ret;
1395
1396         td->nr_open_files--;
1397         fio_file_clear_open(f);
1398         assert(f->fd == -1);
1399         return ret;
1400 }
1401
1402 void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
1403 {
1404         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1405                 return;
1406
1407         if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
1408                 if (ddir == DDIR_READ)
1409                         fio_rwlock_read(f->rwlock);
1410                 else
1411                         fio_rwlock_write(f->rwlock);
1412         } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1413                 fio_mutex_down(f->lock);
1414
1415         td->file_locks[f->fileno] = td->o.file_lock_mode;
1416 }
1417
1418 void unlock_file(struct thread_data *td, struct fio_file *f)
1419 {
1420         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1421                 return;
1422
1423         if (td->o.file_lock_mode == FILE_LOCK_READWRITE)
1424                 fio_rwlock_unlock(f->rwlock);
1425         else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1426                 fio_mutex_up(f->lock);
1427
1428         td->file_locks[f->fileno] = FILE_LOCK_NONE;
1429 }
1430
1431 void unlock_file_all(struct thread_data *td, struct fio_file *f)
1432 {
1433         if (td->o.file_lock_mode == FILE_LOCK_NONE || !td->file_locks)
1434                 return;
1435         if (td->file_locks[f->fileno] != FILE_LOCK_NONE)
1436                 unlock_file(td, f);
1437 }
1438
1439 static int recurse_dir(struct thread_data *td, const char *dirname)
1440 {
1441         struct dirent *dir;
1442         int ret = 0;
1443         DIR *D;
1444
1445         D = opendir(dirname);
1446         if (!D) {
1447                 char buf[FIO_VERROR_SIZE];
1448
1449                 snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname);
1450                 td_verror(td, errno, buf);
1451                 return 1;
1452         }
1453
1454         while ((dir = readdir(D)) != NULL) {
1455                 char full_path[PATH_MAX];
1456                 struct stat sb;
1457
1458                 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
1459                         continue;
1460
1461                 sprintf(full_path, "%s%s%s", dirname, FIO_OS_PATH_SEPARATOR, dir->d_name);
1462
1463                 if (lstat(full_path, &sb) == -1) {
1464                         if (errno != ENOENT) {
1465                                 td_verror(td, errno, "stat");
1466                                 ret = 1;
1467                                 break;
1468                         }
1469                 }
1470
1471                 if (S_ISREG(sb.st_mode)) {
1472                         add_file(td, full_path, 0, 1);
1473                         continue;
1474                 }
1475                 if (!S_ISDIR(sb.st_mode))
1476                         continue;
1477
1478                 ret = recurse_dir(td, full_path);
1479                 if (ret)
1480                         break;
1481         }
1482
1483         closedir(D);
1484         return ret;
1485 }
1486
1487 int add_dir_files(struct thread_data *td, const char *path)
1488 {
1489         int ret = recurse_dir(td, path);
1490
1491         if (!ret)
1492                 log_info("fio: opendir added %d files\n", td->o.nr_files);
1493
1494         return ret;
1495 }
1496
1497 void dup_files(struct thread_data *td, struct thread_data *org)
1498 {
1499         struct fio_file *f;
1500         unsigned int i;
1501
1502         dprint(FD_FILE, "dup files: %d\n", org->files_index);
1503
1504         if (!org->files)
1505                 return;
1506
1507         td->files = malloc(org->files_index * sizeof(f));
1508
1509         if (td->o.file_lock_mode != FILE_LOCK_NONE)
1510                 td->file_locks = malloc(org->files_index);
1511
1512         for_each_file(org, f, i) {
1513                 struct fio_file *__f;
1514
1515                 __f = alloc_new_file(td);
1516
1517                 if (f->file_name) {
1518                         __f->file_name = smalloc_strdup(f->file_name);
1519                         if (!__f->file_name) {
1520                                 log_err("fio: smalloc OOM\n");
1521                                 assert(0);
1522                         }
1523
1524                         __f->filetype = f->filetype;
1525                 }
1526
1527                 if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1528                         __f->lock = f->lock;
1529                 else if (td->o.file_lock_mode == FILE_LOCK_READWRITE)
1530                         __f->rwlock = f->rwlock;
1531
1532                 td->files[i] = __f;
1533         }
1534 }
1535
1536 /*
1537  * Returns the index that matches the filename, or -1 if not there
1538  */
1539 int get_fileno(struct thread_data *td, const char *fname)
1540 {
1541         struct fio_file *f;
1542         unsigned int i;
1543
1544         for_each_file(td, f, i)
1545                 if (!strcmp(f->file_name, fname))
1546                         return i;
1547
1548         return -1;
1549 }
1550
1551 /*
1552  * For log usage, where we add/open/close files automatically
1553  */
1554 void free_release_files(struct thread_data *td)
1555 {
1556         close_files(td);
1557         td->o.nr_files = 0;
1558         td->o.open_files = 0;
1559         td->files_index = 0;
1560         td->nr_normal_files = 0;
1561 }
1562
1563 void fio_file_reset(struct thread_data *td, struct fio_file *f)
1564 {
1565         int i;
1566
1567         for (i = 0; i < DDIR_RWDIR_CNT; i++) {
1568                 f->last_pos[i] = f->file_offset;
1569                 f->last_start[i] = -1ULL;
1570         }
1571
1572         if (fio_file_axmap(f))
1573                 axmap_reset(f->io_axmap);
1574         else if (fio_file_lfsr(f))
1575                 lfsr_reset(&f->lfsr, td->rand_seeds[FIO_RAND_BLOCK_OFF]);
1576 }
1577
1578 int fio_files_done(struct thread_data *td)
1579 {
1580         struct fio_file *f;
1581         unsigned int i;
1582
1583         for_each_file(td, f, i)
1584                 if (!fio_file_done(f))
1585                         return 0;
1586
1587         return 1;
1588 }
1589
/*
 * Release memory that is only needed during the initialization phase;
 * at present that is the list of already allocated file names.
 */
void filesetup_mem_free(void)
{
        free_already_allocated();
}