zipf: cap range calculation at 10M
[fio.git] / filesetup.c
1 #include <unistd.h>
2 #include <fcntl.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <dirent.h>
6 #include <libgen.h>
7 #include <sys/stat.h>
8 #include <sys/mman.h>
9 #include <sys/types.h>
10
11 #include "fio.h"
12 #include "smalloc.h"
13 #include "filehash.h"
14 #include "os/os.h"
15 #include "hash.h"
16
17 #ifdef FIO_HAVE_LINUX_FALLOCATE
18 #include <linux/falloc.h>
19 #endif
20
/*
 * Set to 1 once the "only root may flush block devices" message has been
 * logged by __file_invalidate_cache(), so that it is printed at most once.
 */
static int root_warn;
22
23 static inline void clear_error(struct thread_data *td)
24 {
25         td->error = 0;
26         td->verror[0] = '\0';
27 }
28
29 /*
30  * Leaves f->fd open on success, caller must close
31  */
32 static int extend_file(struct thread_data *td, struct fio_file *f)
33 {
34         int r, new_layout = 0, unlink_file = 0, flags;
35         unsigned long long left;
36         unsigned int bs;
37         char *b;
38
39         if (read_only) {
40                 log_err("fio: refusing extend of file due to read-only\n");
41                 return 0;
42         }
43
44         /*
45          * check if we need to lay the file out complete again. fio
46          * does that for operations involving reads, or for writes
47          * where overwrite is set
48          */
49         if (td_read(td) || (td_write(td) && td->o.overwrite) ||
50             (td_write(td) && td->io_ops->flags & FIO_NOEXTEND))
51                 new_layout = 1;
52         if (td_write(td) && !td->o.overwrite)
53                 unlink_file = 1;
54
55         if (unlink_file || new_layout) {
56                 dprint(FD_FILE, "layout unlink %s\n", f->file_name);
57                 if ((unlink(f->file_name) < 0) && (errno != ENOENT)) {
58                         td_verror(td, errno, "unlink");
59                         return 1;
60                 }
61         }
62
63         flags = O_WRONLY | O_CREAT;
64         if (new_layout)
65                 flags |= O_TRUNC;
66
67         dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
68         f->fd = open(f->file_name, flags, 0644);
69         if (f->fd < 0) {
70                 td_verror(td, errno, "open");
71                 return 1;
72         }
73
74 #ifdef FIO_HAVE_FALLOCATE
75         if (!td->o.fill_device) {
76                 switch (td->o.fallocate_mode) {
77                 case FIO_FALLOCATE_NONE:
78                         break;
79                 case FIO_FALLOCATE_POSIX:
80                         dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
81                                  f->file_name, f->real_file_size);
82
83                         r = posix_fallocate(f->fd, 0, f->real_file_size);
84                         if (r > 0) {
85                                 log_err("fio: posix_fallocate fails: %s\n",
86                                                 strerror(r));
87                         }
88                         break;
89 #ifdef FIO_HAVE_LINUX_FALLOCATE
90                 case FIO_FALLOCATE_KEEP_SIZE:
91                         dprint(FD_FILE,
92                                 "fallocate(FALLOC_FL_KEEP_SIZE) "
93                                 "file %s size %llu\n",
94                                 f->file_name, f->real_file_size);
95
96                         r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0,
97                                         f->real_file_size);
98                         if (r != 0) {
99                                 td_verror(td, errno, "fallocate");
100                         }
101                         break;
102 #endif /* FIO_HAVE_LINUX_FALLOCATE */
103                 default:
104                         log_err("fio: unknown fallocate mode: %d\n",
105                                 td->o.fallocate_mode);
106                         assert(0);
107                 }
108         }
109 #endif /* FIO_HAVE_FALLOCATE */
110
111         if (!new_layout)
112                 goto done;
113
114         /*
115          * The size will be -1ULL when fill_device is used, so don't truncate
116          * or fallocate this file, just write it
117          */
118         if (!td->o.fill_device) {
119                 dprint(FD_FILE, "truncate file %s, size %llu\n", f->file_name,
120                                                         f->real_file_size);
121                 if (ftruncate(f->fd, f->real_file_size) == -1) {
122                         td_verror(td, errno, "ftruncate");
123                         goto err;
124                 }
125         }
126
127         b = malloc(td->o.max_bs[DDIR_WRITE]);
128         memset(b, 0, td->o.max_bs[DDIR_WRITE]);
129
130         left = f->real_file_size;
131         while (left && !td->terminate) {
132                 bs = td->o.max_bs[DDIR_WRITE];
133                 if (bs > left)
134                         bs = left;
135
136                 r = write(f->fd, b, bs);
137
138                 if (r > 0) {
139                         left -= r;
140                         continue;
141                 } else {
142                         if (r < 0) {
143                                 int __e = errno;
144
145                                 if (__e == ENOSPC) {
146                                         if (td->o.fill_device)
147                                                 break;
148                                         log_info("fio: ENOSPC on laying out "
149                                                  "file, stopping\n");
150                                         break;
151                                 }
152                                 td_verror(td, errno, "write");
153                         } else
154                                 td_verror(td, EIO, "write");
155
156                         break;
157                 }
158         }
159
160         if (td->terminate) {
161                 dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
162                 unlink(f->file_name);
163         } else if (td->o.create_fsync) {
164                 if (fsync(f->fd) < 0) {
165                         td_verror(td, errno, "fsync");
166                         goto err;
167                 }
168         }
169         if (td->o.fill_device && !td_write(td)) {
170                 fio_file_clear_size_known(f);
171                 if (td_io_get_file_size(td, f))
172                         goto err;
173                 if (f->io_size > f->real_file_size)
174                         f->io_size = f->real_file_size;
175         }
176
177         free(b);
178 done:
179         return 0;
180 err:
181         close(f->fd);
182         f->fd = -1;
183         return 1;
184 }
185
186 static int pre_read_file(struct thread_data *td, struct fio_file *f)
187 {
188         int r, did_open = 0, old_runstate;
189         unsigned long long left;
190         unsigned int bs;
191         char *b;
192
193         if (td->io_ops->flags & FIO_PIPEIO)
194                 return 0;
195
196         if (!fio_file_open(f)) {
197                 if (td->io_ops->open_file(td, f)) {
198                         log_err("fio: cannot pre-read, failed to open file\n");
199                         return 1;
200                 }
201                 did_open = 1;
202         }
203
204         old_runstate = td->runstate;
205         td_set_runstate(td, TD_PRE_READING);
206
207         bs = td->o.max_bs[DDIR_READ];
208         b = malloc(bs);
209         memset(b, 0, bs);
210
211         lseek(f->fd, f->file_offset, SEEK_SET);
212         left = f->io_size;
213
214         while (left && !td->terminate) {
215                 if (bs > left)
216                         bs = left;
217
218                 r = read(f->fd, b, bs);
219
220                 if (r == (int) bs) {
221                         left -= bs;
222                         continue;
223                 } else {
224                         td_verror(td, EIO, "pre_read");
225                         break;
226                 }
227         }
228
229         td_set_runstate(td, old_runstate);
230
231         if (did_open)
232                 td->io_ops->close_file(td, f);
233         free(b);
234         return 0;
235 }
236
237 static unsigned long long get_rand_file_size(struct thread_data *td)
238 {
239         unsigned long long ret, sized;
240         unsigned long r;
241
242         if (td->o.use_os_rand) {
243                 r = os_random_long(&td->file_size_state);
244                 sized = td->o.file_size_high - td->o.file_size_low;
245                 ret = (unsigned long long) ((double) sized * (r / (OS_RAND_MAX + 1.0)));
246         } else {
247                 r = __rand(&td->__file_size_state);
248                 sized = td->o.file_size_high - td->o.file_size_low;
249                 ret = (unsigned long long) ((double) sized * (r / (FRAND_MAX + 1.0)));
250         }
251
252         ret += td->o.file_size_low;
253         ret -= (ret % td->o.rw_min_bs);
254         return ret;
255 }
256
257 static int file_size(struct thread_data *td, struct fio_file *f)
258 {
259         struct stat st;
260
261         if (stat(f->file_name, &st) == -1) {
262                 td_verror(td, errno, "fstat");
263                 return 1;
264         }
265
266         f->real_file_size = st.st_size;
267         return 0;
268 }
269
270 static int bdev_size(struct thread_data *td, struct fio_file *f)
271 {
272         unsigned long long bytes = 0;
273         int r;
274
275         if (td->io_ops->open_file(td, f)) {
276                 log_err("fio: failed opening blockdev %s for size check\n",
277                         f->file_name);
278                 return 1;
279         }
280
281         r = blockdev_size(f, &bytes);
282         if (r) {
283                 td_verror(td, r, "blockdev_size");
284                 goto err;
285         }
286
287         if (!bytes) {
288                 log_err("%s: zero sized block device?\n", f->file_name);
289                 goto err;
290         }
291
292         f->real_file_size = bytes;
293         td->io_ops->close_file(td, f);
294         return 0;
295 err:
296         td->io_ops->close_file(td, f);
297         return 1;
298 }
299
300 static int char_size(struct thread_data *td, struct fio_file *f)
301 {
302 #ifdef FIO_HAVE_CHARDEV_SIZE
303         unsigned long long bytes = 0;
304         int r;
305
306         if (td->io_ops->open_file(td, f)) {
307                 log_err("fio: failed opening blockdev %s for size check\n",
308                         f->file_name);
309                 return 1;
310         }
311
312         r = chardev_size(f, &bytes);
313         if (r) {
314                 td_verror(td, r, "chardev_size");
315                 goto err;
316         }
317
318         if (!bytes) {
319                 log_err("%s: zero sized char device?\n", f->file_name);
320                 goto err;
321         }
322
323         f->real_file_size = bytes;
324         td->io_ops->close_file(td, f);
325         return 0;
326 err:
327         td->io_ops->close_file(td, f);
328         return 1;
329 #else
330         f->real_file_size = -1ULL;
331         return 0;
332 #endif
333 }
334
335 static int get_file_size(struct thread_data *td, struct fio_file *f)
336 {
337         int ret = 0;
338
339         if (fio_file_size_known(f))
340                 return 0;
341
342         if (f->filetype == FIO_TYPE_FILE)
343                 ret = file_size(td, f);
344         else if (f->filetype == FIO_TYPE_BD)
345                 ret = bdev_size(td, f);
346         else if (f->filetype == FIO_TYPE_CHAR)
347                 ret = char_size(td, f);
348         else
349                 f->real_file_size = -1;
350
351         if (ret)
352                 return ret;
353
354         if (f->file_offset > f->real_file_size) {
355                 log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
356                                         f->file_offset, f->real_file_size);
357                 return 1;
358         }
359
360         fio_file_set_size_known(f);
361         return 0;
362 }
363
364 static int __file_invalidate_cache(struct thread_data *td, struct fio_file *f,
365                                    unsigned long long off,
366                                    unsigned long long len)
367 {
368         int ret = 0;
369
370         if (len == -1ULL)
371                 len = f->io_size;
372         if (off == -1ULL)
373                 off = f->file_offset;
374
375         if (len == -1ULL || off == -1ULL)
376                 return 0;
377
378         dprint(FD_IO, "invalidate cache %s: %llu/%llu\n", f->file_name, off,
379                                                                 len);
380
381         /*
382          * FIXME: add blockdev flushing too
383          */
384         if (f->mmap_ptr) {
385                 ret = posix_madvise(f->mmap_ptr, f->mmap_sz, POSIX_MADV_DONTNEED);
386 #ifdef FIO_MADV_FREE
387                 (void) posix_madvise(f->mmap_ptr, f->mmap_sz, FIO_MADV_FREE);
388 #endif
389         } else if (f->filetype == FIO_TYPE_FILE) {
390                 ret = posix_fadvise(f->fd, off, len, POSIX_FADV_DONTNEED);
391         } else if (f->filetype == FIO_TYPE_BD) {
392                 ret = blockdev_invalidate_cache(f);
393                 if (ret < 0 && errno == EACCES && geteuid()) {
394                         if (!root_warn) {
395                                 log_err("fio: only root may flush block "
396                                         "devices. Cache flush bypassed!\n");
397                                 root_warn = 1;
398                         }
399                         ret = 0;
400                 }
401         } else if (f->filetype == FIO_TYPE_CHAR || f->filetype == FIO_TYPE_PIPE)
402                 ret = 0;
403
404         if (ret < 0) {
405                 td_verror(td, errno, "invalidate_cache");
406                 return 1;
407         } else if (ret > 0) {
408                 td_verror(td, ret, "invalidate_cache");
409                 return 1;
410         }
411
412         return ret;
413
414 }
415
/*
 * Invalidate the cache for the whole IO range of an open file.
 * Does nothing and returns 0 when the file is not open.
 */
int file_invalidate_cache(struct thread_data *td, struct fio_file *f)
{
        if (fio_file_open(f))
                return __file_invalidate_cache(td, f, -1ULL, -1ULL);

        return 0;
}
423
424 int generic_close_file(struct thread_data fio_unused *td, struct fio_file *f)
425 {
426         int ret = 0;
427
428         dprint(FD_FILE, "fd close %s\n", f->file_name);
429
430         remove_file_hash(f);
431
432         if (close(f->fd) < 0)
433                 ret = errno;
434
435         f->fd = -1;
436         return ret;
437 }
438
439 int file_lookup_open(struct fio_file *f, int flags)
440 {
441         struct fio_file *__f;
442         int from_hash;
443
444         __f = lookup_file_hash(f->file_name);
445         if (__f) {
446                 dprint(FD_FILE, "found file in hash %s\n", f->file_name);
447                 /*
448                  * racy, need the __f->lock locked
449                  */
450                 f->lock = __f->lock;
451                 f->lock_owner = __f->lock_owner;
452                 f->lock_batch = __f->lock_batch;
453                 f->lock_ddir = __f->lock_ddir;
454                 from_hash = 1;
455         } else {
456                 dprint(FD_FILE, "file not found in hash %s\n", f->file_name);
457                 from_hash = 0;
458         }
459
460         f->fd = open(f->file_name, flags, 0600);
461         return from_hash;
462 }
463
464 int generic_open_file(struct thread_data *td, struct fio_file *f)
465 {
466         int is_std = 0;
467         int flags = 0;
468         int from_hash = 0;
469
470         dprint(FD_FILE, "fd open %s\n", f->file_name);
471
472         if (td_trim(td) && f->filetype != FIO_TYPE_BD) {
473                 log_err("fio: trim only applies to block device\n");
474                 return 1;
475         }
476
477         if (!strcmp(f->file_name, "-")) {
478                 if (td_rw(td)) {
479                         log_err("fio: can't read/write to stdin/out\n");
480                         return 1;
481                 }
482                 is_std = 1;
483
484                 /*
485                  * move output logging to stderr, if we are writing to stdout
486                  */
487                 if (td_write(td))
488                         f_out = stderr;
489         }
490
491         if (td_trim(td))
492                 goto skip_flags;
493         if (td->o.odirect)
494                 flags |= OS_O_DIRECT;
495         if (td->o.sync_io)
496                 flags |= O_SYNC;
497         if (td->o.create_on_open)
498                 flags |= O_CREAT;
499 skip_flags:
500         if (f->filetype != FIO_TYPE_FILE)
501                 flags |= FIO_O_NOATIME;
502
503 open_again:
504         if (td_write(td)) {
505                 if (!read_only)
506                         flags |= O_RDWR;
507
508                 if (f->filetype == FIO_TYPE_FILE)
509                         flags |= O_CREAT;
510
511                 if (is_std)
512                         f->fd = dup(STDOUT_FILENO);
513                 else
514                         from_hash = file_lookup_open(f, flags);
515         } else if (td_read(td)) {
516                 if (f->filetype == FIO_TYPE_CHAR && !read_only)
517                         flags |= O_RDWR;
518                 else
519                         flags |= O_RDONLY;
520
521                 if (is_std)
522                         f->fd = dup(STDIN_FILENO);
523                 else
524                         from_hash = file_lookup_open(f, flags);
525         } else { //td trim
526                 flags |= O_RDWR;
527                 from_hash = file_lookup_open(f, flags);
528         }
529
530         if (f->fd == -1) {
531                 char buf[FIO_VERROR_SIZE];
532                 int __e = errno;
533
534                 if (__e == EPERM && (flags & FIO_O_NOATIME)) {
535                         flags &= ~FIO_O_NOATIME;
536                         goto open_again;
537                 }
538
539                 snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name);
540
541                 if (__e == EINVAL && (flags & OS_O_DIRECT)) {
542                         log_err("fio: looks like your file system does not " \
543                                 "support direct=1/buffered=0\n");
544                 }
545
546                 td_verror(td, __e, buf);
547         }
548
549         if (!from_hash && f->fd != -1) {
550                 if (add_file_hash(f)) {
551                         int fio_unused ret;
552
553                         /*
554                          * OK to ignore, we haven't done anything with it
555                          */
556                         ret = generic_close_file(td, f);
557                         goto open_again;
558                 }
559         }
560
561         return 0;
562 }
563
/*
 * Public entry point for the file-local get_file_size(); returns its
 * result unchanged.
 */
int generic_get_file_size(struct thread_data *td, struct fio_file *f)
{
        int ret = get_file_size(td, f);

        return ret;
}
568
569 /*
570  * open/close all files, so that ->real_file_size gets set
571  */
572 static int get_file_sizes(struct thread_data *td)
573 {
574         struct fio_file *f;
575         unsigned int i;
576         int err = 0;
577
578         for_each_file(td, f, i) {
579                 dprint(FD_FILE, "get file size for %p/%d/%p\n", f, i,
580                                                                 f->file_name);
581
582                 if (td_io_get_file_size(td, f)) {
583                         if (td->error != ENOENT) {
584                                 log_err("%s\n", td->verror);
585                                 err = 1;
586                         }
587                         clear_error(td);
588                 }
589
590                 if (f->real_file_size == -1ULL && td->o.size)
591                         f->real_file_size = td->o.size / td->o.nr_files;
592         }
593
594         return err;
595 }
596
597 struct fio_mount {
598         struct flist_head list;
599         const char *base;
600         char __base[256];
601         unsigned int key;
602 };
603
604 /*
605  * Get free number of bytes for each file on each unique mount.
606  */
607 static unsigned long long get_fs_free_counts(struct thread_data *td)
608 {
609         struct flist_head *n, *tmp;
610         unsigned long long ret = 0;
611         struct fio_mount *fm;
612         FLIST_HEAD(list);
613         struct fio_file *f;
614         unsigned int i;
615
616         for_each_file(td, f, i) {
617                 struct stat sb;
618                 char buf[256];
619
620                 if (f->filetype == FIO_TYPE_BD || f->filetype == FIO_TYPE_CHAR) {
621                         if (f->real_file_size != -1ULL)
622                                 ret += f->real_file_size;
623                         continue;
624                 } else if (f->filetype != FIO_TYPE_FILE)
625                         continue;
626
627                 strcpy(buf, f->file_name);
628
629                 if (stat(buf, &sb) < 0) {
630                         if (errno != ENOENT)
631                                 break;
632                         strcpy(buf, ".");
633                         if (stat(buf, &sb) < 0)
634                                 break;
635                 }
636
637                 fm = NULL;
638                 flist_for_each(n, &list) {
639                         fm = flist_entry(n, struct fio_mount, list);
640                         if (fm->key == sb.st_dev)
641                                 break;
642
643                         fm = NULL;
644                 }
645
646                 if (fm)
647                         continue;
648
649                 fm = malloc(sizeof(*fm));
650                 strcpy(fm->__base, buf);
651                 fm->base = basename(fm->__base);
652                 fm->key = sb.st_dev;
653                 flist_add(&fm->list, &list);
654         }
655
656         flist_for_each_safe(n, tmp, &list) {
657                 unsigned long long sz;
658
659                 fm = flist_entry(n, struct fio_mount, list);
660                 flist_del(&fm->list);
661
662                 sz = get_fs_size(fm->base);
663                 if (sz && sz != -1ULL)
664                         ret += sz;
665
666                 free(fm);
667         }
668
669         return ret;
670 }
671
672 unsigned long long get_start_offset(struct thread_data *td)
673 {
674         return td->o.start_offset +
675                 (td->thread_number - 1) * td->o.offset_increment;
676 }
677
678 /*
679  * Open the files and setup files sizes, creating files if necessary.
680  */
681 int setup_files(struct thread_data *td)
682 {
683         unsigned long long total_size, extend_size;
684         struct fio_file *f;
685         unsigned int i;
686         int err = 0, need_extend;
687
688         dprint(FD_FILE, "setup files\n");
689
690         if (td->o.read_iolog_file)
691                 goto done;
692
693         /*
694          * if ioengine defines a setup() method, it's responsible for
695          * opening the files and setting f->real_file_size to indicate
696          * the valid range for that file.
697          */
698         if (td->io_ops->setup)
699                 err = td->io_ops->setup(td);
700         else
701                 err = get_file_sizes(td);
702
703         if (err)
704                 return err;
705
706         /*
707          * check sizes. if the files/devices do not exist and the size
708          * isn't passed to fio, abort.
709          */
710         total_size = 0;
711         for_each_file(td, f, i) {
712                 if (f->real_file_size == -1ULL)
713                         total_size = -1ULL;
714                 else
715                         total_size += f->real_file_size;
716         }
717
718         if (td->o.fill_device)
719                 td->fill_device_size = get_fs_free_counts(td);
720
721         /*
722          * device/file sizes are zero and no size given, punt
723          */
724         if ((!total_size || total_size == -1ULL) && !td->o.size &&
725             !(td->io_ops->flags & FIO_NOIO) && !td->o.fill_device) {
726                 log_err("%s: you need to specify size=\n", td->o.name);
727                 td_verror(td, EINVAL, "total_file_size");
728                 return 1;
729         }
730
731         /*
732          * now file sizes are known, so we can set ->io_size. if size= is
733          * not given, ->io_size is just equal to ->real_file_size. if size
734          * is given, ->io_size is size / nr_files.
735          */
736         extend_size = total_size = 0;
737         need_extend = 0;
738         for_each_file(td, f, i) {
739                 f->file_offset = get_start_offset(td);
740
741                 if (!td->o.file_size_low) {
742                         /*
743                          * no file size range given, file size is equal to
744                          * total size divided by number of files. if that is
745                          * zero, set it to the real file size.
746                          */
747                         f->io_size = td->o.size / td->o.nr_files;
748                         if (!f->io_size)
749                                 f->io_size = f->real_file_size - f->file_offset;
750                 } else if (f->real_file_size < td->o.file_size_low ||
751                            f->real_file_size > td->o.file_size_high) {
752                         if (f->file_offset > td->o.file_size_low)
753                                 goto err_offset;
754                         /*
755                          * file size given. if it's fixed, use that. if it's a
756                          * range, generate a random size in-between.
757                          */
758                         if (td->o.file_size_low == td->o.file_size_high) {
759                                 f->io_size = td->o.file_size_low
760                                                 - f->file_offset;
761                         } else {
762                                 f->io_size = get_rand_file_size(td)
763                                                 - f->file_offset;
764                         }
765                 } else
766                         f->io_size = f->real_file_size - f->file_offset;
767
768                 if (f->io_size == -1ULL)
769                         total_size = -1ULL;
770                 else {
771                         if (td->o.size_percent)
772                                 f->io_size = (f->io_size * td->o.size_percent) / 100;
773                         total_size += f->io_size;
774                 }
775
776                 if (f->filetype == FIO_TYPE_FILE &&
777                     (f->io_size + f->file_offset) > f->real_file_size &&
778                     !(td->io_ops->flags & FIO_DISKLESSIO)) {
779                         if (!td->o.create_on_open) {
780                                 need_extend++;
781                                 extend_size += (f->io_size + f->file_offset);
782                         } else
783                                 f->real_file_size = f->io_size + f->file_offset;
784                         fio_file_set_extend(f);
785                 }
786         }
787
788         if (!td->o.size || td->o.size > total_size)
789                 td->o.size = total_size;
790
791         /*
792          * See if we need to extend some files
793          */
794         if (need_extend) {
795                 temp_stall_ts = 1;
796                 if (output_format == FIO_OUTPUT_NORMAL)
797                         log_info("%s: Laying out IO file(s) (%u file(s) /"
798                                  " %lluMB)\n", td->o.name, need_extend,
799                                         extend_size >> 20);
800
801                 for_each_file(td, f, i) {
802                         unsigned long long old_len = -1ULL, extend_len = -1ULL;
803
804                         if (!fio_file_extend(f))
805                                 continue;
806
807                         assert(f->filetype == FIO_TYPE_FILE);
808                         fio_file_clear_extend(f);
809                         if (!td->o.fill_device) {
810                                 old_len = f->real_file_size;
811                                 extend_len = f->io_size + f->file_offset -
812                                                 old_len;
813                         }
814                         f->real_file_size = (f->io_size + f->file_offset);
815                         err = extend_file(td, f);
816                         if (err)
817                                 break;
818
819                         err = __file_invalidate_cache(td, f, old_len,
820                                                                 extend_len);
821                         close(f->fd);
822                         f->fd = -1;
823                         if (err)
824                                 break;
825                 }
826                 temp_stall_ts = 0;
827         }
828
829         if (err)
830                 return err;
831
832         if (!td->o.zone_size)
833                 td->o.zone_size = td->o.size;
834
835         /*
836          * iolog already set the total io size, if we read back
837          * stored entries.
838          */
839         if (!td->o.read_iolog_file)
840                 td->total_io_size = td->o.size * td->o.loops;
841
842 done:
843         if (td->o.create_only)
844                 td->done = 1;
845
846         return 0;
847 err_offset:
848         log_err("%s: you need to specify valid offset=\n", td->o.name);
849         return 1;
850 }
851
852 int pre_read_files(struct thread_data *td)
853 {
854         struct fio_file *f;
855         unsigned int i;
856
857         dprint(FD_FILE, "pre_read files\n");
858
859         for_each_file(td, f, i) {
860                 pre_read_file(td, f);
861         }
862
863         return 1;
864 }
865
866 static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
867 {
868         unsigned int range_size, seed;
869         unsigned long nranges;
870
871         range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
872
873         nranges = (f->real_file_size + range_size - 1) / range_size;
874
875         seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
876         if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
877                 zipf_init(&f->zipf, nranges, td->o.zipf_theta, seed);
878         else
879                 pareto_init(&f->zipf, nranges, td->o.pareto_h, seed);
880
881         return 1;
882 }
883
884 static int init_rand_distribution(struct thread_data *td)
885 {
886         struct fio_file *f;
887         unsigned int i;
888         int state;
889
890         if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
891                 return 0;
892
893         state = td->runstate;
894         td_set_runstate(td, TD_SETTING_UP);
895         for_each_file(td, f, i)
896                 __init_rand_distribution(td, f);
897         td_set_runstate(td, state);
898
899         return 1;
900 }
901
902 int init_random_map(struct thread_data *td)
903 {
904         unsigned long long blocks, num_maps;
905         struct fio_file *f;
906         unsigned int i;
907
908         if (init_rand_distribution(td))
909                 return 0;
910         if (td->o.norandommap || !td_random(td))
911                 return 0;
912
913         for_each_file(td, f, i) {
914                 blocks = (f->real_file_size + td->o.rw_min_bs - 1) /
915                                 (unsigned long long) td->o.rw_min_bs;
916                 num_maps = (blocks + BLOCKS_PER_MAP - 1) /
917                                 (unsigned long long) BLOCKS_PER_MAP;
918                 if (num_maps == (unsigned long) num_maps) {
919                         f->file_map = smalloc(num_maps * sizeof(unsigned long));
920                         if (f->file_map) {
921                                 f->num_maps = num_maps;
922                                 continue;
923                         }
924                 } else
925                         f->file_map = NULL;
926
927                 if (!td->o.softrandommap) {
928                         log_err("fio: failed allocating random map. If running"
929                                 " a large number of jobs, try the 'norandommap'"
930                                 " option or set 'softrandommap'. Or give"
931                                 " a larger --alloc-size to fio.\n");
932                         return 1;
933                 }
934
935                 log_info("fio: file %s failed allocating random map. Running "
936                          "job without.\n", f->file_name);
937                 f->num_maps = 0;
938         }
939
940         return 0;
941 }
942
943 void close_files(struct thread_data *td)
944 {
945         struct fio_file *f;
946         unsigned int i;
947
948         for_each_file(td, f, i) {
949                 if (fio_file_open(f))
950                         td_io_close_file(td, f);
951         }
952 }
953
954 void close_and_free_files(struct thread_data *td)
955 {
956         struct fio_file *f;
957         unsigned int i;
958
959         dprint(FD_FILE, "close files\n");
960
961         for_each_file(td, f, i) {
962                 if (td->o.unlink && f->filetype == FIO_TYPE_FILE) {
963                         dprint(FD_FILE, "free unlink %s\n", f->file_name);
964                         unlink(f->file_name);
965                 }
966
967                 if (fio_file_open(f))
968                         td_io_close_file(td, f);
969
970                 remove_file_hash(f);
971
972                 sfree(f->file_name);
973                 f->file_name = NULL;
974                 sfree(f->file_map);
975                 f->file_map = NULL;
976                 sfree(f);
977         }
978
979         td->o.filename = NULL;
980         free(td->files);
981         td->files_index = 0;
982         td->files = NULL;
983         td->o.nr_files = 0;
984 }
985
986 static void get_file_type(struct fio_file *f)
987 {
988         struct stat sb;
989
990         if (!strcmp(f->file_name, "-"))
991                 f->filetype = FIO_TYPE_PIPE;
992         else
993                 f->filetype = FIO_TYPE_FILE;
994
995         /* \\.\ is the device namespace in Windows, where every file is
996          * a block device */
997         if (strncmp(f->file_name, "\\\\.\\", 4) == 0)
998                 f->filetype = FIO_TYPE_BD;
999
1000         if (!stat(f->file_name, &sb)) {
1001                 if (S_ISBLK(sb.st_mode))
1002                         f->filetype = FIO_TYPE_BD;
1003                 else if (S_ISCHR(sb.st_mode))
1004                         f->filetype = FIO_TYPE_CHAR;
1005                 else if (S_ISFIFO(sb.st_mode))
1006                         f->filetype = FIO_TYPE_PIPE;
1007         }
1008 }
1009
1010 int add_file(struct thread_data *td, const char *fname)
1011 {
1012         int cur_files = td->files_index;
1013         char file_name[PATH_MAX];
1014         struct fio_file *f;
1015         int len = 0;
1016
1017         dprint(FD_FILE, "add file %s\n", fname);
1018
1019         f = smalloc(sizeof(*f));
1020         if (!f) {
1021                 log_err("fio: smalloc OOM\n");
1022                 assert(0);
1023         }
1024
1025         f->fd = -1;
1026         fio_file_reset(f);
1027
1028         if (td->files_size <= td->files_index) {
1029                 int new_size = td->o.nr_files + 1;
1030
1031                 dprint(FD_FILE, "resize file array to %d files\n", new_size);
1032
1033                 td->files = realloc(td->files, new_size * sizeof(f));
1034                 td->files_size = new_size;
1035         }
1036         td->files[cur_files] = f;
1037         f->fileno = cur_files;
1038
1039         /*
1040          * init function, io engine may not be loaded yet
1041          */
1042         if (td->io_ops && (td->io_ops->flags & FIO_DISKLESSIO))
1043                 f->real_file_size = -1ULL;
1044
1045         if (td->o.directory)
1046                 len = sprintf(file_name, "%s/", td->o.directory);
1047
1048         sprintf(file_name + len, "%s", fname);
1049         f->file_name = smalloc_strdup(file_name);
1050         if (!f->file_name) {
1051                 log_err("fio: smalloc OOM\n");
1052                 assert(0);
1053         }
1054
1055         get_file_type(f);
1056
1057         switch (td->o.file_lock_mode) {
1058         case FILE_LOCK_NONE:
1059                 break;
1060         case FILE_LOCK_READWRITE:
1061                 f->lock = fio_mutex_rw_init();
1062                 break;
1063         case FILE_LOCK_EXCLUSIVE:
1064                 f->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
1065                 break;
1066         default:
1067                 log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
1068                 assert(0);
1069         }
1070
1071         td->files_index++;
1072         if (f->filetype == FIO_TYPE_FILE)
1073                 td->nr_normal_files++;
1074
1075         dprint(FD_FILE, "file %p \"%s\" added at %d\n", f, f->file_name,
1076                                                         cur_files);
1077
1078         return cur_files;
1079 }
1080
1081 int add_file_exclusive(struct thread_data *td, const char *fname)
1082 {
1083         struct fio_file *f;
1084         unsigned int i;
1085
1086         for_each_file(td, f, i) {
1087                 if (!strcmp(f->file_name, fname))
1088                         return i;
1089         }
1090
1091         return add_file(td, fname);
1092 }
1093
1094 void get_file(struct fio_file *f)
1095 {
1096         dprint(FD_FILE, "get file %s, ref=%d\n", f->file_name, f->references);
1097         assert(fio_file_open(f));
1098         f->references++;
1099 }
1100
1101 int put_file(struct thread_data *td, struct fio_file *f)
1102 {
1103         int f_ret = 0, ret = 0;
1104
1105         dprint(FD_FILE, "put file %s, ref=%d\n", f->file_name, f->references);
1106
1107         if (!fio_file_open(f)) {
1108                 assert(f->fd == -1);
1109                 return 0;
1110         }
1111
1112         assert(f->references);
1113         if (--f->references)
1114                 return 0;
1115
1116         if (should_fsync(td) && td->o.fsync_on_close)
1117                 f_ret = fsync(f->fd);
1118
1119         if (td->io_ops->close_file)
1120                 ret = td->io_ops->close_file(td, f);
1121
1122         if (!ret)
1123                 ret = f_ret;
1124
1125         td->nr_open_files--;
1126         fio_file_clear_open(f);
1127         assert(f->fd == -1);
1128         return ret;
1129 }
1130
1131 void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
1132 {
1133         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1134                 return;
1135
1136         if (f->lock_owner == td && f->lock_batch--)
1137                 return;
1138
1139         if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
1140                 if (ddir == DDIR_READ)
1141                         fio_mutex_down_read(f->lock);
1142                 else
1143                         fio_mutex_down_write(f->lock);
1144         } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
1145                 fio_mutex_down(f->lock);
1146
1147         f->lock_owner = td;
1148         f->lock_batch = td->o.lockfile_batch;
1149         f->lock_ddir = ddir;
1150 }
1151
1152 void unlock_file(struct thread_data *td, struct fio_file *f)
1153 {
1154         if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
1155                 return;
1156         if (f->lock_batch)
1157                 return;
1158
1159         if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
1160                 const int is_read = f->lock_ddir == DDIR_READ;
1161                 int val = fio_mutex_getval(f->lock);
1162
1163                 if ((is_read && val == 1) || (!is_read && val == -1))
1164                         f->lock_owner = NULL;
1165
1166                 if (is_read)
1167                         fio_mutex_up_read(f->lock);
1168                 else
1169                         fio_mutex_up_write(f->lock);
1170         } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE) {
1171                 int val = fio_mutex_getval(f->lock);
1172
1173                 if (val == 0)
1174                         f->lock_owner = NULL;
1175
1176                 fio_mutex_up(f->lock);
1177         }
1178 }
1179
1180 void unlock_file_all(struct thread_data *td, struct fio_file *f)
1181 {
1182         if (f->lock_owner != td)
1183                 return;
1184
1185         f->lock_batch = 0;
1186         unlock_file(td, f);
1187 }
1188
1189 static int recurse_dir(struct thread_data *td, const char *dirname)
1190 {
1191         struct dirent *dir;
1192         int ret = 0;
1193         DIR *D;
1194
1195         D = opendir(dirname);
1196         if (!D) {
1197                 char buf[FIO_VERROR_SIZE];
1198
1199                 snprintf(buf, FIO_VERROR_SIZE - 1, "opendir(%s)", dirname);
1200                 td_verror(td, errno, buf);
1201                 return 1;
1202         }
1203
1204         while ((dir = readdir(D)) != NULL) {
1205                 char full_path[PATH_MAX];
1206                 struct stat sb;
1207
1208                 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
1209                         continue;
1210
1211                 sprintf(full_path, "%s%s%s", dirname, FIO_OS_PATH_SEPARATOR, dir->d_name);
1212
1213                 if (lstat(full_path, &sb) == -1) {
1214                         if (errno != ENOENT) {
1215                                 td_verror(td, errno, "stat");
1216                                 return 1;
1217                         }
1218                 }
1219
1220                 if (S_ISREG(sb.st_mode)) {
1221                         add_file(td, full_path);
1222                         td->o.nr_files++;
1223                         continue;
1224                 }
1225                 if (!S_ISDIR(sb.st_mode))
1226                         continue;
1227
1228                 ret = recurse_dir(td, full_path);
1229                 if (ret)
1230                         break;
1231         }
1232
1233         closedir(D);
1234         return ret;
1235 }
1236
1237 int add_dir_files(struct thread_data *td, const char *path)
1238 {
1239         int ret = recurse_dir(td, path);
1240
1241         if (!ret)
1242                 log_info("fio: opendir added %d files\n", td->o.nr_files);
1243
1244         return ret;
1245 }
1246
1247 void dup_files(struct thread_data *td, struct thread_data *org)
1248 {
1249         struct fio_file *f;
1250         unsigned int i;
1251
1252         dprint(FD_FILE, "dup files: %d\n", org->files_index);
1253
1254         if (!org->files)
1255                 return;
1256
1257         td->files = malloc(org->files_index * sizeof(f));
1258
1259         for_each_file(org, f, i) {
1260                 struct fio_file *__f;
1261
1262                 __f = smalloc(sizeof(*__f));
1263                 if (!__f) {
1264                         log_err("fio: smalloc OOM\n");
1265                         assert(0);
1266                 }
1267                 __f->fd = -1;
1268                 fio_file_reset(__f);
1269
1270                 if (f->file_name) {
1271                         __f->file_name = smalloc_strdup(f->file_name);
1272                         if (!__f->file_name) {
1273                                 log_err("fio: smalloc OOM\n");
1274                                 assert(0);
1275                         }
1276
1277                         __f->filetype = f->filetype;
1278                 }
1279
1280                 td->files[i] = __f;
1281         }
1282 }
1283
1284 /*
1285  * Returns the index that matches the filename, or -1 if not there
1286  */
1287 int get_fileno(struct thread_data *td, const char *fname)
1288 {
1289         struct fio_file *f;
1290         unsigned int i;
1291
1292         for_each_file(td, f, i)
1293                 if (!strcmp(f->file_name, fname))
1294                         return i;
1295
1296         return -1;
1297 }
1298
1299 /*
1300  * For log usage, where we add/open/close files automatically
1301  */
1302 void free_release_files(struct thread_data *td)
1303 {
1304         close_files(td);
1305         td->files_index = 0;
1306         td->nr_normal_files = 0;
1307 }