filesetup: add native fallocate

[fio.git] / fio.1
diff --git a/fio.1 b/fio.1

index 8d596fb19105df2e6b3aabcbda171cf462f3157e..9783646d752988a455b24a2fc029f1725c848e48 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -1,4 +1,4 @@
-.TH fio 1 "December 2014" "User Manual"
+.TH fio 1 "June 2017" "User Manual"
  .SH NAME
  fio \- flexible I/O tester
  .SH SYNOPSIS
@@ -30,7 +30,7 @@ dump of the latency buckets.
  Limit run time to \fIruntime\fR seconds.
  .TP
  .B \-\-bandwidth\-log
-Generate per-job bandwidth logs.
+Generate aggregate bandwidth logs.
  .TP
  .B \-\-minimal
  Print statistics in a terse, semicolon-delimited format.
@@ -43,7 +43,7 @@ Deprecated, use \-\-output-format instead to select multiple formats.
  Display version information and exit.
  .TP
  .BI \-\-terse\-version \fR=\fPversion
-Set terse version output format (Current version 3, or older version 2).
+Set terse version output format (default 3, or 2, 4, 5).
  .TP
  .B \-\-help
  Display usage information and exit.
@@ -147,19 +147,77 @@ parentheses). The types used are:
  String: a sequence of alphanumeric characters.
  .TP
  .I int
-SI integer: a whole number, possibly containing a suffix denoting the base unit
-of the value.  Accepted suffixes are `k', 'M', 'G', 'T', and 'P', denoting
-kilo (1024), mega (1024^2), giga (1024^3), tera (1024^4), and peta (1024^5)
-respectively. If prefixed with '0x', the value is assumed to be base 16
-(hexadecimal). A suffix may include a trailing 'b', for instance 'kb' is
-identical to 'k'. You can specify a base 10 value by using 'KiB', 'MiB','GiB',
-etc. This is useful for disk drives where values are often given in base 10
-values. Specifying '30GiB' will get you 30*1000^3 bytes.
-When specifying times the default suffix meaning changes, still denoting the
-base unit of the value, but accepted suffixes are 'D' (days), 'H' (hours), 'M'
-(minutes), 'S' Seconds, 'ms' (or msec) milli seconds, 'us' (or 'usec') micro
-seconds. Time values without a unit specify seconds.
-The suffixes are not case sensitive.
+Integer. A whole number value, which may contain an integer prefix
+and an integer suffix.
+
+[integer prefix]number[integer suffix]
+
+The optional integer prefix specifies the number's base. The default
+is decimal. 0x specifies hexadecimal.
+
+The optional integer suffix specifies the number's units, and includes
+an optional unit prefix and an optional unit.  For quantities
+of data, the default unit is bytes. For quantities of time,
+the default unit is seconds.
+
+With \fBkb_base=1000\fR, fio follows international standards for unit prefixes.
+To specify power-of-10 decimal values defined in the International
+System of Units (SI):
+.nf
+k means kilo (K) or 1000
+m means mega (M) or 1000**2
+g means giga (G) or 1000**3
+t means tera (T) or 1000**4
+p means peta (P) or 1000**5
+.fi
+
+To specify power-of-2 binary values defined in IEC 80000-13:
+.nf
+ki means kibi (Ki) or 1024
+mi means mebi (Mi) or 1024**2
+gi means gibi (Gi) or 1024**3
+ti means tebi (Ti) or 1024**4
+pi means pebi (Pi) or 1024**5
+.fi
+
+With \fBkb_base=1024\fR (the default), the unit prefixes are opposite from
+those specified in the SI and IEC 80000-13 standards to provide
+compatibility with old scripts.  For example, 4k means 4096.
+
+.nf
+Examples with \fBkb_base=1000\fR:
+4 KiB: 4096, 4096b, 4096B, 4ki, 4kib, 4kiB, 4Ki, 4KiB
+1 MiB: 1048576, 1mi, 1024ki
+1 MB: 1000000, 1m, 1000k
+1 TiB: 1099511627776, 1ti, 1024gi, 1048576mi
+1 TB: 1000000000000, 1t, 1000g, 1000000m
+.fi
+
+.nf
+Examples with \fBkb_base=1024\fR (default):
+4 KiB: 4096, 4096b, 4096B, 4k, 4kb, 4kB, 4K, 4KB
+1 MiB: 1048576, 1m, 1024k
+1 MB: 1000000, 1mi, 1000ki
+1 TiB: 1099511627776, 1t, 1024g, 1048576m
+1 TB: 1000000000000, 1ti, 1000gi, 1000000mi
+.fi
+
+For quantities of data, an optional unit of 'B' may be included
+(e.g.,  'kb' is the same as 'k').
+
+The integer suffix is not case sensitive (e.g., m/mi mean mebi/mega,
+not milli). 'b' and 'B' both mean byte, not bit.
+
+To specify times (units are not case sensitive):
+.nf
+D means days
+H means hours
+M means minutes
+s or sec means seconds (default)
+ms or msec means milliseconds
+us or usec means microseconds
+.fi
+
  .TP
  .I bool
  Boolean: a true or false value. `0' denotes false, `1' denotes true.
@@ -287,7 +345,7 @@ Sequential reads.
  Sequential writes.
  .TP
  .B trim
-Sequential trim (Linux block devices only).
+Sequential trims (Linux block devices only).
  .TP
  .B randread
  Random reads.
@@ -296,7 +354,7 @@ Random reads.
  Random writes.
  .TP
  .B randtrim
-Random trim (Linux block devices only).
+Random trims (Linux block devices only).
  .TP
  .B rw, readwrite
  Mixed sequential reads and writes.
@@ -305,8 +363,8 @@ Mixed sequential reads and writes.
  Mixed random reads and writes.
  .TP
  .B trimwrite
-Trim and write mixed workload. Blocks will be trimmed first, then the same
-blocks will be written to.
+Sequential trim and write mixed workload. Blocks will be trimmed first, then
+the same blocks will be written to.
  .RE
  .P
  Fio defaults to read if the option is not specified.
@@ -353,7 +411,7 @@ reasons. Allowed values are 1024 or 1000, with 1024 being the default.
  .TP
  .BI unified_rw_reporting \fR=\fPbool
  Fio normally reports statistics on a per data direction basis, meaning that
-read, write, and trim are accounted and reported separately. If this option is
+reads, writes, and trims are accounted and reported separately. If this option is
  set fio sums the results and reports them as "mixed" instead.
  .TP
  .BI randrepeat \fR=\fPbool
@@ -378,6 +436,10 @@ are:
  .B none
  Do not pre-allocate space.
  .TP
+.B native
+Use a platform's native pre-allocation call but fall back to 'none' behavior if
+it fails/is not implemented.
+.TP
  .B posix
  Pre-allocate via \fBposix_fallocate\fR\|(3).
  .TP
@@ -392,13 +454,32 @@ Backward-compatible alias for 'posix'.
  .RE
  .P
  May not be available on all supported platforms. 'keep' is only
-available on Linux. If using ZFS on Solaris this must be set to 'none'
-because ZFS doesn't support it. Default: 'posix'.
+available on Linux. If using ZFS on Solaris this cannot be set to 'posix'
+because ZFS doesn't support it. Default: 'native' if any pre-allocation methods
+are available, 'none' if not.
  .RE
  .TP
-.BI fadvise_hint \fR=\fPbool
+.BI fadvise_hint \fR=\fPstr
  Use \fBposix_fadvise\fR\|(2) to advise the kernel what I/O patterns
-are likely to be issued. Default: true.
+are likely to be issued. Accepted values are:
+.RS
+.RS
+.TP
+.B 0
+Backwards compatible hint for "no hint".
+.TP
+.B 1
+Backwards compatible hint for "advise with fio workload type". This
+uses \fBFADV_RANDOM\fR for a random workload, and \fBFADV_SEQUENTIAL\fR
+for a sequential workload.
+.TP
+.B sequential
+Advise using \fBFADV_SEQUENTIAL\fR.
+.TP
+.B random
+Advise using \fBFADV_RANDOM\fR.
+.RE
+.RE
  .TP
  .BI fadvise_stream \fR=\fPint
  Use \fBposix_fadvise\fR\|(2) to advise the kernel what stream ID the
@@ -445,20 +526,32 @@ size of a file. If this option is set, then fio will append to the file
  instead. This has identical behavior to setting \fRoffset\fP to the size
  of a file. This option is ignored on non-regular files.
  .TP
-.BI blocksize \fR=\fPint[,int] "\fR,\fB bs" \fR=\fPint[,int]
-Block size for I/O units.  Default: 4k.  Values for reads, writes, and trims
-can be specified separately in the format \fIread\fR,\fIwrite\fR,\fItrim\fR
-either of which may be empty to leave that value at its default. If a trailing
-comma isn't given, the remainder will inherit the last value set.
-.TP
-.BI blocksize_range \fR=\fPirange[,irange] "\fR,\fB bsrange" \fR=\fPirange[,irange]
-Specify a range of I/O block sizes.  The issued I/O unit will always be a
-multiple of the minimum size, unless \fBblocksize_unaligned\fR is set.  Applies
-to both reads and writes if only one range is given, but can be specified
-separately with a comma separating the values. Example: bsrange=1k-4k,2k-8k.
-Also (see \fBblocksize\fR).
-.TP
-.BI bssplit \fR=\fPstr
+.BI blocksize \fR=\fPint[,int][,int] "\fR,\fB bs" \fR=\fPint[,int][,int]
+The block size in bytes for I/O units.  Default: 4096.
+A single value applies to reads, writes, and trims.
+Comma-separated values may be specified for reads, writes, and trims.
+Empty values separated by commas use the default value. A value not
+terminated in a comma applies to subsequent types.
+.nf
+Examples:
+bs=256k    means 256k for reads, writes and trims
+bs=8k,32k  means 8k for reads, 32k for writes and trims
+bs=8k,32k, means 8k for reads, 32k for writes, and default for trims
+bs=,8k     means default for reads, 8k for writes and trims
+bs=,8k,    means default for reads, 8k for writes, and default for trims
+.fi
+.TP
+.BI blocksize_range \fR=\fPirange[,irange][,irange] "\fR,\fB bsrange" \fR=\fPirange[,irange][,irange]
+A range of block sizes in bytes for I/O units.
+The issued I/O unit will always be a multiple of the minimum size, unless
+\fBblocksize_unaligned\fR is set.
+Comma-separated ranges may be specified for reads, writes, and trims
+as described in \fBblocksize\fR.
+.nf
+Example: bsrange=1k-4k,2k-8k.
+.fi
+.TP
+.BI bssplit \fR=\fPstr[,str][,str]
  This option allows even finer grained control of the block sizes issued,
  not just even splits between them. With this option, you can weight various
  block sizes for exact control of the issued IO for a job that has mixed
@@ -466,26 +559,28 @@ block sizes. The format of the option is bssplit=blocksize/percentage,
  optionally adding as many definitions as needed separated by a colon.
  Example: bssplit=4k/10:64k/50:32k/40 would issue 50% 64k blocks, 10% 4k
  blocks and 40% 32k blocks. \fBbssplit\fR also supports giving separate
-splits to reads and writes. The format is identical to what the
-\fBbs\fR option accepts, the read and write parts are separated with a
-comma.
+splits to reads, writes, and trims.
+Comma-separated values may be specified for reads, writes, and trims
+as described in \fBblocksize\fR.
  .TP
-.B blocksize_unaligned\fR,\fP bs_unaligned
-If set, any size in \fBblocksize_range\fR may be used.  This typically won't
+.B blocksize_unaligned\fR,\fB bs_unaligned
+If set, fio will issue I/O units with any size within \fBblocksize_range\fR,
+not just multiples of the minimum size.  This typically won't
  work with direct I/O, as that normally requires sector alignment.
  .TP
-.BI blockalign \fR=\fPint[,int] "\fR,\fB ba" \fR=\fPint[,int]
-At what boundary to align random IO offsets. Defaults to the same as 'blocksize'
-the minimum blocksize given.  Minimum alignment is typically 512b
-for using direct IO, though it usually depends on the hardware block size.
-This option is mutually exclusive with using a random map for files, so it
-will turn off that option.
-.TP
  .BI bs_is_seq_rand \fR=\fPbool
  If this option is set, fio will use the normal read,write blocksize settings as
-sequential,random instead. Any random read or write will use the WRITE
-blocksize settings, and any sequential read or write will use the READ
-blocksize setting.
+sequential,random blocksize settings instead. Any random read or write will
+use the WRITE blocksize settings, and any sequential read or write will use
+the READ blocksize settings.
+.TP
+.BI blockalign \fR=\fPint[,int][,int] "\fR,\fB ba" \fR=\fPint[,int][,int]
+Boundary to which fio will align random I/O units. Default: \fBblocksize\fR.
+Minimum alignment is typically 512b for using direct IO, though it usually
+depends on the hardware block size.  This option is mutually exclusive with
+using a random map for files, so it will turn off that option.
+Comma-separated values may be specified for reads, writes, and trims
+as described in \fBblocksize\fR.
  .TP
  .B zero_buffers
  Initialize buffers with all zeros. Default: fill buffers with random data.
@@ -521,13 +616,20 @@ the remaining zeroed. With this set to some chunk size smaller than the block
  size, fio can alternate random and zeroed data throughout the IO buffer.
  .TP
  .BI buffer_pattern \fR=\fPstr
-If set, fio will fill the IO buffers with this pattern. If not set, the contents
-of IO buffers is defined by the other options related to buffer contents. The
-setting can be any pattern of bytes, and can be prefixed with 0x for hex
-values. It may also be a string, where the string must then be wrapped with
-"", e.g.:
+If set, fio will fill the I/O buffers with this pattern or with the contents
+of a file. If not set, the contents of I/O buffers are defined by the other
+options related to buffer contents. The setting can be any pattern of bytes,
+and can be prefixed with 0x for hex values. It may also be a string, where
+the string must then be wrapped with "". Or it may also be a filename,
+where the filename must be wrapped with '' in which case the file is
+opened and read. Note that not all the file contents will be read if that
+would cause the buffers to overflow. So, for example:
+.RS
  .RS
+\fBbuffer_pattern\fR='filename'
  .RS
+or
+.RE
  \fBbuffer_pattern\fR="abcd"
  .RS
  or
@@ -542,7 +644,7 @@ or
  Also you can combine everything together in any order:
  .LP
  .RS
-\fBbuffer_pattern\fR=0xdeadface"abcd"-12
+\fBbuffer_pattern\fR=0xdeadface"abcd"-12'filename'
  .RE
  .RE
  .TP
@@ -579,8 +681,11 @@ Use a zipfian distribution to decide what file to access.
  .B pareto
  Use a pareto distribution to decide what file to access.
  .TP
+.B normal
+Use a Gaussian (normal) distribution to decide what file to access.
+.TP
  .B gauss
-Use a gaussian (normal) distribution to decide what file to access.
+Alias for normal.
  .RE
  .P
  For \fBrandom\fR, \fBroundrobin\fR, and \fBsequential\fR, a postfix can be
@@ -717,11 +822,16 @@ properly.
  Read, write and erase an MTD character device (e.g., /dev/mtd0). Discards are
  treated as erases. Depending on the underlying device type, the I/O may have
  to go in a certain pattern, e.g., on NAND, writing sequentially to erase blocks
-and discarding before overwriting. The writetrim mode works well for this
+and discarding before overwriting. The trimwrite mode works well for this
  constraint.
  .TP
  .B pmemblk
-Read and write through the NVML libpmemblk interface.
+Read and write using filesystem DAX to a file on a filesystem mounted with
+DAX on a persistent memory device through the NVML libpmemblk library.
+.TP
+.B dev-dax
+Read and write using device DAX to a persistent memory device
+(e.g., /dev/dax0.0) through the NVML libpmem library.
  .RE
  .P
  .RE
@@ -764,7 +874,7 @@ Example #1:
  \fBiodepth_batch_complete_max\fR=<iodepth>
  .RE
  
-which means that we will retrieve at leat 1 IO and up to the
+which means that we will retrieve at least 1 IO and up to the
  whole submitted queue depth. If none of IO has been completed
  yet, we will wait.
  
@@ -809,7 +919,13 @@ If true, use buffered I/O.  This is the opposite of the \fBdirect\fR parameter.
  Default: true.
  .TP
  .BI offset \fR=\fPint
-Offset in the file to start I/O. Data before the offset will not be touched.
+Start I/O at the provided offset in the file, given as either a fixed size in
+bytes or a percentage. If a percentage is given, the next \fBblockalign\fR-ed
+offset will be used. Data before the given offset will not be touched. This
+effectively caps the file size at (real_size - offset). Can be combined with
+\fBsize\fR to constrain the start and end range of the I/O workload. A percentage
+can be specified by a number between 1 and 100 followed by '%', for example,
+offset=20% to specify 20%.
  .TP
  .BI offset_increment \fR=\fPint
  If this is provided, then the real offset becomes the
@@ -896,8 +1012,8 @@ Zipf distribution
  .B pareto
  Pareto distribution
  .TP
-.B gauss
-Normal (gaussian) distribution
+.B normal
+Normal (Gaussian) distribution
  .TP
  .B zoned
  Zoned random distribution
@@ -909,8 +1025,8 @@ For \fBpareto\fR, it's the pareto power. Fio includes a test program, genzipf,
  that can be used to visualize what the given input values will yield in terms of
  hit rates. If you wanted to use \fBzipf\fR with a theta of 1.2, you would use
  random_distribution=zipf:1.2 as the option. If a non-uniform model is used,
-fio will disable use of the random map. For the \fBgauss\fR distribution, a
-normal deviation is supplied as a value between 0 and 100.
+fio will disable use of the random map. For the \fBnormal\fR distribution, a
+normal (Gaussian) deviation is supplied as a value between 0 and 100.
  .P
  .RS
  For a \fBzoned\fR distribution, fio supports specifying percentages of IO
@@ -942,7 +1058,7 @@ sizes. Like \fBbssplit\fR, it's possible to specify separate zones for reads,
  writes, and trims. If just one set is given, it'll apply to all of them.
  .RE
  .TP
-.BI percentage_random \fR=\fPint
+.BI percentage_random \fR=\fPint[,int][,int]
  For a random workload, set how big a percentage should be random. This defaults
  to 100%, in which case the workload is fully random. It can be set
  anywhere from 0 to 100.  Setting it to 0 would make the workload fully
@@ -1011,28 +1127,29 @@ will be queued before we have to complete it and do our thinktime. In other
  words, this setting effectively caps the queue depth if the latter is larger.
  Default: 1.
  .TP
-.BI rate \fR=\fPint
+.BI rate \fR=\fPint[,int][,int]
  Cap bandwidth used by this job. The number is in bytes/sec, the normal postfix
  rules apply. You can use \fBrate\fR=500k to limit reads and writes to 500k each,
-or you can specify read and writes separately. Using \fBrate\fR=1m,500k would
-limit reads to 1MB/sec and writes to 500KB/sec. Capping only reads or writes
+or you can specify read, write, and trim limits separately.
+Using \fBrate\fR=1m,500k would
+limit reads to 1MiB/sec and writes to 500KiB/sec. Capping only reads or writes
  can be done with \fBrate\fR=,500k or \fBrate\fR=500k,. The former will only
-limit writes (to 500KB/sec), the latter will only limit reads.
+limit writes (to 500KiB/sec), the latter will only limit reads.
  .TP
-.BI rate_min \fR=\fPint
+.BI rate_min \fR=\fPint[,int][,int]
  Tell \fBfio\fR to do whatever it can to maintain at least the given bandwidth.
  Failing to meet this requirement will cause the job to exit. The same format
-as \fBrate\fR is used for read vs write separation.
+as \fBrate\fR is used for read vs write vs trim separation.
  .TP
-.BI rate_iops \fR=\fPint
+.BI rate_iops \fR=\fPint[,int][,int]
  Cap the bandwidth to this number of IOPS. Basically the same as rate, just
  specified independently of bandwidth. The same format as \fBrate\fR is used for
-read vs write separation. If \fBblocksize\fR is a range, the smallest block
+read vs write vs trim separation. If \fBblocksize\fR is a range, the smallest block
  size is used as the metric.
  .TP
-.BI rate_iops_min \fR=\fPint
+.BI rate_iops_min \fR=\fPint[,int][,int]
  If this rate of I/O is not met, the job will exit. The same format as \fBrate\fR
-is used for read vs write separation.
+is used for read vs write vs trim separation.
  .TP
  .BI rate_process \fR=\fPstr
  This option controls how fio manages rated IO submissions. The default is
@@ -1139,6 +1256,50 @@ logging results, thus minimizing the runtime required for stable results. Note
  that the \fBramp_time\fR is considered lead in time for a job, thus it will
  increase the total runtime if a special timeout or runtime is specified.
  .TP
+.BI steadystate \fR=\fPstr:float "\fR,\fP ss" \fR=\fPstr:float
+Define the criterion and limit for assessing steady state performance. The
+first parameter designates the criterion whereas the second parameter sets the
+threshold. When the criterion falls below the threshold for the specified
+duration, the job will stop. For example, iops_slope:0.1% will direct fio
+to terminate the job when the least squares regression slope falls below 0.1%
+of the mean IOPS. If group_reporting is enabled this will apply to all jobs in
+the group. All assessments are carried out using only data from the rolling
+collection window. Threshold limits can be expressed as a fixed value or as a
+percentage of the mean in the collection window. Below are the available steady
+state assessment criteria.
+.RS
+.RS
+.TP
+.B iops
+Collect IOPS data. Stop the job if all individual IOPS measurements are within
+the specified limit of the mean IOPS (e.g., iops:2 means that all individual
+IOPS values must be within 2 of the mean, whereas iops:0.2% means that all
+individual IOPS values must be within 0.2% of the mean IOPS to terminate the
+job).
+.TP
+.B iops_slope
+Collect IOPS data and calculate the least squares regression slope. Stop the
+job if the slope falls below the specified limit.
+.TP
+.B bw
+Collect bandwidth data. Stop the job if all individual bandwidth measurements
+are within the specified limit of the mean bandwidth.
+.TP
+.B bw_slope
+Collect bandwidth data and calculate the least squares regression slope. Stop
+the job if the slope falls below the specified limit.
+.RE
+.RE
+.TP
+.BI steadystate_duration \fR=\fPtime "\fR,\fP ss_dur" \fR=\fPtime
+A rolling window of this duration will be used to judge whether steady state
+has been reached. Data will be collected once per second. The default is 0
+which disables steady state detection.
+.TP
+.BI steadystate_ramp_time \fR=\fPtime "\fR,\fP ss_ramp" \fR=\fPtime
+Allow the job to run for the specified duration before beginning data collection
+for checking the steady state job termination criterion. The default is 0.
+.TP
  .BI invalidate \fR=\fPbool
  Invalidate buffer-cache for the file prior to starting I/O.  Default: true.
  .TP
@@ -1169,6 +1330,9 @@ Same as \fBmmap\fR, but use huge files as backing.
  .TP
  .B mmapshared
  Same as \fBmmap\fR, but use a MAP_SHARED mapping.
+.TP
+.B cudamalloc
+Use GPU memory as the buffers for GPUDirect RDMA benchmark. The ioengine must be \fBrdma\fR.
  .RE
  .P
  The amount of memory allocated is the maximum allowed \fBblocksize\fR for the
@@ -1192,7 +1356,7 @@ sum of the \fBiomem_align\fR and \fBbs\fR used.
  .TP
  .BI hugepage\-size \fR=\fPint
  Defines the size of a huge page.  Must be at least equal to the system setting.
-Should be a multiple of 1MB. Default: 4MB.
+Should be a multiple of 1MiB. Default: 4MiB.
  .TP
  .B exitall
  Terminate all jobs when one finishes.  Default: wait for each job to finish.
@@ -1274,7 +1438,7 @@ option.  The allowed values are:
  .RS
  .RS
  .TP
-.B md5 crc16 crc32 crc32c crc32c-intel crc64 crc7 sha256 sha512 sha1 xxhash
+.B md5 crc16 crc32 crc32c crc32c-intel crc64 crc7 sha256 sha512 sha1 sha3-224 sha3-256 sha3-384 sha3-512 xxhash
  Store appropriate checksum in the header of each block. crc32c-intel is
  hardware accelerated SSE4.2 driven, falls back to regular crc32c if
  not supported by the system.
@@ -1324,7 +1488,7 @@ fio will fill 1/2/3/4 bytes of the buffer at the time(it can be either a
  decimal or a hex number). The verify_pattern if larger than a 32-bit quantity
  has to be a hex number that starts with either "0x" or "0X". Use with
  \fBverify\fP=str. Also, verify_pattern supports %o format, which means that for
-each block offset will be written and then verifyied back, e.g.:
+each block the offset will be written and then verified back, e.g.:
  .RS
  .RS
  \fBverify_pattern\fR=%o
@@ -1408,6 +1572,10 @@ Wait for preceding jobs in the job file to exit before starting this one.
  Start a new reporting group.  If not given, all jobs in a file will be part
  of the same reporting group, unless separated by a stonewall.
  .TP
+.BI stats \fR=\fPbool
+By default, fio collects and shows final output results for all jobs that run.
+If this option is set to 0, then fio will omit this job from the final stat output.
+.TP
  .BI numjobs \fR=\fPint
  Number of clones (processes/threads performing the same workload) of this job.
  Default: 1.
@@ -1462,13 +1630,13 @@ If set, this generates bw/clat/iops log with per file private filenames. If
  not set, jobs with identical names will share the log filename. Default: true.
  .TP
  .BI write_bw_log \fR=\fPstr
-If given, write a bandwidth log of the jobs in this job file. Can be used to
-store data of the bandwidth of the jobs in their lifetime. The included
-fio_generate_plots script uses gnuplot to turn these text files into nice
-graphs. See \fBwrite_lat_log\fR for behaviour of given filename. For this
-option, the postfix is _bw.x.log, where x is the index of the job (1..N,
-where N is the number of jobs). If \fBper_job_logs\fR is false, then the
-filename will not include the job index. See the \fBLOG FILE FORMATS\fR
+If given, write a bandwidth log for this job. Can be used to store data of the
+bandwidth of the jobs in their lifetime. The included fio_generate_plots script
+uses gnuplot to turn these text files into nice graphs. See \fBwrite_lat_log\fR
+for behaviour of given filename. For this option, the postfix is _bw.x.log,
+where x is the index of the job (1..N, where N is the number of jobs). If
+\fBper_job_logs\fR is false, then the filename will not include the job index.
+See the \fBLOG FILE FORMATS\fR
  section.
  .TP
  .BI write_lat_log \fR=\fPstr
@@ -1826,7 +1994,7 @@ Preallocate donor's file on init
  .BI 1:
  allocate space immediately inside defragment event, and free right after event
  .RE
-.TP 
+.TP
  .BI (rbd)clustername \fR=\fPstr
  Specifies the name of the ceph cluster.
  .TP
@@ -1848,7 +2016,7 @@ While running, \fBfio\fR will display the status of the created jobs.  For
  example:
  .RS
  .P
-Threads: 1: [_r] [24.8% done] [ 13509/  8334 kb/s] [eta 00h:01m:31s]
+Jobs: 1: [_r] [24.8% done] [ 13509/  8334 kb/s] [eta 00h:01m:31s]
  .RE
  .P
  The characters in the first set of brackets denote the current status of each
@@ -2003,14 +2171,15 @@ scripted use.
  A job description (if provided) follows on a new line.  Note that the first
  number in the line is the version number. If the output has to be changed
  for some reason, this number will be incremented by 1 to signify that
-change.  The fields are:
+change. Numbers in brackets (e.g. "[v3]") indicate which terse version
+introduced a field. The fields are:
  .P
  .RS
-.B terse version, fio version, jobname, groupid, error
+.B terse version, fio version [v3], jobname, groupid, error
  .P
  Read status:
  .RS
-.B Total I/O \fR(KB)\fP, bandwidth \fR(KB/s)\fP, IOPS, runtime \fR(ms)\fP
+.B Total I/O \fR(KiB)\fP, bandwidth \fR(KiB/s)\fP, IOPS, runtime \fR(ms)\fP
  .P
  Submission latency:
  .RS
@@ -2030,13 +2199,17 @@ Total latency:
  .RE
  Bandwidth:
  .RS
-.B min, max, aggregate percentage of total, mean, standard deviation
+.B min, max, aggregate percentage of total, mean, standard deviation, number of samples [v5]
+.RE
+IOPS [v5]:
+.RS
+.B min, max, mean, standard deviation, number of samples
  .RE
  .RE
  .P
  Write status:
  .RS
-.B Total I/O \fR(KB)\fP, bandwidth \fR(KB/s)\fP, IOPS, runtime \fR(ms)\fP
+.B Total I/O \fR(KiB)\fP, bandwidth \fR(KiB/s)\fP, IOPS, runtime \fR(ms)\fP
  .P
  Submission latency:
  .RS
@@ -2056,10 +2229,19 @@ Total latency:
  .RE
  Bandwidth:
  .RS
-.B min, max, aggregate percentage of total, mean, standard deviation
+.B min, max, aggregate percentage of total, mean, standard deviation, number of samples [v5]
+.RE
+IOPS [v5]:
+.RS
+.B min, max, mean, standard deviation, number of samples
  .RE
  .RE
  .P
+Trim status [all but version 3]:
+.RS
+Similar to Read/Write status but for trims.
+.RE
+.P
  CPU usage:
  .RS
  .B user, system, context switches, major page faults, minor page faults
@@ -2082,7 +2264,7 @@ Milliseconds:
  .RE
  .RE
  .P
-Disk utilization (1 for each disk used):
+Disk utilization (1 for each disk used) [v3]:
  .RS
  .B name, read ios, write ios, read merges, write merges, read ticks, write ticks, read in-queue time, write in-queue time, disk utilization percentage
  .RE
@@ -2094,6 +2276,15 @@ Error Info (dependent on continue_on_error, default off):
  .P
  .B text description (if provided in config - appears on newline)
  .RE
+.P
+Below is a single line containing short names for each of the fields in
+the minimal output v3, separated by semicolons:
+.RS
+.P
+.nf
+terse_version_3;fio_version;jobname;groupid;error;read_kb;read_bandwidth;read_iops;read_runtime_ms;read_slat_min;read_slat_max;read_slat_mean;read_slat_dev;read_clat_min;read_clat_max;read_clat_mean;read_clat_dev;read_clat_pct01;read_clat_pct02;read_clat_pct03;read_clat_pct04;read_clat_pct05;read_clat_pct06;read_clat_pct07;read_clat_pct08;read_clat_pct09;read_clat_pct10;read_clat_pct11;read_clat_pct12;read_clat_pct13;read_clat_pct14;read_clat_pct15;read_clat_pct16;read_clat_pct17;read_clat_pct18;read_clat_pct19;read_clat_pct20;read_lat_min;read_lat_max;read_lat_mean;read_lat_dev;read_bw_min;read_bw_max;read_bw_agg_pct;read_bw_mean;read_bw_dev;write_kb;write_bandwidth;write_iops;write_runtime_ms;write_slat_min;write_slat_max;write_slat_mean;write_slat_dev;write_clat_min;write_clat_max;write_clat_mean;write_clat_dev;write_clat_pct01;write_clat_pct02;write_clat_pct03;write_clat_pct04;write_clat_pct05;write_clat_pct06;write_clat_pct07;write_clat_pct08;write_clat_pct09;write_clat_pct10;write_clat_pct11;write_clat_pct12;write_clat_pct13;write_clat_pct14;write_clat_pct15;write_clat_pct16;write_clat_pct17;write_clat_pct18;write_clat_pct19;write_clat_pct20;write_lat_min;write_lat_max;write_lat_mean;write_lat_dev;write_bw_min;write_bw_max;write_bw_agg_pct;write_bw_mean;write_bw_dev;cpu_user;cpu_sys;cpu_csw;cpu_mjf;cpu_minf;iodepth_1;iodepth_2;iodepth_4;iodepth_8;iodepth_16;iodepth_32;iodepth_64;lat_2us;lat_4us;lat_10us;lat_20us;lat_50us;lat_100us;lat_250us;lat_500us;lat_750us;lat_1000us;lat_2ms;lat_4ms;lat_10ms;lat_20ms;lat_50ms;lat_100ms;lat_250ms;lat_500ms;lat_750ms;lat_1000ms;lat_2000ms;lat_over_2000ms;disk_name;disk_read_iops;disk_write_iops;disk_read_merges;disk_write_merges;disk_read_ticks;disk_write_ticks;disk_queue_time;disk_util
+.fi
+.RE
  .SH TRACE FILE FORMAT
  There are two trace file formats that you can encounter. The older (v1) format
  is unsupported since version 1.20-rc3 (March 2008). It will still be described
@@ -2299,7 +2490,7 @@ on the type of log, it will be one of the following:
  Value is in latency in usecs
  .TP
  .B Bandwidth log
-Value is in KB/sec
+Value is in KiB/sec
  .TP
  .B IOPS log
  Value is in IOPS
@@ -2325,7 +2516,7 @@ IO is a TRIM
  The \fIoffset\fR is the offset, in bytes, from the start of the file, for that
  particular IO. The logging of the offset can be toggled with \fBlog_offset\fR.
  
-If windowed logging is enabled though \fBlog_avg_msec\fR, then fio doesn't log
+If windowed logging is enabled through \fBlog_avg_msec\fR, then fio doesn't log
  individual IOs. Instead it logs the average values over the specified
  period of time. Since \fIdata direction\fR and \fIoffset\fR are per-IO values,
  they aren't applicable if windowed logging is enabled. If windowed logging
@@ -2444,3 +2635,10 @@ See \fBREADME\fR.
  For further documentation see \fBHOWTO\fR and \fBREADME\fR.
  .br
  Sample jobfiles are available in the \fBexamples\fR directory.
+.br
+These are typically located under /usr/share/doc/fio.
+
+\fBHOWTO\fR:  http://git.kernel.dk/cgit/fio/plain/HOWTO
+.br
+\fBREADME\fR: http://git.kernel.dk/cgit/fio/plain/README
+.br