Merge branch 'atomic-writes'

[fio.git] / fio.1
diff --git a/fio.1 b/fio.1

index 1c8e3a56707e3e3700e3267bd1747786cd2540d0..0fd0fb25f288e6a027c6611e2c49c425577750f7 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -585,6 +585,11 @@ the first device, `\\\\.\\PhysicalDrive1' for the second etc.
  Note: Windows and FreeBSD prevent write access to areas
  of the disk containing in-use data (e.g. filesystems).
  .P
+For HTTP and S3 access, specify a valid URL path or S3 key, respectively. 
+A filename for path-style S3 includes a bucket name (`/bucket/k/e.y') 
+while a virtual-hosted-style S3 filename (`/k/e.y') does not because its 
+bucket name is specified in \fBhttp_host\fR.
+.P
  The filename `\-' is a reserved name, meaning *stdin* or *stdout*. Which
  of the two depends on the read/write direction set.
  .RE
@@ -2261,6 +2266,19 @@ cached data. Currently the RWF_NOWAIT flag does not supported for cached write.
  For direct I/O, requests will only succeed if cache invalidation isn't required,
  file blocks are fully allocated and the disk request could be issued immediately.
  .TP
+.BI (pvsync2,libaio,io_uring)atomic \fR=\fPbool
+This option means that writes are issued with torn-write protection, meaning
+that after a power failure or kernel crash, either all or none of the data from
+the write will be stored, but never a mix of old and new data. Torn-write
+protection is also known as atomic writes.
+
+This option sets the RWF_ATOMIC flag (supported from the 6.11 Linux kernel) on
+a per-IO basis.
+
+Writes with RWF_ATOMIC set will be rejected by the kernel when the file does
+not support torn-write protection. To learn a file's torn-write limits, issue
+statx with STATX_WRITE_ATOMIC.
+.TP
  .BI (io_uring_cmd,xnvme)fdp \fR=\fPbool
  Enable Flexible Data Placement mode for write commands.
  .TP
@@ -2305,12 +2323,17 @@ The available placement ID (indices) are defined by \fBplids\fR or
  .RE
  .TP
  .BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr
-Select which Placement IDs (streams) or Placement ID Indicies (FDP) this job is
-allowed to use for writes.  For FDP by default, the job will cycle through all
-available Placement IDs, so use this to isolate these identifiers to specific
-jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
-specify, you would set `plids=0,2,5`. For streams this should be a
-comma-separated list of Stream IDs.
+Select which Placement ID Indices (FDP) or Placement IDs (streams) this job is
+allowed to use for writes. This option accepts a comma-separated list of values
+or ranges (e.g., 1,2-4,5,6-8).
+
+For FDP by default, the job will cycle through all available Placement IDs, so
+use this option to be selective. The values specified here are array indices
+for the list of placement IDs returned by the nvme-cli command `nvme fdp
+status'. If you want fio to use FDP placement identifiers only at indices 0, 2
+and 5, set `plids=0,2,5'.
+
+For streams this should be a list of Stream IDs.
  .TP
  .BI (io_uring_cmd,xnvme)\fR\fBdp_scheme\fP=str
  Defines which placement ID (index) to be selected based on offset(LBA) range.
@@ -2421,7 +2444,7 @@ The TCP or UDP port to bind to or connect to. If this is used with
  this will be the starting port number since fio will use a range of
  ports.
  .TP
-.BI (rdma,librpma_*)port \fR=\fPint
+.BI (rdma)port \fR=\fPint
  The port to use for RDMA-CM communication. This should be the same
  value on the client and the server side.
  .TP
@@ -2430,16 +2453,6 @@ The hostname or IP address to use for TCP, UDP or RDMA-CM based I/O.
  If the job is a TCP listener or UDP reader, the hostname is not used
  and must be omitted unless it is a valid UDP multicast address.
  .TP
-.BI (librpma_*)serverip \fR=\fPstr
-The IP address to be used for RDMA-CM based I/O.
-.TP
-.BI (librpma_*_server)direct_write_to_pmem \fR=\fPbool
-Set to 1 only when Direct Write to PMem from the remote host is possible. Otherwise, set to 0.
-.TP
-.BI (librpma_*_server)busy_wait_polling \fR=\fPbool
-Set to 0 to wait for completion instead of busy-wait polling completion.
-Default: 1.
-.TP
  .BI (netsplice,net)interface \fR=\fPstr
  The IP address of the network interface used to send or receive UDP
  multicast.
@@ -2549,8 +2562,10 @@ Touching all objects affects ceph caches and likely impacts test results.
  Enabled by default.
  .TP
  .BI (http)http_host \fR=\fPstr
-Hostname to connect to. For S3, this could be the bucket name. Default
-is \fBlocalhost\fR
+Hostname to connect to.  HTTP port 80 is used automatically when the value
+of the \fBhttps\fP parameter is \fBoff\fP, and HTTPS port 443 if it is \fBon\fP.
+A virtual-hosted-style S3 hostname starts with a bucket name, while a
+path-style S3 hostname does not.  Default is \fBlocalhost\fR.
  .TP
  .BI (http)http_user \fR=\fPstr
  Username for HTTP authentication.
@@ -2632,14 +2647,34 @@ that "owns" the device also needs to support hipri (also known as iopoll
  and mq_poll). The MegaRAID driver is an example of a SCSI LLD.
  Default: clear (0) which does normal (interrupted based) IO.
  .TP
-.BI (sg)readfua \fR=\fPbool
+.BI (sg,io_uring_cmd)readfua \fR=\fPbool
  With readfua option set to 1, read operations include the force
  unit access (fua) flag. Default: 0.
  .TP
-.BI (sg)writefua \fR=\fPbool
+.BI (sg,io_uring_cmd)writefua \fR=\fPbool
  With writefua option set to 1, write operations include the force
  unit access (fua) flag. Default: 0.
  .TP
+.BI (io_uring_cmd)write_mode \fR=\fPstr
+Specifies the type of write operation.  Defaults to 'write'.
+.RS
+.RS
+.TP
+.B write
+Use Write commands for write operations
+.TP
+.B uncor
+Use Write Uncorrectable commands for write operations
+.TP
+.B zeroes
+Use Write Zeroes commands for write operations
+.TP
+.B verify
+Use Verify commands for write operations
+.TP
+.RE
+.RE
+.TP
  .BI (sg)sg_write_mode \fR=\fPstr
  Specify the type of write commands to issue. This option can take multiple
  values:
@@ -3691,6 +3726,14 @@ Enable experimental verification. Standard verify records I/O metadata for
  later use during the verification phase. Experimental verify instead resets the
  file after the write phase and then replays I/Os for the verification phase.
  .TP
+.BI verify_write_sequence \fR=\fPbool
+Verify the header write sequence number. In a scenario with multiple jobs,
+verification of the write sequence number may fail. Disabling this option
+will mean that write sequence number checking is skipped. Doing that can be
+useful for testing atomic writes, as it means that checksum verification can
+still be attempted. When \fBatomic\fR is enabled, checksum verification
+is expected to succeed (while write sequence checking can still fail).
+.TP
  .BI trim_percentage \fR=\fPint
  Number of verify blocks to discard/trim.
  .TP
@@ -3911,9 +3954,16 @@ entry as well as the other data values. Defaults to 0 meaning that
  offsets are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
  .TP
  .BI log_prio \fR=\fPbool
-If this is set, the iolog options will include the I/O priority for the I/O
-entry as well as the other data values. Defaults to 0 meaning that
-I/O priorities are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+If this is set, the `Command priority' field in \fBLOG FILE FORMATS\fR
+shows the priority value and the IO priority class of the command.
+Otherwise, the field shows if the command has the highest RT priority
+class or not. Also see \fBLOG FILE FORMATS\fR section.
+.TP
+.BI log_issue_time \fR=\fPbool
+If this is set, the iolog options will include the command issue time for the
+I/O entry as well as the other data values. Defaults to 0 meaning that command
+issue times are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+This option shall be set together with \fBwrite_lat_log\fR and \fBlog_offset\fR.
  .TP
  .BI log_compression \fR=\fPint
  If this is set, fio will compress the I/O logs as it goes, to keep the
@@ -4910,7 +4960,7 @@ and IOPS. The logs share a common format, which looks like this:
  .RS
  .P
  time (msec), value, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
  .RE
  .P
  `Time' for the log entry is always in milliseconds. The `value' logged depends
@@ -4953,6 +5003,11 @@ number with the lowest 13 bits indicating the priority value (\fBprio\fR and
  \fBcmdprio\fR options) and the highest 3 bits indicating the IO priority class
  (\fBprioclass\fR and \fBcmdprio_class\fR options).
  .P
+The entry's `issue time' is the command issue time in nanoseconds. The logging
+of the issue time can be toggled with \fBlog_issue_time\fR. This field has valid
+values in the completion latency (clat) and submission latency (slat) log files.
+The field has value 0 in other log files.
+.P
  Fio defaults to logging every individual I/O but when windowed logging is set
  through \fBlog_avg_msec\fR, either the average (by default), the maximum
  (\fBlog_window_value\fR is set to max) `value' seen over the specified period of
@@ -4962,12 +5017,12 @@ takes this form:
  .RS
  .P
  time (msec), value, value1, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
  .RE
  .P
  Each `data direction' seen within the window period will aggregate its values
-in a separate row. Further, when using windowed logging the `block size' and
-`offset' entries will always contain 0.
+in a separate row. Further, when using windowed logging the `block size',
+`offset' and `issue time' entries will always contain 0.
  .SH CLIENT / SERVER
  Normally fio is invoked as a stand-alone application on the machine where the
  I/O workload should be generated. However, the backend and frontend of fio can