X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=HOWTO;h=4fd025126925eb6fd75910608530b8f8c20ba27f;hp=529c96784117b0955b93fe3f3ad31c3faa58e1e2;hb=fdc0f3b646e417497849d4398029f780b0e5262f;hpb=2a988d8bcb447eb098fc382835cc507587c6ba66

diff --git a/HOWTO b/HOWTO
index 529c9678..4fd02512 100644
--- a/HOWTO
+++ b/HOWTO
@@ -9,6 +9,7 @@ Table of contents
 6. Normal output
 7. Terse output
 8. Trace file format
+9. CPU idleness profiling
 
 1.0 Overview and history
 ------------------------
@@ -272,17 +273,43 @@ filename=str	Fio normally makes up a filename based on the job name,
 		can specify a number of files by separating the names with a
 		':' colon. So if you wanted a job to open /dev/sda and /dev/sdb
 		as the two working files, you would use
-		filename=/dev/sda:/dev/sdb. On Windows, disk devices are accessed
-		as \\.\PhysicalDrive0 for the first device, \\.\PhysicalDrive1
-		for the second etc.
-		Note: Windows and FreeBSD prevent write access to areas of the disk
-		containing in-use data (e.g. filesystems).
-		If the wanted filename does need to include a colon, then escape that
- 		with a '\' character.
-		For instance, if the filename is "/dev/dsk/foo@3,0:c",
-		then you would use filename="/dev/dsk/foo@3,0\:c".
-		'-' is a reserved name, meaning stdin or stdout. Which of the
-		two depends on the read/write direction set.
+		filename=/dev/sda:/dev/sdb. On Windows, disk devices are
+		accessed as \\.\PhysicalDrive0 for the first device,
+		\\.\PhysicalDrive1 for the second etc. Note: Windows and
+		FreeBSD prevent write access to areas of the disk containing
+		in-use data (e.g. filesystems).
+		If the wanted filename does need to include a colon, then
+		escape that with a '\' character. For instance, if the filename
+		is "/dev/dsk/foo@3,0:c", then you would use
+		filename="/dev/dsk/foo@3,0\:c". '-' is a reserved name, meaning
+		stdin or stdout. Which of the two depends on the read/write
+		direction set.
+
+filename_format=str
+		If sharing multiple files between jobs, it is usually necessary
+		to have fio generate the exact names that you want. By default,
+		fio will name a file based on the default file format
+		specification of jobname.jobnumber.filenumber. With this
+		option, that can be customized. Fio will recognize and replace
+		the following keywords in this string:
+
+		$jobname
+			The name of the worker thread or process.
+
+		$jobnum
+			The incremental number of the worker thread or
+			process.
+
+		$filenum
+			The incremental number of the file for that worker
+			thread or process.
+
+		To have dependent jobs share a set of files, this option can
+		be set to have fio generate filenames that are shared between
+		the two. For instance, if testfiles.$filenum is specified,
+		file number 4 for any job will be named testfiles.4. The
+		default of $jobname.$jobnum.$filenum will be used if
+		no other format specifier is given.
 
 opendir=str	Tell fio to recursively add any file it can find in this
 		directory and down the file system tree.
@@ -301,11 +328,6 @@ lockfile=str	Fio defaults to not locking any files before it does
 					same time, but writes get exclusive
 					access.
 
-		The option may be post-fixed with a lock batch number. If
-		set, then each thread/process may do that amount of IOs to
-		the file before giving up the lock. Since lock acquisition is
-		expensive, batching the lock/unlocks will speed up IO.
-
 readwrite=str
 rw=str		Type of io pattern. Accepted values are:
 
@@ -353,6 +375,12 @@ kb_base=int	The base unit for a kilobyte. The defacto base is 2^10, 1024.
 		ten unit instead, for obvious reasons. Allow values are
 		1024 or 1000, with 1024 being the default.
 
+unified_rw_reporting=bool	Fio normally reports statistics on a per
+		data direction basis, meaning that read, write, and trim are
+		accounted and reported separately. If this option is set,
+		fio will sum the results and report them as "mixed"
+		instead.
+
 randrepeat=bool	For random IO workloads, seed the generator in a predictable
 		way so that results are repeatable across repetitions.
 
@@ -403,7 +431,7 @@ filesize=int	Individual file sizes. May be a range, in which case fio
 fill_device=bool
 fill_fs=bool	Sets size to something really large and waits for ENOSPC (no
 		space left on device) as the terminating condition. Only makes
-                sense with sequential write. For a read workload, the mount
+		sense with sequential write. For a read workload, the mount
 		point will be filled first then IO started on the result. This
 		option doesn't make sense if operating on a raw device node,
 		since the size of that is already known by the file system.
@@ -540,6 +568,8 @@ ioengine=str	Defines how the job issues io to the file. The following
 
 			vsync	Basic readv(2) or writev(2) IO.
 
+			psyncv	Basic preadv(2) or pwritev(2) IO.
+
 			libaio	Linux native asynchronous io. Note that Linux
 				may only support queued behaviour with
 				non-buffered IO (set direct=1 or buffered=0).
@@ -704,7 +734,7 @@ overwrite=bool	If true, writes to a file will always overwrite existing
 		and is large enough for the specified write phase, nothing
 		will be done.
 
-end_fsync=bool	If true, fsync file contents when the job exits.
+end_fsync=bool	If true, fsync file contents when a write stage has completed.
 
 fsync_on_close=bool	If true, fio will fsync() a dirty file on close.
 		This differs from end_fsync in that it will happen on every
@@ -738,6 +768,17 @@ random_distribution=str:float	By default, fio will use a completely uniform
 		random_distribution=zipf:1.2 as the option. If a non-uniform
 		model is used, fio will disable use of the random map.
 
+percentage_random=int	For a random workload, set how big a percentage should
+		be random. This defaults to 100%, in which case the workload
+		is fully random. It can be set anywhere from 0 to 100.
+		Setting it to 0 would make the workload fully sequential. Any
+		setting in between will result in a random mix of sequential
+		and random IO, at the given percentages.
+	
+percentage_sequential=int	See percentage_random. It is guaranteed that
+		they add up to 100. The later setting has priority, each
+		will adjust the other.
+
 norandommap	Normally fio will cover every block of the file when doing
 		random IO. If this option is given, fio will just get a
 		new random offset without looking at past io history. This
@@ -769,7 +810,7 @@ random_generator=str	Fio supports the following engines for generating
 		block sizes, not with workloads that use multiple block
 		sizes. If used with such a workload, fio may read or write
 		some blocks multiple times.
-		
+
 nice=int	Run the job with the given nice value. See man nice(2).
 
 prio=int	Set the io priority value of this job. Linux limits us to
@@ -789,11 +830,15 @@ thinktime_spin=int
 		to sleeping for the rest of the period specified by
 		thinktime.
 
-thinktime_blocks
+thinktime_blocks=int
 		Only valid if thinktime is set - control how many blocks
 		to issue, before waiting 'thinktime' usecs. If not set,
 		defaults to 1 which will make fio wait 'thinktime' usecs
-		after every block.
+		after every block. This effectively makes any queue depth
+		setting redundant, since no more than 1 IO will be queued
+		before we have to complete it and do our thinktime. In
+		other words, this setting effectively caps the queue depth
+		if the latter is larger.
 
 rate=int	Cap the bandwidth used by this job. The number is in bytes/sec,
 		the normal suffix rules apply. You can use rate=500k to limit
@@ -844,9 +889,7 @@ cpus_allowed=str Controls the same options as cpumask, but it allows a text
 numa_cpu_nodes=str Set this job running on spcified NUMA nodes' CPUs. The
 		arguments allow comma delimited list of cpu numbers,
 		A-B ranges, or 'all'. Note, to enable numa options support,
-		export the following environment variables,
-			export EXTFLAGS+=" -DFIO_HAVE_LIBNUMA "
-			export EXTLIBS+=" -lnuma "
+		fio must be built on a system with libnuma-dev(el) installed.
 
 numa_mem_policy=str Set this job's memory policy and corresponding NUMA
 		nodes. Format of the argements:
@@ -1193,7 +1236,7 @@ write_lat_log=str Same as write_bw_log, except that this option stores io
 
 		write_lat_log=foo
 
-		The actual log names will be foo_slat.log, foo_slat.log,
+		The actual log names will be foo_slat.log, foo_clat.log,
 		and foo_lat.log. This helps fio_generate_plot fine the logs
 		automatically.
 
@@ -1215,12 +1258,15 @@ log_avg_msec=int By default, fio will log an entry in the iops, latency,
 lockmem=int	Pin down the specified amount of memory with mlock(2). Can
 		potentially be used instead of removing memory or booting
 		with less memory to simulate a smaller amount of memory.
+		The amount specified is per worker.
 
 exec_prerun=str	Before running this job, issue the command specified
-		through system(3).
+		through system(3). Output is redirected to a file called
+		jobname.prerun.txt.
 
 exec_postrun=str After the job completes, issue the command specified
-		 though system(3).
+		 through system(3). Output is redirected to a file called
+		 jobname.postrun.txt.
 
 ioscheduler=str	Attempt to switch the device hosting the file to the specified
 		io scheduler before running.
@@ -1256,6 +1302,22 @@ percentile_list=float_list Overwrite the default list of percentiles
 		the values of completion latency below which 99.5% and
 		99.9% of the observed latencies fell, respectively.
 
+clocksource=str	Use the given clocksource as the base of timing. The
+		supported options are:
+
+			gettimeofday	gettimeofday(2)
+
+			clock_gettime	clock_gettime(2)
+
+			cpu		Internal CPU clock source
+
+		cpu is the preferred clocksource if it is reliable, as it
+		is very fast (and fio is heavy on time calls). Fio will
+		automatically use this clocksource if it's supported and
+		considered reliable on the system it is running on, unless
+		another clocksource is specifically set. For x86/x86-64 CPUs,
+		this means supporting an invariant TSC.
+
 gtod_reduce=bool Enable all of the gettimeofday() reducing options
 		(disable_clat, disable_slat, disable_bw) plus reduce
 		precision of the timeout somewhat to really shrink
@@ -1381,6 +1443,9 @@ that defines them is selected.
 [netsplice] port=int
 [net] port=int	The TCP or UDP port to bind to or connect to.
 
+[netsplice] nodelay=bool
+[net] nodelay=bool	Set TCP_NODELAY on TCP connections.
+
 [netsplice] protocol=str
 [netsplice] proto=str
 [net] protocol=str
@@ -1554,6 +1619,9 @@ util=		The disk utilization. A value of 100% means we kept the disk
 
 It is also possible to get fio to dump the current output while it is
 running, without terminating the job. To do that, send fio the USR1 signal.
+You can also get regularly timed dumps by using the --status-interval
+parameter, or request a dump on demand by creating a file in /tmp named
+fio-dump-status. If fio sees this file, it will unlink it and dump the current output status.
 
 
 7.0 Terse output
@@ -1672,3 +1740,18 @@ write      Write 'length' bytes beginning from 'offset'
 sync       fsync() the file
 datasync   fdatasync() the file
 trim       trim the given file from the given 'offset' for 'length' bytes
+
+
+9.0 CPU idleness profiling
+--------------------------
+In some cases, we want to understand CPU overhead in a test. For example,
+we may test patches specifically to see whether they reduce CPU usage.
+fio implements a balloon approach to create a thread per CPU that runs at
+idle priority, meaning that it only runs when nobody else needs the CPU.
+By measuring the amount of work completed by the thread, idleness of each
+CPU can be derived accordingly.
+
+A unit of work is defined as touching a full page of unsigned characters. The
+mean and standard deviation of the time to complete a unit of work are reported
+in the "unit work" section. Options can be chosen to report detailed percpu
+idleness or overall system idleness by aggregating percpu stats.