--- /dev/null
+fio
+*.o
+.depend
--- /dev/null
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+\f
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
--- /dev/null
+# Build rules for fio.
+CC = gcc
+CFLAGS = -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+PROGS = fio
+SCRIPTS = fio_generate_plots
+
+all: depend $(PROGS) $(SCRIPTS)
+
+# These targets never produce a file of the same name.
+.PHONY: all clean depend install
+
+fio: fio.o fio-io.o fio-ini.o md5.o crc32.o
+	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt
+
+clean:
+	-rm -f *.o .depend $(PROGS)
+
+# Scan includes with the same flags used for compiling, so the -D feature
+# macros in CFLAGS are visible while dependencies are generated.
+depend:
+	@$(CC) -MM $(CFLAGS) *.c 1> .depend
+
+INSTALL = install
+prefix = /usr/local
+bindir = $(prefix)/bin
+
+install: $(PROGS) $(SCRIPTS)
+	$(INSTALL) -m755 -d $(DESTDIR)$(bindir)
+	$(INSTALL) $(PROGS) $(SCRIPTS) $(DESTDIR)$(bindir)
+
+# Pull in generated dependencies only once they exist.
+ifneq ($(wildcard .depend),)
+include .depend
+endif
--- /dev/null
+fio
+---
+
+fio is a tool that will spawn a number of threads, each doing a particular
+type of io action as specified by the user. fio takes a number of
+global parameters, each inherited by every thread unless a per-thread
+parameter overriding that setting is given.
+
+Options
+-------
+
+$ fio
+ -s IO is sequential
+ -b block size in KiB for each io
+ -t <sec> Runtime in seconds
+ -r For random io, sequence must be repeatable
+ -R <on> If one thread fails to meet rate, quit all
+ -o <on> Use direct IO if 1, buffered if 0
+ -l Generate per-job latency logs
+ -w Generate per-job bandwidth logs
+ -f <file> Read <file> for job descriptions
+ -v Print version information and exit
+
+The <jobs> format is as follows:
+
+ directory=x Use 'x' as the top level directory for storing files
+ rw=x 'x' may be: read, randread, write, or randwrite
+ size=x Set file size to x bytes (x string can include k/m/g)
+ ioengine=x 'x' may be: aio/libaio/linuxaio for Linux aio,
+ posixaio for POSIX aio, sync for regular read/write io,
+ mmap for mmap'ed io, or sgio for direct SG_IO io. The
+ latter only works on Linux on SCSI (or SCSI-like
+ devices, such as usb-storage or sata/libata driven)
+ devices.
+ iodepth=x For async io, allow 'x' ios in flight
+ overwrite=x If 'x', layout a write file first.
+ prio=x Run io at prio X, 0-7 is the kernel allowed range
+ prioclass=x Run io at prio class X
+ bs=x Use 'x' for thread blocksize. May include k/m postfix.
+ bsrange=x-y Mix thread block sizes randomly between x and y. May
+ also include k/m postfix.
+ direct=x 1 for direct IO, 0 for buffered IO
+ thinktime=x "Think" x usec after each io
+ rate=x Throttle rate to x KiB/sec
+ ratemin=x Quit if rate of x KiB/sec can't be met
+ ratecycle=x ratemin averaged over x msecs
+ cpumask=x Only allow job to run on CPUs defined by mask.
+ fsync=x If writing, fsync after every x blocks have been written
+ startdelay=x Start this thread x seconds after startup
+ timeout=x Terminate x seconds after startup
+ offset=x Start io at offset x (x string can include k/m/g)
+ invalidate=x Invalidate page cache for file prior to doing io
+ sync=x Use sync writes if x and writing
+ mem=x If x == malloc, use malloc for buffers. If x == shm,
+ use shm for buffers. If x == mmap, use anon mmap.
+ exitall When one thread quits, terminate the others
+ bwavgtime=x Average bandwidth stats over an x msec window.
+ create_serialize=x If 'x', serialize file creation.
+ create_fsync=x If 'x', run fsync() after file creation.
+ loops=x Run the job 'x' number of times.
+ verify=x If 'x' == md5, use md5 for verifies. If 'x' == crc32,
+ use crc32 for verifies. md5 is 'safer', but crc32 is
+ a lot faster. Only makes sense for writing to a file.
+ stonewall Wait for preceding jobs to end before running.
+ numjobs=x Create 'x' similar entries for this job
+ thread Use pthreads instead of forked jobs
+
+
+Examples using a job file
+-------------------------
+
+A sample job file doing the same as above would look like this:
+
+[read_file]
+rw=0
+bs=4096
+
+[write_file]
+rw=1
+bs=16384
+
+And fio would be invoked as:
+
+$ fio -o1 -s -f file_with_above
+
+The second example would look like this:
+
+[rf1]
+rw=0
+prio=6
+
+[rf2]
+rw=0
+prio=3
+
+[rf3]
+rw=0
+prio=0
+direct=1
+
+And fio would be invoked as:
+
+$ fio -o0 -s -b4096 -f file_with_above
+
+'global' is a reserved keyword. When used as the filename, it sets the
+default options for the threads following that section. It is possible
+to have more than one global section in the file, as it only affects
+subsequent jobs.
+
+Also see the examples/ dir for sample job files.
+
+
+Interpreting the output
+-----------------------
+
+fio spits out a lot of output. While running, fio will display the
+status of the jobs created. An example of that would be:
+
+Threads now running: 2 : [ww] [5.73% done]
+
+The characters inside the square brackets denote the current status of
+each thread. The possible values (in typical life cycle order) are:
+
+Idle Run
+---- ---
+P Thread setup, but not started.
+C Thread created and running, but not doing anything yet
+ R Running, doing sequential reads.
+ r Running, doing random reads.
+ W Running, doing sequential writes.
+ w Running, doing random writes.
+V Running, doing verification of written data.
+E Thread exited, not reaped by main thread yet.
+_ Thread reaped.
+
+The other values are fairly self explanatory - number of threads currently
+running and doing io, and the estimated completion percentage.
+
+When fio is done (or interrupted by ctrl-c), it will show the data for
+each thread, group of threads, and disks in that order. For each data
+direction, the output looks like:
+
+Client1 (g=0): err= 0:
+ write: io= 32MiB, bw= 666KiB/s, runt= 50320msec
+ slat (msec): min= 0, max= 136, avg= 0.03, dev= 1.92
+ clat (msec): min= 0, max= 631, avg=48.50, dev=86.82
+ bw (KiB/s) : min= 0, max= 1196, per=51.00%, avg=664.02, dev=681.68
+ cpu : usr=1.49%, sys=0.25%, ctx=7969
+
+The client number is printed, along with the group id and error of that
+thread. Below are the io statistics, here for writes. In the order listed,
+they denote:
+
+io= Number of megabytes io performed
+bw= Average bandwidth rate
+runt= The runtime of that thread
+ slat= Submission latency (avg being the average, dev being the
+ standard deviation). This is the time it took to submit
+ the io. For sync io, the slat is really the completion
+ latency, since queue/complete is one operation there.
+ clat= Completion latency. Same names as slat, this denotes the
+ time from submission to completion of the io pieces. For
+ sync io, clat will usually be equal (or very close) to 0,
+ as the time from submit to complete is basically just
+ CPU time (io has already been done, see slat explanation).
+ bw= Bandwidth. Same names as the xlat stats, but also includes
+ an approximate percentage of total aggregate bandwidth
+ this thread received in this group. This last value is
+ only really useful if the threads in this group are on the
+ same disk, since they are then competing for disk access.
+cpu= CPU usage. User and system time, along with the number
+ of context switches this thread went through.
+
+After each client has been listed, the group statistics are printed. They
+will look like this:
+
+Run status group 0 (all jobs):
+ READ: io=64MiB, aggrb=22178, minb=11355, maxb=11814, mint=2840msec, maxt=2955msec
+ WRITE: io=64MiB, aggrb=1302, minb=666, maxb=669, mint=50093msec, maxt=50320msec
+
+For each data direction, it prints:
+
+io= Number of megabytes io performed.
+aggrb= Aggregate bandwidth of threads in this group.
+minb= The minimum average bandwidth a thread saw.
+maxb= The maximum average bandwidth a thread saw.
+mint= The minimum runtime of a thread.
+maxt= The maximum runtime of a thread.
+
+And finally, the disk statistics are printed. They will look like this:
+
+Disk stats (read/write):
+ sda: ios=16398/16511, merge=30/162, ticks=6853/819634, in_queue=826487, util=100.00%
+
+Each value is printed for both reads and writes, with reads first. The
+numbers denote:
+
+ios= Number of ios performed by all groups.
+merge= Number of merges in the io scheduler.
+ticks= Number of ticks we kept the disk busy.
+in_queue= Total time spent in the disk queue.
+util= The disk utilization. A value of 100% means we kept the disk
+ busy constantly, 50% would be a disk idling half of the time.
--- /dev/null
+/*
+ * Alpha-specific definitions. Relies on arch_alpha and generic_ffz(), which
+ * are declared in the common header guarded by ARCH_H before it includes
+ * this file.
+ */
+#ifndef ARCH_ALPHA_H
+#define ARCH_ALPHA_H
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_alpha)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for alpha - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set 442
+#define __NR_ioprio_get 443
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64 413
+#endif
+
+/* nop expands to an empty statement on this architecture. */
+#define nop do { } while (0)
+/* No arch-specific find-first-zero here; defer to generic_ffz(). */
+#define ffz(v) generic_ffz((v))
+
+#endif
--- /dev/null
+/*
+ * ia64-specific definitions. Relies on arch_ia64, which is declared in the
+ * common header guarded by ARCH_H before it includes this file.
+ */
+#ifndef ARCH_IA64_H
+#define ARCH_IA64_H
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_ia64)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for ia64 - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set	1274
+#define __NR_ioprio_get	1275
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64	1234
+#endif
+
+/*
+ * Issue a pause hint; the "memory" clobber also makes this a compiler
+ * barrier. Deliberately has no trailing semicolon, matching the other
+ * architectures, so that "nop;" expands to exactly one statement.
+ */
+#define nop	asm volatile ("hint @pause" ::: "memory")
+
+/* Population count of x via the popcnt instruction. */
+#define ia64_popcnt(x)							\
+({									\
+	unsigned long ia64_intri_res;					\
+	asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x));	\
+	ia64_intri_res;							\
+})
+
+/*
+ * Return the index of the first (least significant) zero bit in bitmask.
+ *
+ * bitmask & (~bitmask - 1) keeps only the run of one bits below the lowest
+ * zero bit, so counting those bits gives the zero bit's position.
+ */
+static inline unsigned long ffz(unsigned long bitmask)
+{
+	return ia64_popcnt(bitmask & (~bitmask - 1));
+}
+
+#endif
--- /dev/null
+/*
+ * PowerPC-specific definitions. Relies on arch_ppc, which is declared in
+ * the common header guarded by ARCH_H before it includes this file.
+ */
+#ifndef ARCH_PPC_H
+#define ARCH_PPC_H
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_ppc)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for powerpc - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set	273
+#define __NR_ioprio_get	274
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64	233
+#endif
+
+/* nop expands to an empty statement on this architecture. */
+#define nop	do { } while (0)
+
+/*
+ * Index of the highest set bit in bitmask, computed as 31 minus the
+ * leading-zero count reported by cntlzw.
+ */
+static inline int __ilog2(unsigned long bitmask)
+{
+	int lz;
+
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (bitmask));
+	return 31 - lz;
+}
+
+/*
+ * Return the index of the first (least significant) zero bit in bitmask,
+ * or 32 when bitmask has no zero bit.
+ *
+ * The mask is complemented so the wanted bit becomes the lowest set bit,
+ * which x & -x isolates before __ilog2() converts it to an index.
+ *
+ * NOTE(review): cntlzw examines a 32-bit word, yet the common arch header
+ * also selects this file for __powerpc64__, where unsigned long is
+ * presumably 64 bits wide - confirm behaviour for zero bits above bit 31.
+ */
+static inline int ffz(unsigned long bitmask)
+{
+	if ((bitmask = ~bitmask) == 0)
+		return 32;
+	return __ilog2(bitmask & -bitmask);
+}
+
+#endif
--- /dev/null
+/*
+ * s390-specific definitions. Relies on arch_s390 and generic_ffz(), which
+ * are declared in the common header guarded by ARCH_H before it includes
+ * this file.
+ */
+#ifndef ARCH_S390_H
+#define ARCH_S390_H
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_s390)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for s390 - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set 282
+#define __NR_ioprio_get 283
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64 253
+#endif
+
+/*
+ * Issues a "diag 0,0,68" instruction; the "memory" clobber also makes this
+ * a compiler barrier.
+ * NOTE(review): presumably the diagnose 0x44 voluntary time-slice yield -
+ * confirm against the s390 documentation.
+ */
+#define nop asm volatile ("diag 0,0,68" : : : "memory")
+/* No arch-specific find-first-zero here; defer to generic_ffz(). */
+#define ffz(v) generic_ffz((v))
+
+#endif
--- /dev/null
+/*
+ * i386-specific definitions. Relies on arch_i386, which is declared in the
+ * common header guarded by ARCH_H before it includes this file.
+ */
+#ifndef ARCH_X86_H
+#define ARCH_X86_H
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_i386)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for i386 - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set 289
+#define __NR_ioprio_get 290
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64 250
+#endif
+
+/*
+ * "rep;nop" is the encoding of the PAUSE instruction; the "memory" clobber
+ * also makes this a compiler barrier.
+ */
+#define nop __asm__ __volatile__("rep;nop": : :"memory")
+
+/*
+ * Return the index of the first (least significant) zero bit in bitmask,
+ * found by running bsfl (bit scan forward) on the complement.
+ * NOTE(review): when bitmask has no zero bit the bsf source operand is 0
+ * and the result is undefined - callers presumably never pass an all-ones
+ * word; confirm.
+ */
+static inline unsigned long ffz(unsigned long bitmask)
+{
+ __asm__("bsfl %1,%0" :"=r" (bitmask) :"r" (~bitmask));
+ return bitmask;
+}
+
+#endif
--- /dev/null
+/*
+ * x86-64-specific definitions. Relies on arch_x86_64, which is declared in
+ * the common header guarded by ARCH_H before it includes this file.
+ */
+#ifndef ARCH_X86_64_h
+#define ARCH_X86_64_h
+
+/* Architecture identifier for this build. */
+#define ARCH (arch_x86_64)
+
+/*
+ * Fallback numbers for ioprio_set/ioprio_get, used only when
+ * __NR_ioprio_set has not been defined already.
+ * NOTE(review): presumably the Linux syscall numbers for x86-64 - confirm
+ * against the kernel's unistd.h.
+ */
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set 251
+#define __NR_ioprio_get 252
+#endif
+
+/* Fallback number for fadvise64, used only when not defined already. */
+#ifndef __NR_fadvise64
+#define __NR_fadvise64 221
+#endif
+
+/*
+ * "rep;nop" is the encoding of the PAUSE instruction; the "memory" clobber
+ * also makes this a compiler barrier.
+ */
+#define nop __asm__ __volatile__("rep;nop": : :"memory")
+
+/*
+ * Return the index of the first (least significant) zero bit in bitmask,
+ * found by running bsfq (64-bit bit scan forward) on the complement.
+ * NOTE(review): when bitmask has no zero bit the bsf source operand is 0
+ * and the result is undefined - callers presumably never pass an all-ones
+ * word; confirm.
+ */
+static inline unsigned long ffz(unsigned long bitmask)
+{
+ __asm__("bsfq %1,%0" :"=r" (bitmask) :"r" (~bitmask));
+ return bitmask;
+}
+
+
+#endif
--- /dev/null
+#ifndef ARCH_H
+#define ARCH_H
+
+/*
+ * Identifiers for the supported architectures. Each per-arch header
+ * defines ARCH to one of these enumerators.
+ */
+enum {
+ arch_x86_64,
+ arch_i386,
+ arch_ppc,
+ arch_ia64,
+ arch_s390,
+ arch_alpha,
+};
+
+/*
+ * Portable find-first-zero: walk word from bit 0 upwards and return the
+ * position of the lowest clear bit. When every bit is set the result is
+ * -1 converted to unsigned long.
+ */
+static inline unsigned long generic_ffz(unsigned long word)
+{
+	const unsigned int nr_bits = sizeof(word) * 8;
+	unsigned int pos = 0;
+
+	while (pos < nr_bits) {
+		if (!((word >> pos) & 1UL))
+			return pos;
+		pos++;
+	}
+
+	return -1;
+}
+
+/*
+ * Pull in the header matching the compiler's predefined target macro.
+ * Each one defines ARCH, nop, ffz and fallback syscall numbers; building
+ * for any other target stops with an #error.
+ */
+#if defined(__i386__)
+#include "arch-x86.h"
+#elif defined(__x86_64__)
+#include "arch-x86_64.h"
+#elif defined(__powerpc__) || defined(__powerpc64__)
+#include "arch-ppc.h"
+#elif defined(__ia64__)
+#include "arch-ia64.h"
+#elif defined(__alpha__)
+#include "arch-alpha.h"
+#elif defined(__s390x__) || defined(__s390__)
+#include "arch-s390.h"
+#else
+#error "Unsupported arch"
+#endif
+
+/*
+ * NOTE(review): __WORDSIZE is not defined in this header; presumably it
+ * comes from a C library header included before this one - confirm.
+ */
+#define BITS_PER_LONG (__WORDSIZE)
+
+#endif
--- /dev/null
+/* crc32 -- calculate a POSIX.2 checksum
+ Copyright (C) 92, 1995-1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "crc32.h"
+
+/*
+ * Byte-wise CRC lookup table with one entry per possible index byte.
+ * Entry 1 is 0x04C11DB7, the generator polynomial of the POSIX cksum CRC.
+ * crc32() below indexes it with the top byte of the running CRC XORed with
+ * the next input byte.
+ */
+static const unsigned long crctab[256] = {
+ 0x0,
+ 0x04C11DB7, 0x09823B6E, 0x0D4326D9, 0x130476DC, 0x17C56B6B,
+ 0x1A864DB2, 0x1E475005, 0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6,
+ 0x2B4BCB61, 0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD,
+ 0x4C11DB70, 0x48D0C6C7, 0x4593E01E, 0x4152FDA9, 0x5F15ADAC,
+ 0x5BD4B01B, 0x569796C2, 0x52568B75, 0x6A1936C8, 0x6ED82B7F,
+ 0x639B0DA6, 0x675A1011, 0x791D4014, 0x7DDC5DA3, 0x709F7B7A,
+ 0x745E66CD, 0x9823B6E0, 0x9CE2AB57, 0x91A18D8E, 0x95609039,
+ 0x8B27C03C, 0x8FE6DD8B, 0x82A5FB52, 0x8664E6E5, 0xBE2B5B58,
+ 0xBAEA46EF, 0xB7A96036, 0xB3687D81, 0xAD2F2D84, 0xA9EE3033,
+ 0xA4AD16EA, 0xA06C0B5D, 0xD4326D90, 0xD0F37027, 0xDDB056FE,
+ 0xD9714B49, 0xC7361B4C, 0xC3F706FB, 0xCEB42022, 0xCA753D95,
+ 0xF23A8028, 0xF6FB9D9F, 0xFBB8BB46, 0xFF79A6F1, 0xE13EF6F4,
+ 0xE5FFEB43, 0xE8BCCD9A, 0xEC7DD02D, 0x34867077, 0x30476DC0,
+ 0x3D044B19, 0x39C556AE, 0x278206AB, 0x23431B1C, 0x2E003DC5,
+ 0x2AC12072, 0x128E9DCF, 0x164F8078, 0x1B0CA6A1, 0x1FCDBB16,
+ 0x018AEB13, 0x054BF6A4, 0x0808D07D, 0x0CC9CDCA, 0x7897AB07,
+ 0x7C56B6B0, 0x71159069, 0x75D48DDE, 0x6B93DDDB, 0x6F52C06C,
+ 0x6211E6B5, 0x66D0FB02, 0x5E9F46BF, 0x5A5E5B08, 0x571D7DD1,
+ 0x53DC6066, 0x4D9B3063, 0x495A2DD4, 0x44190B0D, 0x40D816BA,
+ 0xACA5C697, 0xA864DB20, 0xA527FDF9, 0xA1E6E04E, 0xBFA1B04B,
+ 0xBB60ADFC, 0xB6238B25, 0xB2E29692, 0x8AAD2B2F, 0x8E6C3698,
+ 0x832F1041, 0x87EE0DF6, 0x99A95DF3, 0x9D684044, 0x902B669D,
+ 0x94EA7B2A, 0xE0B41DE7, 0xE4750050, 0xE9362689, 0xEDF73B3E,
+ 0xF3B06B3B, 0xF771768C, 0xFA325055, 0xFEF34DE2, 0xC6BCF05F,
+ 0xC27DEDE8, 0xCF3ECB31, 0xCBFFD686, 0xD5B88683, 0xD1799B34,
+ 0xDC3ABDED, 0xD8FBA05A, 0x690CE0EE, 0x6DCDFD59, 0x608EDB80,
+ 0x644FC637, 0x7A089632, 0x7EC98B85, 0x738AAD5C, 0x774BB0EB,
+ 0x4F040D56, 0x4BC510E1, 0x46863638, 0x42472B8F, 0x5C007B8A,
+ 0x58C1663D, 0x558240E4, 0x51435D53, 0x251D3B9E, 0x21DC2629,
+ 0x2C9F00F0, 0x285E1D47, 0x36194D42, 0x32D850F5, 0x3F9B762C,
+ 0x3B5A6B9B, 0x0315D626, 0x07D4CB91, 0x0A97ED48, 0x0E56F0FF,
+ 0x1011A0FA, 0x14D0BD4D, 0x19939B94, 0x1D528623, 0xF12F560E,
+ 0xF5EE4BB9, 0xF8AD6D60, 0xFC6C70D7, 0xE22B20D2, 0xE6EA3D65,
+ 0xEBA91BBC, 0xEF68060B, 0xD727BBB6, 0xD3E6A601, 0xDEA580D8,
+ 0xDA649D6F, 0xC423CD6A, 0xC0E2D0DD, 0xCDA1F604, 0xC960EBB3,
+ 0xBD3E8D7E, 0xB9FF90C9, 0xB4BCB610, 0xB07DABA7, 0xAE3AFBA2,
+ 0xAAFBE615, 0xA7B8C0CC, 0xA379DD7B, 0x9B3660C6, 0x9FF77D71,
+ 0x92B45BA8, 0x9675461F, 0x8832161A, 0x8CF30BAD, 0x81B02D74,
+ 0x857130C3, 0x5D8A9099, 0x594B8D2E, 0x5408ABF7, 0x50C9B640,
+ 0x4E8EE645, 0x4A4FFBF2, 0x470CDD2B, 0x43CDC09C, 0x7B827D21,
+ 0x7F436096, 0x7200464F, 0x76C15BF8, 0x68860BFD, 0x6C47164A,
+ 0x61043093, 0x65C52D24, 0x119B4BE9, 0x155A565E, 0x18197087,
+ 0x1CD86D30, 0x029F3D35, 0x065E2082, 0x0B1D065B, 0x0FDC1BEC,
+ 0x3793A651, 0x3352BBE6, 0x3E119D3F, 0x3AD08088, 0x2497D08D,
+ 0x2056CD3A, 0x2D15EBE3, 0x29D4F654, 0xC5A92679, 0xC1683BCE,
+ 0xCC2B1D17, 0xC8EA00A0, 0xD6AD50A5, 0xD26C4D12, 0xDF2F6BCB,
+ 0xDBEE767C, 0xE3A1CBC1, 0xE760D676, 0xEA23F0AF, 0xEEE2ED18,
+ 0xF0A5BD1D, 0xF464A0AA, 0xF9278673, 0xFDE69BC4, 0x89B8FD09,
+ 0x8D79E0BE, 0x803AC667, 0x84FBDBD0, 0x9ABC8BD5, 0x9E7D9662,
+ 0x933EB0BB, 0x97FFAD0C, 0xAFB010B1, 0xAB710D06, 0xA6322BDF,
+ 0xA2F33668, 0xBCB4666D, 0xB8757BDA, 0xB5365D03, 0xB1F740B4
+};
+
+unsigned long crc32(const void *buffer, unsigned long length)
+{
+ const unsigned char *cp = (const unsigned char *) buffer;
+ unsigned long crc = 0;
+
+ while (length--)
+ crc = (crc << 8) ^ crctab[((crc >> 24) ^ *(cp++)) & 0xFF];
+
+ return crc;
+}
--- /dev/null
+/* crc32 -- calculate a POSIX.2 checksum
+ Copyright (C) 92, 1995-1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef CRC32_H
+#define CRC32_H
+
+/* Return the CRC-32 of the `length` bytes starting at `buffer`. */
+extern unsigned long crc32(const void *buffer, unsigned long length);
+
+#endif
--- /dev/null
+; Keep adding 1024kb/s reading clients at 4 second intervals
+[global]
+size=32m
+
+[/tmp/file1]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=0
+
+[/tmp/file2]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=4
+
+[/tmp/file3]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=8
+
+[/tmp/file4]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=12
+
+[/tmp/file5]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=16
+
+[/tmp/file6]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=20
+
+[/tmp/file7]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=24
+
+[/tmp/file8]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=28
+
+[/tmp/file9]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=32
+
+[/tmp/file10]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=36
+
+[/tmp/file11]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=40
+
+[/tmp/file12]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=44
+
+[/tmp/file13]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=48
+
+[/tmp/file14]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=52
+
+[/tmp/file15]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=56
+
+[/tmp/file16]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=60
+
+[/tmp/file17]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=64
+
+[/tmp/file18]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=68
+
+[/tmp/file19]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=72
+
+[/tmp/file20]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=76
+
+[/tmp/file21]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=80
+
+[/tmp/file22]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=84
+
+[/tmp/file23]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=88
+
+[/tmp/file24]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=92
+
+[/tmp/file25]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=96
+
+[/tmp/file26]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=100
+
+[/tmp/file27]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=104
+
+[/tmp/file28]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=108
+
+[/tmp/file29]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=112
+
+[/tmp/file30]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=116
+
+[/tmp/file31]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=120
+
+[/tmp/file32]
+rw=read
+rate=1250
+ratemin=1024
+startdelay=124
+
--- /dev/null
+; Read 4 files with aio at different depths
+[global]
+ioengine=libaio
+rw=randread
+bs=128k
+
+[/data1/file1]
+iodepth=4
+
+[/data1/file2]
+iodepth=32
+
+[/data1/file3]
+iodepth=8
+
+[/data1/file4]
+iodepth=16
--- /dev/null
+; tiobench-like setup: add more fX files between the stonewalls to
+; create more threads
+
+[global]
+direct=1
+size=512m
+bsrange=4k-4k
+timeout=60
+numjobs=4 ; 4 simultaneous threads for each job
+
+[f1]
+rw=write
+
+[f2]
+stonewall
+rw=randwrite
+
+[f3]
+stonewall
+rw=read
+
+[f4]
+stonewall
+rw=randread
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "fio.h"
+
+/*
+ * Built-in defaults. fill_def_thread() loads most of them into
+ * def_thread, the template every job is copied from; DEF_RAND_REPEAT
+ * initialises 'repeatable' and DEF_RANDSEED is the fixed seed used by
+ * init_random_state() when repeatable runs are requested.
+ */
+#define DEF_BS (4096)
+#define DEF_TIMEOUT (0)
+#define DEF_RATE_CYCLE (1000)
+#define DEF_ODIRECT (1)
+#define DEF_IO_ENGINE (FIO_SYNCIO)
+#define DEF_IO_ENGINE_NAME "sync"
+#define DEF_SEQUENTIAL (1)
+#define DEF_RAND_REPEAT (1)
+#define DEF_OVERWRITE (1)
+#define DEF_CREATE (1)
+#define DEF_INVALIDATE (1)
+#define DEF_SYNCIO (0)
+#define DEF_RANDSEED (0xb1899bedUL)
+#define DEF_BWAVGTIME (500)
+#define DEF_CREATE_SER (1)
+#define DEF_CREATE_FSYNC (1)
+#define DEF_LOOPS (1)
+#define DEF_VERIFY (0)
+#define DEF_STONEWALL (0)
+#define DEF_NUMJOBS (1)
+#define DEF_USE_THREAD (0)
+#define DEF_FILE_SIZE (1024 * 1024 * 1024UL)
+
+/* printed by the -v command line switch */
+static char fio_version_string[] = "fio 1.0";
+
+/* -r: when non-zero, random I/O is seeded with DEF_RANDSEED */
+static int repeatable = DEF_RAND_REPEAT;
+/* -f: path of the job file handed to parse_jobs_ini() */
+static char *ini_file;
+/* capacity of threads[]; setup_thread_area() halves it until shmget() succeeds */
+static int max_jobs = MAX_JOBS;
+
+/* template job: filled with defaults, then by [global] and the command line */
+struct thread_data def_thread;
+/* job slots, living in the shared memory segment attached by setup_thread_area() */
+struct thread_data *threads = NULL;
+
+/* -R */
+int rate_quit = 0;
+/* -l: allocate latency logs for every job */
+int write_lat_log = 0;
+/* -w: allocate a bandwidth log for every job */
+int write_bw_log = 0;
+/* set by the "exitall" job file option */
+int exitall_on_terminate = 0;
+
+/*
+ * Derive the pacing interval from the job's rate setting.
+ *
+ * td->rate is multiplied by 1024 and divided by td->min_bs to get the
+ * number of minimum-sized I/Os per second; rate_usec_cycle becomes the
+ * number of microseconds available for each of them. A zero rate leaves
+ * the job untouched.
+ *
+ * Returns 0 on success and -1 when the settings cannot be used (ratemin
+ * above rate, zero block size, or a rate below one block per second,
+ * either of which would otherwise divide by zero).
+ */
+static int setup_rate(struct thread_data *td)
+{
+	int nr_reads_per_sec;
+
+	if (!td->rate)
+		return 0;
+
+	if (td->rate < td->ratemin) {
+		fprintf(stderr, "min rate larger than nominal rate\n");
+		return -1;
+	}
+
+	if (!td->min_bs) {
+		fprintf(stderr, "rate needs a non-zero block size\n");
+		return -1;
+	}
+
+	nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
+	if (nr_reads_per_sec <= 0) {
+		fprintf(stderr, "rate too low for block size\n");
+		return -1;
+	}
+
+	td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
+	td->rate_pending_usleep = 0;
+	return 0;
+}
+
+/*
+ * Allocate an empty io_log with room for 1024 samples and hand it back
+ * through *log.
+ */
+static void setup_log(struct io_log **log)
+{
+	struct io_log *fresh;
+
+	fresh = malloc(sizeof(struct io_log));
+	fresh->max_samples = 1024;
+	fresh->nr_samples = 0;
+	fresh->log = malloc(fresh->max_samples * sizeof(struct io_sample));
+
+	*log = fresh;
+}
+
+/*
+ * Write a log to "client<thread_number>_<name>.log" in the current
+ * directory, one "time, value, direction" line per sample, then free
+ * the log. If the file cannot be opened the error is reported and the
+ * log is left allocated.
+ */
+void finish_log(struct thread_data *td, struct io_log *log, const char *name)
+{
+	char file_name[128];
+	FILE *f;
+	unsigned int i;
+
+	/* bounded: an over-long name is truncated instead of smashing the stack */
+	snprintf(file_name, sizeof(file_name), "client%d_%s.log", td->thread_number, name);
+	f = fopen(file_name, "w");
+	if (!f) {
+		perror("fopen log");
+		return;
+	}
+
+	for (i = 0; i < log->nr_samples; i++)
+		fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir);
+
+	fclose(f);
+	free(log->log);
+	free(log);
+}
+
+/*
+ * Hand out the job structure for a new section.
+ *
+ * global: non-zero for the [global] section, which maps onto def_thread
+ * parent: job whose settings the new job starts out with
+ *
+ * For a regular job the next free slot in threads[] is cleared, given
+ * the next thread number and initialised from parent. Returns NULL when
+ * all max_jobs slots are in use.
+ */
+static struct thread_data *get_new_job(int global, struct thread_data *parent)
+{
+	struct thread_data *td;
+
+	if (global)
+		return &def_thread;
+	if (thread_number >= max_jobs)
+		return NULL;
+
+	td = &threads[thread_number++];
+	memset(td, 0, sizeof(*td));
+
+	td->fd = -1;
+	td->thread_number = thread_number;
+
+	td->ddir = parent->ddir;
+	td->ioprio = parent->ioprio;
+	td->sequential = parent->sequential;
+	td->bs = parent->bs;
+	td->min_bs = parent->min_bs;
+	td->max_bs = parent->max_bs;
+	td->odirect = parent->odirect;
+	td->thinktime = parent->thinktime;
+	td->fsync_blocks = parent->fsync_blocks;
+	td->start_delay = parent->start_delay;
+	td->timeout = parent->timeout;
+	td->io_engine = parent->io_engine;
+	td->create_file = parent->create_file;
+	td->overwrite = parent->overwrite;
+	td->invalidate_cache = parent->invalidate_cache;
+	td->file_size = parent->file_size;
+	td->file_offset = parent->file_offset;
+	td->rate = parent->rate;
+	td->ratemin = parent->ratemin;
+	td->ratecycle = parent->ratecycle;
+	td->iodepth = parent->iodepth;
+	td->sync_io = parent->sync_io;
+	td->mem_type = parent->mem_type;
+	td->bw_avg_time = parent->bw_avg_time;
+	td->create_serialize = parent->create_serialize;
+	td->create_fsync = parent->create_fsync;
+	td->loops = parent->loops;
+	td->verify = parent->verify;
+	td->stonewall = parent->stonewall;
+	td->numjobs = parent->numjobs;
+	td->use_thread = parent->use_thread;
+	td->do_disk_util = parent->do_disk_util;
+	memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask));
+	strcpy(td->io_engine_name, parent->io_engine_name);
+	/*
+	 * the directory is a setting like any other: without this a
+	 * directory given in [global], or on a job cloned for numjobs,
+	 * is silently dropped
+	 */
+	strcpy(td->directory, parent->directory);
+
+	return td;
+}
+
+/*
+ * Give a job slot back: wipe the threads[] entry that td's thread
+ * number refers to and lower the global job count.
+ */
+static void put_job(struct thread_data *td)
+{
+	struct thread_data *slot = threads + (td->thread_number - 1);
+
+	memset(slot, 0, sizeof(*slot));
+	thread_number--;
+}
+
+/*
+ * Finish setting up a job once its options have been parsed.
+ *
+ * td:        the job (or def_thread, for which only the engine check
+ *            and io priority are applied)
+ * jobname:   section name; doubles as the file or block device name
+ * prioclass: io priority class, combined with prio into td->ioprio
+ * prio:      io priority level
+ *
+ * Picks the file name, resets the statistics minimums, resolves the
+ * block size range, assigns the stonewall group, sets up rate limiting
+ * and logs, prints a summary line, and finally adds numjobs - 1
+ * identical jobs by calling itself.
+ *
+ * Returns 0 on success, 1 when the chosen io engine is not compiled in,
+ * and -1 on any later failure, in which case the slot is released with
+ * put_job().
+ */
+static int add_job(struct thread_data *td, const char *jobname, int prioclass,
+		   int prio)
+{
+	char *ddir_str[] = { "read", "write", "randread", "randwrite" };
+	struct stat sb;
+	int numjobs, ddir;
+
+#ifndef FIO_HAVE_LIBAIO
+	if (td->io_engine == FIO_LIBAIO) {
+		fprintf(stderr, "Linux libaio not available\n");
+		return 1;
+	}
+#endif
+#ifndef FIO_HAVE_POSIXAIO
+	if (td->io_engine == FIO_POSIXAIO) {
+		fprintf(stderr, "posix aio not available\n");
+		return 1;
+	}
+#endif
+#ifdef FIO_HAVE_IOPRIO
+	td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
+#endif
+
+	/*
+	 * the def_thread is just for options, it's not a real job
+	 */
+	if (td == &def_thread)
+		return 0;
+
+	/* sync io always runs at depth 1; other engines default to 1 */
+	if (td->io_engine & FIO_SYNCIO)
+		td->iodepth = 1;
+	else {
+		if (!td->iodepth)
+			td->iodepth = 1;
+	}
+
+	td->filetype = FIO_TYPE_FILE;
+	if (!stat(jobname, &sb) && S_ISBLK(sb.st_mode))
+		td->filetype = FIO_TYPE_BD;
+
+	/* regular files get the thread number appended, devices are used as is */
+	if (td->filetype == FIO_TYPE_FILE) {
+		if (td->directory[0] != '\0')
+			sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->thread_number);
+		else
+			sprintf(td->file_name, "%s.%d", jobname, td->thread_number);
+	} else
+		strcpy(td->file_name, jobname);
+
+	sem_init(&td->mutex, 0, 0);
+
+	td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
+	td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
+	td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
+
+	/* -1U marks "no bsrange given": fall back to the fixed block size */
+	if (td->min_bs == -1U)
+		td->min_bs = td->bs;
+	if (td->max_bs == -1U)
+		td->max_bs = td->bs;
+	if (td_read(td))
+		td->verify = 0;
+
+	if (td->stonewall && td->thread_number > 1)
+		groupid++;
+
+	td->groupid = groupid;
+
+	if (setup_rate(td))
+		goto err;
+
+	if (write_lat_log) {
+		setup_log(&td->slat_log);
+		setup_log(&td->clat_log);
+	}
+	if (write_bw_log)
+		setup_log(&td->bw_log);
+
+	ddir = td->ddir + (!td->sequential << 1);
+	printf("Client%d (g=%d): rw=%s, prio=%d/%d, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], prioclass, prio, td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
+
+	/*
+	 * recurse add identical jobs, clear numjobs and stonewall options
+	 * as they don't apply to sub-jobs. Counting down to 1 (rather than
+	 * pre-decrementing to 0) means numjobs=0 adds no sub-jobs instead
+	 * of cloning until the job table is full.
+	 */
+	for (numjobs = td->numjobs; numjobs > 1; numjobs--) {
+		struct thread_data *td_new = get_new_job(0, td);
+
+		if (!td_new)
+			goto err;
+
+		td_new->numjobs = 1;
+		td_new->stonewall = 0;
+
+		if (add_job(td_new, jobname, prioclass, prio))
+			goto err;
+	}
+	return 0;
+err:
+	put_job(td);
+	return -1;
+}
+
+/*
+ * Seed the job's random number generators.
+ *
+ * A seed is read from /dev/random and used for the block size range and
+ * verify generators. For random io (td->sequential clear) a zeroed
+ * file_map with one long per BLOCKS_PER_MAP minimum-sized blocks is
+ * allocated, and the offset generator is seeded with DEF_RANDSEED when
+ * repeatable runs are requested, otherwise with the seed read above.
+ *
+ * Returns 0 on success; on failure the error is recorded with
+ * td_verror() and 1 is returned.
+ */
+int init_random_state(struct thread_data *td)
+{
+	unsigned long long blocks;
+	unsigned long seed;
+	int fd, num_maps;
+
+	fd = open("/dev/random", O_RDONLY);
+	if (fd == -1) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
+		td_verror(td, EIO);
+		close(fd);
+		return 1;
+	}
+
+	close(fd);
+
+	srand48_r(seed, &td->bsrange_state);
+	srand48_r(seed, &td->verify_state);
+
+	if (td->sequential)
+		return 0;
+
+	if (repeatable)
+		seed = DEF_RANDSEED;
+
+	/*
+	 * round both divisions up, a partially filled last map still
+	 * needs an entry
+	 */
+	blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
+	num_maps = (blocks + BLOCKS_PER_MAP - 1) / BLOCKS_PER_MAP;
+	td->file_map = calloc(num_maps, sizeof(long));
+	if (!td->file_map && num_maps) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+	td->num_maps = num_maps;
+
+	srand48_r(seed, &td->random_state);
+	return 0;
+}
+
+/*
+ * Translate a bitmask given as an integer (bit n set means cpu n) into
+ * an os_cpu_mask_t. Does nothing without FIO_HAVE_CPU_AFFINITY.
+ *
+ * NOTE(review): cpumask is received by value, so unless os_cpu_mask_t
+ * is an array type the bits set here never reach the caller. Confirm,
+ * and if so switch to a pointer together with the call site.
+ */
+static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
+{
+#ifdef FIO_HAVE_CPU_AFFINITY
+	unsigned int i;
+
+	CPU_ZERO(&cpumask);
+
+	for (i = 0; i < sizeof(int) * 8; i++) {
+		/* unsigned shift: 1 << 31 overflows a signed int */
+		if ((1U << i) & (unsigned int) cpu)
+			CPU_SET(i, &cpumask);
+	}
+#endif
+}
+
+/*
+ * Map a size suffix to its multiplier: k/K = 1024, m/M = 1024^2,
+ * g/G = 1024^3. Any other character yields 1.
+ */
+static unsigned long get_mult(char c)
+{
+	if (c == 'k' || c == 'K')
+		return 1UL << 10;
+	if (c == 'm' || c == 'M')
+		return 1UL << 20;
+	if (c == 'g' || c == 'G')
+		return 1UL << 30;
+
+	return 1;
+}
+
+/*
+ * Convert the text after the first '=' in p into a decimal value,
+ * applying the size suffix (see get_mult()) that directly follows the
+ * digits.
+ *
+ * *val is written only on success. Returns 0 on success and 1 when
+ * there is no '=', no digits follow it, or the number is out of range.
+ */
+static int str_cnv(char *p, unsigned long long *val)
+{
+	unsigned long long num;
+	char *str, *end;
+
+	str = strstr(p, "=");
+	if (!str)
+		return 1;
+
+	str++;
+
+	/* errno is only meaningful if it was clear before the call */
+	errno = 0;
+	num = strtoull(str, &end, 10);
+	if (end == str || errno == ERANGE)
+		return 1;
+
+	/*
+	 * take the suffix from where the number ended instead of guessing
+	 * its position from the line length
+	 */
+	*val = num * get_mult(*end);
+	return 0;
+}
+
+/*
+ * If name occurs anywhere in the line p, convert the value after '='
+ * with str_cnv(). Returns 0 when a value was stored, 1 otherwise.
+ */
+static int check_strcnv(char *p, char *name, unsigned long long *val)
+{
+	return strstr(p, name) ? str_cnv(p, val) : 1;
+}
+
+/*
+ * Advance *p past any leading spaces and tabs.
+ */
+static void strip_blank_front(char **p)
+{
+	char *s = *p;
+
+	/* ctype functions need an unsigned char value */
+	while (isblank((unsigned char) *s))
+		s++;
+
+	/* hand the new start back, otherwise the caller sees no change */
+	*p = s;
+}
+
+/*
+ * Overwrite trailing spaces and tabs with NUL bytes, walking backwards
+ * from p, which must point at the last character of the string.
+ *
+ * There is no lower bound: the caller has to make sure a character
+ * that is not a space or tab sits at or before p.
+ */
+static void strip_blank_end(char *p)
+{
+	/* ctype functions need an unsigned char value */
+	while (isblank((unsigned char) *p)) {
+		*p = '\0';
+		p--;
+	}
+}
+
+/* option callback: receives the job and the text following '=' */
+typedef int (str_cb_fn)(struct thread_data *, char *);
+
+/*
+ * If name occurs in the line p and is followed by '=', pass the text
+ * after the '=' to cb and return its result. Returns 1 when the line
+ * does not match.
+ */
+static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td)
+{
+	char *value;
+
+	value = strstr(p, name);
+	if (value)
+		value = strstr(value, "=");
+	if (!value)
+		return 1;
+
+	value++;
+	strip_blank_front(&value);
+	return cb(td, value);
+}
+
+/*
+ * If name occurs in the line p and is followed by '=', copy the text
+ * after the '=' into dest with trailing whitespace, including the line
+ * terminator, removed. dest must be large enough for the rest of the
+ * line.
+ *
+ * Returns 0 when a value was stored, 1 when the line does not match.
+ */
+static int check_strstore(char *p, char *name, char *dest)
+{
+	char *s = strstr(p, name);
+	size_t len;
+
+	if (!s)
+		return 1;
+
+	/* the '=' has to follow the name, not merely appear in the line */
+	s = strstr(s, "=");
+	if (!s)
+		return 1;
+
+	s++;
+	strip_blank_front(&s);
+
+	strcpy(dest, s);
+
+	/*
+	 * trim from the end without ever stepping below dest; fgets()
+	 * leaves the newline on the line, so it has to go as well
+	 */
+	len = strlen(dest);
+	while (len && isspace((unsigned char) dest[len - 1]))
+		dest[--len] = '\0';
+
+	return 0;
+}
+
+/*
+ * Parse "name=<lo>-<hi>" or "name = <lo>-<hi>" from the start of the
+ * line p. The forms with a one-character size suffix on both numbers
+ * are tried first and scaled with get_mult(); the plain numeric forms
+ * are tried afterwards.
+ *
+ * Returns 0 with *s and *e set on a match, 1 otherwise. *s and *e may
+ * have been written by a partial match even when 1 is returned.
+ */
+static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
+{
+	static const char *with_suffix[] = {
+		"%s=%%lu%%c-%%lu%%c",
+		"%s = %%lu%%c-%%lu%%c",
+	};
+	static const char *plain[] = {
+		"%s=%%lu-%%lu",
+		"%s = %%lu-%%lu",
+	};
+	char fmt[128];
+	char lo_unit, hi_unit;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		sprintf(fmt, with_suffix[i], name);
+		if (sscanf(p, fmt, s, &lo_unit, e, &hi_unit) != 4)
+			continue;
+
+		*s *= get_mult(lo_unit);
+		*e *= get_mult(hi_unit);
+		return 0;
+	}
+
+	for (i = 0; i < 2; i++) {
+		sprintf(fmt, plain[i], name);
+		if (sscanf(p, fmt, s, e) == 2)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Parse "name=<number>" or "name = <number>" from the start of the
+ * line p.
+ *
+ * Returns 0 with *val set on a match; returns 1 and leaves *val alone
+ * otherwise.
+ */
+static int check_int(char *p, char *name, unsigned int *val)
+{
+	char str[128];
+
+	/* %u, not %d: the destination is an unsigned int */
+	snprintf(str, sizeof(str), "%s=%%u", name);
+	if (sscanf(p, str, val) == 1)
+		return 0;
+
+	snprintf(str, sizeof(str), "%s = %%u", name);
+	if (sscanf(p, str, val) == 1)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Test whether the line p starts with the flag name. Returns 0 when it
+ * does, non-zero otherwise (the strncmp() result).
+ */
+static int check_strset(char *p, char *name)
+{
+	size_t name_len = strlen(name);
+
+	return strncmp(p, name, name_len);
+}
+
+/*
+ * Decide whether a job file line carries no option.
+ *
+ * Returns 1 when the line holds only whitespace or control characters,
+ * or when the first other character is ';' (a comment). Returns 0 as
+ * soon as any other character is seen first.
+ */
+static int is_empty_or_comment(char *line)
+{
+	const unsigned char *c;
+
+	/* walk to the terminator once instead of calling strlen() per step */
+	for (c = (const unsigned char *) line; *c != '\0'; c++) {
+		if (*c == ';')
+			return 1;
+		if (!isspace(*c) && !iscntrl(*c))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * "rw" option: set the data direction and access pattern.
+ *
+ * Accepts values starting with "read" or "0", "randread", "write" or
+ * "1", and "randwrite". Returns 0 on success, 1 (after printing a
+ * message) for anything else.
+ */
+static int str_rw_cb(struct thread_data *td, char *mem)
+{
+	int ddir, sequential;
+
+	if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
+		ddir = DDIR_READ;
+		sequential = 1;
+	} else if (!strncmp(mem, "randread", 8)) {
+		ddir = DDIR_READ;
+		sequential = 0;
+	} else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
+		ddir = DDIR_WRITE;
+		sequential = 1;
+	} else if (!strncmp(mem, "randwrite", 9)) {
+		ddir = DDIR_WRITE;
+		sequential = 0;
+	} else {
+		fprintf(stderr, "bad data direction: %s\n", mem);
+		return 1;
+	}
+
+	td->ddir = ddir;
+	td->sequential = sequential;
+	return 0;
+}
+
+/*
+ * "verify" option: "0" disables verification, "md5" or "1" selects
+ * md5, "crc32" selects crc32. Returns 0 on success, 1 (after printing
+ * a message) for anything else.
+ */
+static int str_verify_cb(struct thread_data *td, char *mem)
+{
+	int verify;
+
+	if (!strncmp(mem, "0", 1))
+		verify = VERIFY_NONE;
+	else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1))
+		verify = VERIFY_MD5;
+	else if (!strncmp(mem, "crc32", 5))
+		verify = VERIFY_CRC32;
+	else {
+		fprintf(stderr, "bad verify type: %s\n", mem);
+		return 1;
+	}
+
+	td->verify = verify;
+	return 0;
+}
+
+/*
+ * "mem" option: choose how io buffers are obtained ("malloc", "shm" or
+ * "mmap"). Returns 0 on success, 1 (after printing a message) for
+ * anything else.
+ */
+static int str_mem_cb(struct thread_data *td, char *mem)
+{
+	int type;
+
+	if (!strncmp(mem, "malloc", 6))
+		type = MEM_MALLOC;
+	else if (!strncmp(mem, "shm", 3))
+		type = MEM_SHM;
+	else if (!strncmp(mem, "mmap", 4))
+		type = MEM_MMAP;
+	else {
+		fprintf(stderr, "bad mem type: %s\n", mem);
+		return 1;
+	}
+
+	td->mem_type = type;
+	return 0;
+}
+
+/*
+ * "ioengine" option: record the engine and its canonical name.
+ *
+ * "linuxaio", "aio" and "libaio" all select libaio; the other accepted
+ * values are "posixaio", "sync", "mmap" and "sgio". Returns 0 on
+ * success, 1 (after printing a message) for anything else.
+ */
+static int str_ioengine_cb(struct thread_data *td, char *str)
+{
+	const char *engine_name;
+	int engine;
+
+	if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) ||
+	    !strncmp(str, "libaio", 6)) {
+		engine_name = "libaio";
+		engine = FIO_LIBAIO;
+	} else if (!strncmp(str, "posixaio", 8)) {
+		engine_name = "posixaio";
+		engine = FIO_POSIXAIO;
+	} else if (!strncmp(str, "sync", 4)) {
+		engine_name = "sync";
+		engine = FIO_SYNCIO;
+	} else if (!strncmp(str, "mmap", 4)) {
+		engine_name = "mmap";
+		engine = FIO_MMAPIO;
+	} else if (!strncmp(str, "sgio", 4)) {
+		engine_name = "sgio";
+		engine = FIO_SGIO;
+	} else {
+		fprintf(stderr, "bad ioengine type: %s\n", str);
+		return 1;
+	}
+
+	strcpy(td->io_engine_name, engine_name);
+	td->io_engine = engine;
+	return 0;
+}
+
+
+/*
+ * Read a job file and add every job it describes.
+ *
+ * The file consists of "[name]" section headers followed by option
+ * lines; empty lines and lines whose first visible character is ';'
+ * are skipped. A section named "global" edits def_thread, every other
+ * section obtains a new job from get_new_job(). Options are applied
+ * until a line containing '[' is met; since that line has already been
+ * consumed, the position saved after the last accepted line is
+ * restored so the outer loop reads the header again.
+ *
+ * Returns 0 on success and 1 on any failure. The file and the line
+ * buffers are released on every path.
+ */
+int parse_jobs_ini(char *file)
+{
+	unsigned int prioclass, prio, cpu, global;
+	unsigned long long ull;
+	unsigned long ul1, ul2;
+	struct thread_data *td;
+	char *string, *name;
+	fpos_t off;
+	FILE *f;
+	char *p;
+	int ret = 1;
+
+	f = fopen(file, "r");
+	if (!f) {
+		perror("fopen");
+		return 1;
+	}
+
+	string = malloc(4096);
+	name = malloc(256);
+	if (!string || !name) {
+		perror("malloc");
+		goto out;
+	}
+
+	while ((p = fgets(string, 4096, f)) != NULL) {
+		if (is_empty_or_comment(p))
+			continue;
+		/* name holds 256 bytes, keep the conversion inside it */
+		if (sscanf(p, "[%255s]", name) != 1)
+			continue;
+
+		global = !strncmp(name, "global", 6);
+
+		/* %s swallowed the closing bracket as well, drop it */
+		name[strlen(name) - 1] = '\0';
+
+		td = get_new_job(global, &def_thread);
+		if (!td)
+			goto out;
+
+		prioclass = 2;
+		prio = 4;
+
+		fgetpos(f, &off);
+		while ((p = fgets(string, 4096, f)) != NULL) {
+			if (is_empty_or_comment(p))
+				continue;
+			if (strstr(p, "["))
+				break;
+			if (!check_int(p, "prio", &prio)) {
+#ifndef FIO_HAVE_IOPRIO
+				fprintf(stderr, "io priorities not available\n");
+				goto out;
+#endif
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "prioclass", &prioclass)) {
+#ifndef FIO_HAVE_IOPRIO
+				fprintf(stderr, "io priorities not available\n");
+				goto out;
+#endif
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "direct", &td->odirect)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "rate", &td->rate)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "ratemin", &td->ratemin)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "ratecycle", &td->ratecycle)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "thinktime", &td->thinktime)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "cpumask", &cpu)) {
+#ifndef FIO_HAVE_CPU_AFFINITY
+				fprintf(stderr, "cpu affinity not available\n");
+				goto out;
+#endif
+				fill_cpu_mask(td->cpumask, cpu);
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "fsync", &td->fsync_blocks)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "startdelay", &td->start_delay)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "timeout", &td->timeout)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "invalidate",&td->invalidate_cache)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "iodepth", &td->iodepth)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "sync", &td->sync_io)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "create_serialize", &td->create_serialize)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "create_fsync", &td->create_fsync)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "loops", &td->loops)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "numjobs", &td->numjobs)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "overwrite", &td->overwrite)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_range(p, "bsrange", &ul1, &ul2)) {
+				/* accept the bounds in either order */
+				if (ul1 > ul2) {
+					td->max_bs = ul1;
+					td->min_bs = ul2;
+				} else {
+					td->max_bs = ul2;
+					td->min_bs = ul1;
+				}
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "bs", &ull)) {
+				td->bs = ull;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "size", &td->file_size)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "offset", &td->file_offset)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strstore(p, "directory", td->directory)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "mem", str_mem_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "verify", str_verify_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "rw", str_rw_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "create")) {
+				td->create_file = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "exitall")) {
+				exitall_on_terminate = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "stonewall")) {
+				td->stonewall = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "thread")) {
+				td->use_thread = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+
+			printf("Client%d: bad option %s\n",td->thread_number,p);
+		}
+		/* step back so the outer loop sees the next section header */
+		fsetpos(f, &off);
+
+		if (add_job(td, name, prioclass, prio))
+			goto out;
+	}
+
+	ret = 0;
+out:
+	free(string);
+	free(name);
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Reset def_thread and load it with the built-in defaults and the
+ * current cpu affinity of this process. Returns 0 on success, 1 when
+ * the affinity cannot be read.
+ */
+static int fill_def_thread(void)
+{
+	struct thread_data *def = &def_thread;
+
+	memset(def, 0, sizeof(*def));
+
+	if (fio_getaffinity(getpid(), &def->cpumask) == -1) {
+		perror("sched_getaffinity");
+		return 1;
+	}
+
+	/*
+	 * io pattern and block size; -1 in min_bs/max_bs marks them as
+	 * not set
+	 */
+	def->ddir = DDIR_READ;
+	def->sequential = DEF_SEQUENTIAL;
+	def->bs = DEF_BS;
+	def->min_bs = -1;
+	def->max_bs = -1;
+
+	/*
+	 * io engine and open flags
+	 */
+	def->io_engine = DEF_IO_ENGINE;
+	strcpy(def->io_engine_name, DEF_IO_ENGINE_NAME);
+	def->odirect = DEF_ODIRECT;
+	def->sync_io = DEF_SYNCIO;
+	def->mem_type = MEM_MALLOC;
+
+	/*
+	 * file handling
+	 */
+	def->create_file = DEF_CREATE;
+	def->create_serialize = DEF_CREATE_SER;
+	def->create_fsync = DEF_CREATE_FSYNC;
+	def->overwrite = DEF_OVERWRITE;
+	def->invalidate_cache = DEF_INVALIDATE;
+
+	/*
+	 * run control
+	 */
+	def->timeout = DEF_TIMEOUT;
+	def->ratecycle = DEF_RATE_CYCLE;
+	def->bw_avg_time = DEF_BWAVGTIME;
+	def->loops = DEF_LOOPS;
+	def->verify = DEF_VERIFY;
+	def->stonewall = DEF_STONEWALL;
+	def->numjobs = DEF_NUMJOBS;
+	def->use_thread = DEF_USE_THREAD;
+#ifdef FIO_HAVE_DISK_UTIL
+	def->do_disk_util = 1;
+#endif
+
+	return 0;
+}
+
+/*
+ * Apply the command line switches:
+ *   -s n  sequential io on/off       -b n  block size in KiB
+ *   -t n  timeout                    -r n  repeatable random on/off
+ *   -R n  rate_quit on/off           -o n  O_DIRECT on/off
+ *   -f f  job file                   -l    write latency logs
+ *   -w    write bandwidth logs       -v    print the version and exit
+ */
+static void parse_cmd_line(int argc, char *argv[])
+{
+	for (;;) {
+		int opt = getopt(argc, argv, "s:b:t:r:R:o:f:lwv");
+
+		if (opt == EOF)
+			break;
+
+		switch (opt) {
+		case 'v':
+			printf("%s\n", fio_version_string);
+			exit(0);
+		case 'l':
+			write_lat_log = 1;
+			break;
+		case 'w':
+			write_bw_log = 1;
+			break;
+		case 'f':
+			ini_file = strdup(optarg);
+			break;
+		case 's':
+			def_thread.sequential = !!atoi(optarg);
+			break;
+		case 'o':
+			def_thread.odirect = !!atoi(optarg);
+			break;
+		case 'r':
+			repeatable = !!atoi(optarg);
+			break;
+		case 'R':
+			rate_quit = !!atoi(optarg);
+			break;
+		case 't':
+			def_thread.timeout = atoi(optarg);
+			break;
+		case 'b':
+			/* given in KiB; fall back to the default if it ends up 0 */
+			def_thread.bs = atoi(optarg);
+			def_thread.bs <<= 10;
+			if (!def_thread.bs) {
+				printf("bad block size\n");
+				def_thread.bs = DEF_BS;
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * atexit() handler: detach the job array and mark the shared memory
+ * segment for removal. Does nothing once threads is NULL.
+ */
+static void free_shm(void)
+{
+	struct shmid_ds sbuf;
+
+	if (!threads)
+		return;
+
+	shmdt(threads);
+	threads = NULL;
+	shmctl(shm_id, IPC_RMID, &sbuf);
+}
+
+/*
+ * Create and attach the shared memory segment holding the job array,
+ * and register free_shm() to release it at exit. Returns 0 on success,
+ * 1 on failure.
+ */
+static int setup_thread_area(void)
+{
+	/*
+	 * 1024 is too much on some machines, scale max_jobs if
+	 * we get a failure that looks like too large a shm segment
+	 */
+	for (;;) {
+		int size = max_jobs * sizeof(struct thread_data);
+
+		shm_id = shmget(0, size, IPC_CREAT | 0600);
+		if (shm_id != -1)
+			break;
+
+		if (errno != EINVAL) {
+			perror("shmget");
+			break;
+		}
+
+		max_jobs >>= 1;
+		if (!max_jobs)
+			break;
+	}
+
+	if (shm_id == -1)
+		return 1;
+
+	threads = shmat(shm_id, NULL, 0);
+	if (threads == (void *) -1) {
+		perror("shmat");
+		return 1;
+	}
+
+	atexit(free_shm);
+	return 0;
+}
+
+/*
+ * Entry point for option handling: set up the shared job array, load
+ * the defaults, apply the command line, then parse the job file named
+ * with -f. Returns 0 on success, 1 on any failure.
+ */
+int parse_options(int argc, char *argv[])
+{
+	if (setup_thread_area() || fill_def_thread())
+		return 1;
+
+	parse_cmd_line(argc, argv);
+
+	if (ini_file == NULL) {
+		printf("Need job file\n");
+		return 1;
+	}
+
+	return parse_jobs_ini(ini_file) != 0;
+}
--- /dev/null
+/*
+ * The io parts of the fio tool, includes workers for sync and mmap'ed
+ * io, as well as both posix and linux libaio support.
+ *
+ * sync io is implemented on top of aio.
+ *
+ * This is not really specific to fio: if get_io_u/put_io_u and the
+ * related structures were pulled into this as well, it would be a
+ * perfectly generic io engine that could be used for other projects.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/mman.h>
+#include "fio.h"
+#include "os.h"
+
+/*
+ * Helpers shared by several engines. They are kept outside the libaio
+ * conditional because the posix aio and sync engines use them too.
+ */
+
+/*
+ * Flush the job's file with fsync(); returns the fsync() result.
+ */
+static int fio_io_sync(struct thread_data *td)
+{
+	return fsync(td->fd);
+}
+
+/*
+ * Store the current CLOCK_MONOTONIC time in *ts. Returns 0 on success,
+ * 1 when clock_gettime() fails or _POSIX_TIMERS is not defined.
+ */
+static int fill_timespec(struct timespec *ts)
+{
+#ifdef _POSIX_TIMERS
+	if (!clock_gettime(CLOCK_MONOTONIC, ts))
+		return 0;
+
+	perror("clock_gettime");
+#endif
+	return 1;
+}
+
+/*
+ * Microseconds elapsed between *t and now. Returns 0 when the current
+ * time cannot be obtained.
+ */
+static unsigned long long ts_utime_since_now(struct timespec *t)
+{
+	long long sec, nsec;
+	struct timespec now;
+
+	if (fill_timespec(&now))
+		return 0;
+
+	sec = now.tv_sec - t->tv_sec;
+	nsec = now.tv_nsec - t->tv_nsec;
+	/* borrow a second when the nanosecond part went negative */
+	if (sec > 0 && nsec < 0) {
+		sec--;
+		nsec += 1000000000;
+	}
+
+	sec *= 1000000;
+	nsec /= 1000;
+	return sec + nsec;
+}
+
+#ifdef FIO_HAVE_LIBAIO
+
+/* the io_u a completion event belongs to, taken from the event's obj field */
+#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
+
+/*
+ * Per-job state of the libaio engine, kept in td->io_data.
+ */
+struct libaio_data {
+ /* context created by io_queue_init(), used for submit/getevents/cancel */
+ io_context_t aio_ctx;
+ /* completion event array with td->iodepth entries */
+ struct io_event *aio_events;
+};
+
+/*
+ * Fill in the iocb of an io_u for a read or write on the job's file,
+ * according to io_u->ddir. Always returns 0.
+ */
+static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
+{
+	if (io_u->ddir != DDIR_READ)
+		io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+	else
+		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+
+	return 0;
+}
+
+/*
+ * Return the io_u behind entry 'event' of the last reaped event array.
+ */
+static struct io_u *fio_libaio_event(struct thread_data *td, int event)
+{
+	struct libaio_data *ld = td->io_data;
+	struct io_event *ev = &ld->aio_events[event];
+
+	return ev_to_iou(ev);
+}
+
+/*
+ * Reap between min and max completions into the engine's event array.
+ * -EINTR is retried at once, -EAGAIN after a 100 usec sleep; any other
+ * result of io_getevents() is returned to the caller.
+ */
+static int fio_libaio_getevents(struct thread_data *td, int min, int max,
+				struct timespec *t)
+{
+	struct libaio_data *ld = td->io_data;
+	int r;
+
+	for (;;) {
+		r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
+		if (r == -EINTR)
+			continue;
+		if (r != -EAGAIN)
+			break;
+
+		usleep(100);
+	}
+
+	return r;
+}
+
+/*
+ * Submit one io_u. Returns 0 once it is accepted; -EAGAIN is retried
+ * after a 100 usec sleep and -EINTR at once. Any other io_submit()
+ * result is returned unchanged.
+ */
+static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->io_data;
+	struct iocb *iocb = &io_u->iocb;
+	int ret;
+
+	for (;;) {
+		ret = io_submit(ld->aio_ctx, 1, &iocb);
+		if (ret == 1)
+			return 0;
+
+		if (ret == -EAGAIN) {
+			usleep(100);
+			continue;
+		}
+
+		if (ret != -EINTR)
+			return ret;
+	}
+}
+
+/*
+ * Ask the kernel to cancel an in-flight io_u; returns the io_cancel()
+ * result. The first slot of the event array receives the event.
+ */
+static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->io_data;
+	struct iocb *iocb = &io_u->iocb;
+
+	return io_cancel(ld->aio_ctx, iocb, ld->aio_events);
+}
+
+/*
+ * Destroy the aio context and free the engine data, if there is any.
+ */
+static void fio_libaio_cleanup(struct thread_data *td)
+{
+	struct libaio_data *ld = td->io_data;
+
+	if (!ld)
+		return;
+
+	io_destroy(ld->aio_ctx);
+	/* free() copes with NULL on its own */
+	free(ld->aio_events);
+	free(ld);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the libaio engine for a job: allocate the engine data and an
+ * event array of td->iodepth entries, create the aio context and
+ * install the engine hooks.
+ *
+ * Returns 0 on success. On failure the error is recorded with
+ * td_verror(), everything allocated here is released and 1 is
+ * returned.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+	struct libaio_data *ld;
+	int err;
+
+	ld = calloc(1, sizeof(*ld));
+	if (!ld) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
+	if (!ld->aio_events) {
+		td_verror(td, ENOMEM);
+		free(ld);
+		return 1;
+	}
+
+	/* libaio hands back the error as a negative value, errno is not set */
+	err = io_queue_init(td->iodepth, &ld->aio_ctx);
+	if (err) {
+		td_verror(td, err < 0 ? -err : err);
+		free(ld->aio_events);
+		free(ld);
+		return 1;
+	}
+
+	td->io_prep = fio_libaio_io_prep;
+	td->io_queue = fio_libaio_queue;
+	td->io_getevents = fio_libaio_getevents;
+	td->io_event = fio_libaio_event;
+	td->io_cancel = fio_libaio_cancel;
+	td->io_cleanup = fio_libaio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	td->io_data = ld;
+	return 0;
+}
+
+#else /* FIO_HAVE_LIBAIO */
+
+/*
+ * Built without libaio: the engine cannot be set up, report EINVAL.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+	(void) td;
+
+	return EINVAL;
+}
+
+#endif /* FIO_HAVE_LIBAIO */
+
+#ifdef FIO_HAVE_POSIXAIO
+
+/*
+ * Per-job state of the posix aio engine, kept in td->io_data.
+ */
+struct posixaio_data {
+ /* completed io_us found by the last getevents call, td->iodepth slots */
+ struct io_u **aio_events;
+};
+
+/*
+ * Try to cancel an in-flight io_u. Returns 0 when aio_cancel() yields
+ * 1 or AIO_CANCELED, and 1 for every other result.
+ *
+ * NOTE(review): the literal 1 is compared alongside AIO_CANCELED; which
+ * aio_cancel() status it stands for is platform dependent, confirm.
+ */
+static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	int status = aio_cancel(td->fd, &io_u->aiocb);
+
+	return !(status == 1 || status == AIO_CANCELED);
+}
+
+/*
+ * Fill in the aiocb of an io_u from its buffer, length and offset and
+ * mark the io_u as not yet reaped. Always returns 0.
+ */
+static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
+{
+	io_u->aiocb.aio_fildes = td->fd;
+	io_u->aiocb.aio_buf = io_u->buf;
+	io_u->aiocb.aio_nbytes = io_u->buflen;
+	io_u->aiocb.aio_offset = io_u->offset;
+	io_u->seen = 0;
+
+	return 0;
+}
+
+/*
+ * Collect completed io_us by polling the busy list.
+ *
+ * Every io_u that has not been reaped yet is checked with aio_error();
+ * anything no longer in progress is stored in the event array and
+ * marked seen, with its error recorded unless the status is 0 or
+ * ECANCELED. A scan ends early once max entries are collected.
+ *
+ * Returns the number collected as soon as it reaches min, or, when a
+ * timeout was given and a start time could be taken, once more than
+ * that much time has passed. Otherwise it sleeps 1 msec and scans
+ * again.
+ */
+static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
+				  struct timespec *t)
+{
+	struct posixaio_data *pd = td->io_data;
+	struct list_head *entry;
+	struct timespec start;
+	int have_timeout = 0;
+	int r = 0;
+
+	if (t && !fill_timespec(&start))
+		have_timeout = 1;
+
+	for (;;) {
+		list_for_each(entry, &td->io_u_busylist) {
+			struct io_u *io_u = list_entry(entry, struct io_u, list);
+			int err;
+
+			if (io_u->seen)
+				continue;
+
+			err = aio_error(&io_u->aiocb);
+			if (err != EINPROGRESS) {
+				if (err != 0 && err != ECANCELED)
+					io_u->error = err;
+
+				pd->aio_events[r++] = io_u;
+				io_u->seen = 1;
+			}
+
+			if (r >= max)
+				break;
+		}
+
+		if (r >= min)
+			return r;
+
+		if (have_timeout) {
+			unsigned long long usec;
+
+			usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
+			if (ts_utime_since_now(&start) > usec)
+				return r;
+		}
+
+		/*
+		 * hrmpf, we need to wait for more. we should use aio_suspend, for
+		 * now just sleep a little and recheck status of busy-and-not-seen
+		 */
+		usleep(1000);
+	}
+}
+
+/*
+ * Return entry 'event' of the io_us collected by the last getevents.
+ */
+static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+{
+	struct posixaio_data *pd = td->io_data;
+	struct io_u **reaped = pd->aio_events;
+
+	return reaped[event];
+}
+
+/*
+ * Start the read or write described by the io_u's aiocb. If the
+ * request cannot be queued, errno is stored in io_u->error. Returns
+ * io_u->error.
+ */
+static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct aiocb *aiocb = &io_u->aiocb;
+	int failed;
+
+	failed = (io_u->ddir == DDIR_READ) ? aio_read(aiocb) : aio_write(aiocb);
+	if (failed)
+		io_u->error = errno;
+
+	return io_u->error;
+}
+
+/*
+ * Free the engine data of a posix aio job, if there is any.
+ */
+static void fio_posixaio_cleanup(struct thread_data *td)
+{
+	struct posixaio_data *pd = td->io_data;
+
+	if (!pd)
+		return;
+
+	free(pd->aio_events);
+	free(pd);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the posix aio engine for a job: allocate the engine data with
+ * room for td->iodepth reaped io_us and install the engine hooks.
+ *
+ * Returns 0 on success. When memory runs out the error is recorded
+ * with td_verror() and 1 is returned.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+	struct posixaio_data *pd = malloc(sizeof(*pd));
+
+	if (!pd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
+	if (!pd->aio_events) {
+		td_verror(td, ENOMEM);
+		free(pd);
+		return 1;
+	}
+
+	td->io_prep = fio_posixaio_prep;
+	td->io_queue = fio_posixaio_queue;
+	td->io_getevents = fio_posixaio_getevents;
+	td->io_event = fio_posixaio_event;
+	td->io_cancel = fio_posixaio_cancel;
+	td->io_cleanup = fio_posixaio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	td->io_data = pd;
+	return 0;
+}
+
+#else /* FIO_HAVE_POSIXAIO */
+
+/*
+ * Built without posix aio: the engine cannot be set up, report EINVAL.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+	(void) td;
+
+	return EINVAL;
+}
+
+#endif /* FIO_HAVE_POSIXAIO */
+
+/*
+ * Per-job state of the sync engine, kept in td->io_data.
+ */
+struct syncio_data {
+ /* the io_u most recently completed without error by fio_syncio_queue() */
+ struct io_u *last_io_u;
+};
+
+/*
+ * Report how many io_us have completed: 1 if the busy list holds
+ * anything, 0 otherwise. Asserts that no more than one is asked for.
+ */
+static int fio_syncio_getevents(struct thread_data *td, int min, int max,
+				struct timespec *t)
+{
+	assert(max <= 1);
+
+	/*
+	 * we can only have one finished io_u for sync io, since the depth
+	 * is always 1
+	 */
+	return list_empty(&td->io_u_busylist) ? 0 : 1;
+}
+
+ /*
+  * Return the completed io_u for event index 'event'. Only index 0 is
+  * valid; the result is the io_u remembered by the queue function.
+  */
+ static struct io_u *fio_syncio_event(struct thread_data *td, int event)
+ {
+ 	struct syncio_data *state = td->io_data;
+ 	struct io_u *done;
+
+ 	assert(event == 0);
+
+ 	done = state->last_io_u;
+ 	return done;
+ }
+
+ /*
+  * Position the file descriptor for the upcoming transfer. The seek is
+  * skipped when td->cur_off already equals the io_u offset.
+  * Returns 0 on success, 1 (after td_verror) when lseek() fails.
+  */
+ static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
+ {
+ 	if (td->cur_off == io_u->offset)
+ 		return 0;
+
+ 	if (lseek(td->fd, io_u->offset, SEEK_SET) != -1)
+ 		return 0;
+
+ 	td_verror(td, errno);
+ 	return 1;
+ }
+
+ /*
+  * Perform one blocking read() or write() for the io_u.
+  *
+  * A failed call (negative return) stores errno in io_u->error. A transfer
+  * shorter than io_u->buflen -- including a 0-byte transfer such as a read
+  * at end of file -- records the missing byte count in io_u->resid and sets
+  * io_u->error to ENODATA. errno is only meaningful after a failure, so it
+  * is never consulted for a non-negative return.
+  *
+  * On success the io_u is remembered as the last completed unit.
+  * Returns io_u->error.
+  */
+ static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
+ {
+ 	struct syncio_data *sd = td->io_data;
+ 	int ret;
+
+ 	if (io_u->ddir == DDIR_READ)
+ 		ret = read(td->fd, io_u->buf, io_u->buflen);
+ 	else
+ 		ret = write(td->fd, io_u->buf, io_u->buflen);
+
+ 	if (ret < 0)
+ 		io_u->error = errno;
+ 	else if ((unsigned int) ret != io_u->buflen) {
+ 		io_u->resid = io_u->buflen - ret;
+ 		io_u->error = ENODATA;
+ 	}
+
+ 	if (!io_u->error)
+ 		sd->last_io_u = io_u;
+
+ 	return io_u->error;
+ }
+
+ /*
+  * Free the engine private data and clear td->io_data. free(NULL) is a
+  * no-op, so an already-empty td->io_data needs no special case.
+  */
+ static void fio_syncio_cleanup(struct thread_data *td)
+ {
+ 	free(td->io_data);
+ 	td->io_data = NULL;
+ }
+
+ /*
+  * Set up the synchronous read()/write() engine: allocate its private
+  * data and install the engine hooks (no cancel hook).
+  *
+  * Returns 0 on success. On allocation failure the error is recorded with
+  * td_verror(ENOMEM), nothing is installed, and 1 is returned.
+  */
+ int fio_syncio_init(struct thread_data *td)
+ {
+ 	struct syncio_data *sd = malloc(sizeof(*sd));
+
+ 	if (!sd) {
+ 		td_verror(td, ENOMEM);
+ 		return 1;
+ 	}
+
+ 	td->io_prep = fio_syncio_prep;
+ 	td->io_queue = fio_syncio_queue;
+ 	td->io_getevents = fio_syncio_getevents;
+ 	td->io_event = fio_syncio_event;
+ 	td->io_cancel = NULL;
+ 	td->io_cleanup = fio_syncio_cleanup;
+ 	td->io_sync = fio_io_sync;
+
+ 	sd->last_io_u = NULL;
+ 	td->io_data = sd;
+ 	return 0;
+ }
+
+ /*
+  * Perform one io_u against the mapped file with memcpy(). The position in
+  * the mapping is the io_u offset minus td->file_offset.
+  *
+  * With td->odirect set, the touched range is msync()ed and then advised
+  * MADV_DONTNEED; a failure of either call stores errno in io_u->error.
+  * On success the io_u is remembered as the last completed unit.
+  * Returns io_u->error.
+  */
+ static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+ {
+ 	struct syncio_data *state = td->io_data;
+ 	unsigned long long map_off = io_u->offset - td->file_offset;
+
+ 	if (io_u->ddir != DDIR_READ)
+ 		memcpy(td->mmap + map_off, io_u->buf, io_u->buflen);
+ 	else
+ 		memcpy(io_u->buf, td->mmap + map_off, io_u->buflen);
+
+ 	/*
+ 	 * not really direct, but should drop the pages from the cache
+ 	 */
+ 	if (td->odirect) {
+ 		if (msync(td->mmap + map_off, io_u->buflen, MS_SYNC) < 0)
+ 			io_u->error = errno;
+ 		if (madvise(td->mmap + map_off, io_u->buflen, MADV_DONTNEED) < 0)
+ 			io_u->error = errno;
+ 	}
+
+ 	if (io_u->error)
+ 		return io_u->error;
+
+ 	state->last_io_u = io_u;
+ 	return io_u->error;
+ }
+
+ /*
+  * Synchronously flush the mapping (td->file_size bytes starting at
+  * td->mmap) with msync(MS_SYNC) and return msync()'s result.
+  */
+ static int fio_mmapio_sync(struct thread_data *td)
+ {
+ 	int rc;
+
+ 	rc = msync(td->mmap, td->file_size, MS_SYNC);
+ 	return rc;
+ }
+
+ /*
+  * Set up the mmap engine. It shares the sync engine's private data,
+  * getevents/event/cleanup hooks, but has its own queue and sync hooks
+  * and no prep or cancel hook.
+  *
+  * Returns 0 on success. On allocation failure the error is recorded with
+  * td_verror(ENOMEM), nothing is installed, and 1 is returned.
+  */
+ int fio_mmapio_init(struct thread_data *td)
+ {
+ 	struct syncio_data *sd = malloc(sizeof(*sd));
+
+ 	if (!sd) {
+ 		td_verror(td, ENOMEM);
+ 		return 1;
+ 	}
+
+ 	td->io_prep = NULL;
+ 	td->io_queue = fio_mmapio_queue;
+ 	td->io_getevents = fio_syncio_getevents;
+ 	td->io_event = fio_syncio_event;
+ 	td->io_cancel = NULL;
+ 	td->io_cleanup = fio_syncio_cleanup;
+ 	td->io_sync = fio_mmapio_sync;
+
+ 	sd->last_io_u = NULL;
+ 	td->io_data = sd;
+ 	return 0;
+ }
+
+#ifdef FIO_HAVE_SGIO
+
+ /*
+ * Private state of the SG_IO engine, installed as td->io_data by
+ * fio_sgio_init().
+ */
+ struct sgio_data {
+ struct io_u *last_io_u; /* last io_u that was queued without error */
+ unsigned char cdb[10]; /* command block that sg_io_hdr.cmdp points at */
+ unsigned int bs; /* value reported by the BLKSSZGET ioctl */
+ };
+
+ /*
+  * Reset an sg_io_hdr and the shared command block, then point the header
+  * at that command block. When an io_u is given, its buffer and length
+  * become the header's data transfer buffer.
+  */
+ static inline void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
+ 				 struct io_u *io_u)
+ {
+ 	memset(sd->cdb, 0, sizeof(sd->cdb));
+ 	memset(hdr, 0, sizeof(*hdr));
+
+ 	hdr->cmd_len = sizeof(sd->cdb);
+ 	hdr->cmdp = sd->cdb;
+ 	hdr->interface_id = 'S';
+
+ 	if (!io_u)
+ 		return;
+
+ 	hdr->dxfer_len = io_u->buflen;
+ 	hdr->dxferp = io_u->buf;
+ }
+
+ /*
+  * Issue a data-less SG_IO command with opcode 0x35 (SYNCHRONIZE CACHE(10)
+  * in the SCSI command set) and return the ioctl() result.
+  */
+ static int fio_sgio_sync(struct thread_data *td)
+ {
+ 	struct sg_io_hdr hdr;
+ 	struct sgio_data *sd = td->io_data;
+ 	int rc;
+
+ 	sgio_hdr_init(sd, &hdr, NULL);
+ 	hdr.cmdp[0] = 0x35;
+ 	hdr.dxfer_direction = SG_DXFER_NONE;
+
+ 	rc = ioctl(td->fd, SG_IO, &hdr);
+ 	return rc;
+ }
+
+ /*
+  * Build the 10-byte command for one io_u: opcode 0x28 (READ(10)) for
+  * DDIR_READ, 0x2a (WRITE(10)) otherwise, with the block address in bytes
+  * 2-5 and the block count in bytes 7-8, both most significant byte first.
+  *
+  * Returns EINVAL when the buffer length is not a multiple of sd->bs
+  * (checked with a mask, so sd->bs is expected to be a power of two),
+  * 0 otherwise.
+  */
+ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
+ {
+ 	struct sgio_data *sd = td->io_data;
+ 	struct sg_io_hdr *hdr = &io_u->hdr;
+ 	int lba, nr_blocks;
+
+ 	if (io_u->buflen & (sd->bs - 1)) {
+ 		fprintf(stderr, "read/write not sector aligned\n");
+ 		return EINVAL;
+ 	}
+
+ 	sgio_hdr_init(sd, hdr, io_u);
+
+ 	if (io_u->ddir != DDIR_READ) {
+ 		hdr->dxfer_direction = SG_DXFER_TO_DEV;
+ 		hdr->cmdp[0] = 0x2a;
+ 	} else {
+ 		hdr->dxfer_direction = SG_DXFER_FROM_DEV;
+ 		hdr->cmdp[0] = 0x28;
+ 	}
+
+ 	lba = io_u->offset / sd->bs;
+ 	nr_blocks = io_u->buflen / sd->bs;
+
+ 	/* logical block address, big endian */
+ 	hdr->cmdp[2] = (lba >> 24) & 0xff;
+ 	hdr->cmdp[3] = (lba >> 16) & 0xff;
+ 	hdr->cmdp[4] = (lba >> 8) & 0xff;
+ 	hdr->cmdp[5] = lba & 0xff;
+
+ 	/* transfer length in blocks, big endian */
+ 	hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
+ 	hdr->cmdp[8] = nr_blocks & 0xff;
+
+ 	return 0;
+ }
+
+ /*
+  * Execute the prepared SG_IO command for the io_u. An ioctl() failure
+  * stores errno in io_u->error; a non-zero status in the returned header
+  * stores the header's residual count in io_u->resid and sets EIO.
+  * On success the io_u is remembered as the last completed unit.
+  * Returns io_u->error.
+  */
+ static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+ {
+ 	struct sgio_data *sd = td->io_data;
+ 	struct sg_io_hdr *hdr = &io_u->hdr;
+
+ 	if (ioctl(td->fd, SG_IO, hdr) < 0) {
+ 		io_u->error = errno;
+ 	} else if (hdr->status) {
+ 		io_u->resid = hdr->resid;
+ 		io_u->error = EIO;
+ 	}
+
+ 	if (io_u->error)
+ 		return io_u->error;
+
+ 	sd->last_io_u = io_u;
+ 	return io_u->error;
+ }
+
+ /*
+  * Return the completed io_u for event index 'event'. Only index 0 is
+  * valid; the result is the io_u remembered by fio_sgio_queue().
+  */
+ static struct io_u *fio_sgio_event(struct thread_data *td, int event)
+ {
+ 	struct sgio_data *state = td->io_data;
+ 	struct io_u *done;
+
+ 	assert(event == 0);
+
+ 	done = state->last_io_u;
+ 	return done;
+ }
+
+ /*
+  * Set up the SG_IO engine. It only works on block devices; the size
+  * reported by BLKSSZGET is stored for the alignment check and block
+  * arithmetic in fio_sgio_prep().
+  *
+  * Returns 0 on success and 1 on failure (wrong file type, ioctl failure
+  * or out of memory; the latter two are also recorded via td_verror).
+  */
+ int fio_sgio_init(struct thread_data *td)
+ {
+ 	struct sgio_data *sd;
+ 	int bs;
+
+ 	if (td->filetype != FIO_TYPE_BD) {
+ 		fprintf(stderr, "ioengine sgio only works on block devices\n");
+ 		return 1;
+ 	}
+
+ 	if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+ 		td_verror(td, errno);
+ 		return 1;
+ 	}
+
+ 	sd = malloc(sizeof(*sd));
+ 	if (!sd) {
+ 		td_verror(td, ENOMEM);
+ 		return 1;
+ 	}
+ 	sd->bs = bs;
+
+ 	td->io_prep = fio_sgio_prep;
+ 	td->io_queue = fio_sgio_queue;
+ 	td->io_getevents = fio_syncio_getevents;
+ 	td->io_event = fio_sgio_event;
+ 	td->io_cancel = NULL;
+ 	td->io_cleanup = fio_syncio_cleanup;
+ 	td->io_sync = fio_sgio_sync;
+
+ 	/*
+ 	 * we want to do it, regardless of whether odirect is set or not
+ 	 */
+ 	td->override_sync = 1;
+
+ 	sd->last_io_u = NULL;
+ 	td->io_data = sd;
+ 	return 0;
+ }
+
+#else /* FIO_HAVE_SGIO */
+
+ /*
+ * Stub used when FIO_HAVE_SGIO is not defined: the engine is not
+ * available, so initialization always returns EINVAL.
+ */
+ int fio_sgio_init(struct thread_data *td)
+ {
+ return EINVAL;
+ }
+
+#endif /* FIO_HAVE_SGIO */
--- /dev/null
+/*
+ * fio - the flexible io tester
+ *
+ * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <time.h>
+#include <math.h>
+#include <assert.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "fio.h"
+#include "os.h"
+
+ /* alignment mask: buffers are aligned to MASK + 1 (4096) bytes */
+ #define MASK	(4095)
+
+ /*
+  * Round 'buf' up to the next (MASK + 1)-byte boundary and yield it as a
+  * char pointer. The expansion is fully parenthesized so that it stays one
+  * unit when used inside a larger expression.
+  */
+ #define ALIGN(buf)	((char *) (((unsigned long) (buf) + MASK) & ~(MASK)))
+
+ /*
+ * File-level state. NOTE(review): most of these are set up outside the
+ * part of the file reviewed here; confirm their roles against the users.
+ */
+ int groupid = 0;
+ int thread_number = 0; /* upper bound used when walking threads[] */
+ static char run_str[MAX_JOBS + 1];
+ int shm_id = 0;
+ static LIST_HEAD(disk_list);
+ static struct itimerval itimer;
+
+ /* forward declarations for the helpers called from the SIGALRM handler */
+ static void update_io_ticks(void);
+ static void disk_util_timer_arm(void);
+ static void print_thread_status(void);
+
+/*
+ * thread life cycle
+ */
+enum {
+ TD_NOT_CREATED = 0,
+ TD_CREATED,
+ TD_RUNNING,
+ TD_VERIFYING,
+ TD_EXITED,
+ TD_REAPED,
+};
+
+ /*
+ * True for a writing job that either does not use O_DIRECT or has
+ * override_sync set (the sgio engine sets it in its init function).
+ */
+ #define should_fsync(td) (td_write(td) && (!(td)->odirect || (td)->override_sync))
+
+ static sem_t startup_sem;
+
+ /* group id passed to terminate_threads() to select every thread */
+ #define TERMINATE_ALL (-1)
+
+ /*
+  * Ask threads to stop: set the terminate flag and clear any pending
+  * start delay on every thread whose group matches 'group_id', or on all
+  * threads when 'group_id' is TERMINATE_ALL.
+  *
+  * The comparison must use the 'group_id' argument; the similarly named
+  * global 'groupid' is unrelated to the group being terminated.
+  */
+ static void terminate_threads(int group_id)
+ {
+ 	int i;
+
+ 	for (i = 0; i < thread_number; i++) {
+ 		struct thread_data *td = &threads[i];
+
+ 		if (group_id == TERMINATE_ALL || group_id == td->groupid) {
+ 			td->terminate = 1;
+ 			td->start_delay = 0;
+ 		}
+ 	}
+ }
+
+ /*
+  * Signal handler. SIGALRM drives the periodic work (io tick update,
+  * re-arming the disk util timer, status output); any other signal prints
+  * a notice and asks every thread to terminate.
+  */
+ static void sig_handler(int sig)
+ {
+ 	if (sig == SIGALRM) {
+ 		update_io_ticks();
+ 		disk_util_timer_arm();
+ 		print_thread_status();
+ 		return;
+ 	}
+
+ 	printf("\nfio: terminating on signal\n");
+ 	fflush(stdout);
+ 	terminate_threads(TERMINATE_ALL);
+ }
+
+ /*
+  * Microseconds elapsed from *s to *e. The arithmetic is done in double
+  * and converted to unsigned long on return.
+  */
+ static unsigned long utime_since(struct timeval *s, struct timeval *e)
+ {
+ 	double d_sec = e->tv_sec - s->tv_sec;
+ 	double d_usec = e->tv_usec - s->tv_usec;
+
+ 	/* borrow one second when the microsecond part went negative */
+ 	if (d_sec > 0 && d_usec < 0) {
+ 		d_sec -= 1;
+ 		d_usec += 1000000;
+ 	}
+
+ 	d_sec *= (double) 1000000;
+
+ 	return d_sec + d_usec;
+ }
+
+ /*
+  * Microseconds elapsed between *s and the current time of day.
+  */
+ static unsigned long utime_since_now(struct timeval *s)
+ {
+ 	struct timeval now;
+ 	unsigned long elapsed;
+
+ 	gettimeofday(&now, NULL);
+ 	elapsed = utime_since(s, &now);
+ 	return elapsed;
+ }
+
+ /*
+  * Milliseconds elapsed from *s to *e. The arithmetic is done in double
+  * and converted to unsigned long on return.
+  */
+ static unsigned long mtime_since(struct timeval *s, struct timeval *e)
+ {
+ 	double d_sec = e->tv_sec - s->tv_sec;
+ 	double d_usec = e->tv_usec - s->tv_usec;
+
+ 	/* borrow one second when the microsecond part went negative */
+ 	if (d_sec > 0 && d_usec < 0) {
+ 		d_sec -= 1;
+ 		d_usec += 1000000;
+ 	}
+
+ 	d_sec *= (double) 1000;
+ 	d_usec /= (double) 1000;
+
+ 	return d_sec + d_usec;
+ }
+
+ /*
+  * Milliseconds elapsed between *s and the current time of day.
+  */
+ static unsigned long mtime_since_now(struct timeval *s)
+ {
+ 	struct timeval now;
+ 	unsigned long elapsed;
+
+ 	gettimeofday(&now, NULL);
+ 	elapsed = mtime_since(s, &now);
+ 	return elapsed;
+ }
+
+ /*
+  * Convert the timeval *s to whole milliseconds (sub-millisecond part of
+  * tv_usec is dropped).
+  */
+ static inline unsigned long msec_now(struct timeval *s)
+ {
+ 	return s->tv_usec / 1000 + s->tv_sec * 1000;
+ }
+
+ /*
+  * Return non-zero when the bit for 'block' in td->file_map is clear,
+  * i.e. the block has not been marked yet.
+  */
+ static int random_map_free(struct thread_data *td, unsigned long long block)
+ {
+ 	unsigned int bit = RAND_MAP_BIT(td, block);
+ 	unsigned int idx = RAND_MAP_IDX(td, block);
+ 	unsigned long mask = 1UL << bit;
+
+ 	return !(td->file_map[idx] & mask);
+ }
+
+ /*
+  * Scan td->file_map word by word for the first word with a clear bit and
+  * store the matching block number in *b. Fully set words are skipped in
+  * steps of BLOCKS_PER_MAP.
+  * Returns 0 when a block was found, 1 when the scan passed td->io_size.
+  */
+ static int get_next_free_block(struct thread_data *td, unsigned long long *b)
+ {
+ 	int map_idx;
+
+ 	*b = 0;
+ 	for (map_idx = 0; (*b) * td->min_bs < td->io_size; map_idx++) {
+ 		if (td->file_map[map_idx] == -1UL) {
+ 			*b += BLOCKS_PER_MAP;
+ 			continue;
+ 		}
+
+ 		*b += ffz(td->file_map[map_idx]);
+ 		return 0;
+ 	}
+
+ 	return 1;
+ }
+
+ /*
+  * Mark the min_bs-sized blocks covered by the io_u in td->file_map.
+  * Marking stops at the first block that is already taken; in that case
+  * io_u->buflen is cut down to the blocks that were actually marked.
+  */
+ static void mark_random_map(struct thread_data *td, struct io_u *io_u)
+ {
+ 	unsigned long cur = io_u->offset / td->min_bs;
+ 	unsigned int marked;
+
+ 	for (marked = 0; marked < (io_u->buflen / td->min_bs); marked++, cur++) {
+ 		unsigned int idx, bit;
+
+ 		if (!random_map_free(td, cur))
+ 			break;
+
+ 		idx = RAND_MAP_IDX(td, cur);
+ 		bit = RAND_MAP_BIT(td, cur);
+
+ 		assert(idx < td->num_maps);
+
+ 		td->file_map[idx] |= (1UL << bit);
+ 	}
+
+ 	if ((marked * td->min_bs) < io_u->buflen)
+ 		io_u->buflen = marked * td->min_bs;
+ }
+
+ /*
+  * Pick the file offset for the next io_u and store it in *offset.
+  *
+  * Sequential jobs continue at td->last_bytes. Random jobs draw up to 50
+  * random blocks looking for one that is still free in the random map and
+  * fall back to a linear scan for a free block once the tries are used up.
+  *
+  * Returns 0 on success, 1 when no block is left or the offset lies
+  * beyond td->file_size.
+  */
+ static int get_next_offset(struct thread_data *td, unsigned long long *offset)
+ {
+ 	unsigned long long b, rb;
+ 	long r;
+
+ 	if (td->sequential) {
+ 		b = td->last_bytes / td->min_bs;
+ 	} else {
+ 		unsigned long max_blocks = td->io_size / td->min_bs;
+ 		int tries = 50;
+
+ 		do {
+ 			lrand48_r(&td->random_state, &r);
+ 			b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
+ 			rb = b + (td->file_offset / td->min_bs);
+ 			tries--;
+ 		} while (!random_map_free(td, rb) && tries);
+
+ 		if (!tries && get_next_free_block(td, &b))
+ 			return 1;
+ 	}
+
+ 	*offset = (b * td->min_bs) + td->file_offset;
+ 	if (*offset > td->file_size)
+ 		return 1;
+
+ 	return 0;
+ }
+
+ /*
+  * Choose the length of the next io_u. With a fixed block size that size
+  * is used; otherwise a random length up to max_bs is drawn and rounded up
+  * with a min_bs mask. The result never exceeds the bytes still to be done
+  * in the current direction.
+  */
+ static unsigned int get_next_buflen(struct thread_data *td)
+ {
+ 	unsigned int len;
+ 	long r;
+
+ 	if (td->min_bs != td->max_bs) {
+ 		lrand48_r(&td->bsrange_state, &r);
+ 		len = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
+ 		len = (len + td->min_bs - 1) & ~(td->min_bs - 1);
+ 	} else
+ 		len = td->min_bs;
+
+ 	if (len > td->io_size - td->this_io_bytes[td->ddir])
+ 		len = td->io_size - td->this_io_bytes[td->ddir];
+
+ 	return len;
+ }
+
+ /*
+  * Fold one sample into the running statistics: count, sum, sum of
+  * squares, minimum and maximum.
+  */
+ static inline void add_stat_sample(struct io_stat *is, unsigned long val)
+ {
+ 	is->samples++;
+ 	is->val += val;
+ 	is->val_sq += val * val;
+
+ 	if (val < is->min_val)
+ 		is->min_val = val;
+ 	if (val > is->max_val)
+ 		is->max_val = val;
+ }
+
+ /*
+  * Append one sample (value, milliseconds since td->epoch, direction) to
+  * an io log. The log array doubles in size when it is full.
+  *
+  * If the array cannot be grown the sample is dropped; the existing log
+  * and its bookkeeping stay untouched, so earlier samples are not lost.
+  */
+ static void add_log_sample(struct thread_data *td, struct io_log *iolog,
+ 			   unsigned long val, int ddir)
+ {
+ 	if (iolog->nr_samples == iolog->max_samples) {
+ 		/* size_t avoids overflowing an int for large logs */
+ 		size_t new_size = sizeof(struct io_sample) * iolog->max_samples * 2;
+ 		void *new_log = realloc(iolog->log, new_size);
+
+ 		/* on failure realloc() leaves the old block valid, keep it */
+ 		if (!new_log)
+ 			return;
+
+ 		iolog->log = new_log;
+ 		iolog->max_samples <<= 1;
+ 	}
+
+ 	iolog->log[iolog->nr_samples].val = val;
+ 	iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch);
+ 	iolog->log[iolog->nr_samples].ddir = ddir;
+ 	iolog->nr_samples++;
+ }
+
+ /*
+  * Account one completion latency sample for 'ddir' and, when a
+  * completion latency log is attached, log it as well.
+  */
+ static void add_clat_sample(struct thread_data *td, int ddir,unsigned long msec)
+ {
+ 	add_stat_sample(&td->clat_stat[ddir], msec);
+
+ 	if (!td->clat_log)
+ 		return;
+
+ 	add_log_sample(td, td->clat_log, msec, ddir);
+ }
+
+ /*
+  * Account one submission latency sample for 'ddir' and, when a
+  * submission latency log is attached, log it as well.
+  */
+ static void add_slat_sample(struct thread_data *td, int ddir,unsigned long msec)
+ {
+ 	add_stat_sample(&td->slat_stat[ddir], msec);
+
+ 	if (!td->slat_log)
+ 		return;
+
+ 	add_log_sample(td, td->slat_log, msec, ddir);
+ }
+
+ /*
+  * Take a bandwidth sample for 'ddir' once at least td->bw_avg_time
+  * milliseconds have passed since the previous one. The rate is the bytes
+  * done since the last sample divided by the elapsed milliseconds.
+  */
+ static void add_bw_sample(struct thread_data *td, int ddir)
+ {
+ 	unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
+ 	unsigned long rate;
+
+ 	if (spent < td->bw_avg_time)
+ 		return;
+
+ 	/*
+ 	 * with bw_avg_time at 0 the check above lets a zero interval
+ 	 * through; skip it rather than divide by zero
+ 	 */
+ 	if (!spent)
+ 		return;
+
+ 	rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent;
+ 	add_stat_sample(&td->bw_stat[ddir], rate);
+
+ 	if (td->bw_log)
+ 		add_log_sample(td, td->bw_log, rate, ddir);
+
+ 	gettimeofday(&td->stat_sample_time[ddir], NULL);
+ 	td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
+ }
+
+/*
+ * busy looping version for the last few usec
+ */
+static void __usec_sleep(unsigned int usec)
+{
+ struct timeval start;
+
+ gettimeofday(&start, NULL);
+ while (utime_since_now(&start) < usec)
+ nop;
+}
+
+ /*
+  * Sleep for 'usec' microseconds. Requests below 5000 usec are busy-waited
+  * with __usec_sleep(); longer ones use nanosleep(). A sleep interrupted by
+  * a signal is resumed with the remaining time unless td->terminate has
+  * been set in the meantime; any other nanosleep() failure ends the sleep.
+  */
+ static void usec_sleep(struct thread_data *td, unsigned long usec)
+ {
+ 	struct timespec req, rem;
+
+ 	/*
+ 	 * tv_nsec carries only the sub-second remainder, in nanoseconds.
+ 	 * It must stay below one second or nanosleep() fails with EINVAL.
+ 	 */
+ 	req.tv_sec = usec / 1000000;
+ 	req.tv_nsec = (usec % 1000000) * 1000;
+
+ 	do {
+ 		if (usec < 5000) {
+ 			__usec_sleep(usec);
+ 			break;
+ 		}
+
+ 		rem.tv_sec = rem.tv_nsec = 0;
+ 		if (nanosleep(&req, &rem) == 0)
+ 			break;
+
+ 		/* only an interrupted sleep leaves a remainder to resume */
+ 		if (errno != EINTR)
+ 			break;
+
+ 		if ((rem.tv_sec + rem.tv_nsec) == 0)
+ 			break;
+
+ 		req.tv_nsec = rem.tv_nsec;
+ 		req.tv_sec = rem.tv_sec;
+
+ 		usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
+ 	} while (!td->terminate);
+ }
+
+ /*
+  * Rate limiting. Compare the time a batch of 'bytes' took with the time
+  * it should take at the configured rate. Time saved is accumulated in
+  * td->rate_pending_usleep and slept off once it reaches 100000 usec;
+  * time overspent is subtracted from the accumulated amount.
+  * Does nothing when no rate is set.
+  */
+ static void rate_throttle(struct thread_data *td, unsigned long time_spent,
+ 			  unsigned int bytes)
+ {
+ 	unsigned long cycle_usec;
+
+ 	if (!td->rate)
+ 		return;
+
+ 	cycle_usec = td->rate_usec_cycle * (bytes / td->min_bs);
+
+ 	if (time_spent >= cycle_usec) {
+ 		long overtime = time_spent - cycle_usec;
+
+ 		td->rate_pending_usleep -= overtime;
+ 		return;
+ 	}
+
+ 	td->rate_pending_usleep += cycle_usec - time_spent;
+ 	if (td->rate_pending_usleep >= 100000) {
+ 		usec_sleep(td, td->rate_pending_usleep);
+ 		td->rate_pending_usleep = 0;
+ 	}
+ }
+
+ /*
+  * Check the achieved rate against td->ratemin. Nothing is checked during
+  * the first 2 seconds of the run. Once a sample window is open
+  * (td->rate_bytes set) the rate is evaluated every td->ratecycle msec.
+  *
+  * Returns 1 when the minimum rate was missed (and terminates the thread's
+  * group if rate_quit is set), 0 otherwise. Each pass that does not return
+  * early starts a new sample window at 'now'.
+  */
+ static int check_min_rate(struct thread_data *td, struct timeval *now)
+ {
+ 	int ddir = td->ddir;
+ 	unsigned long elapsed;
+ 	unsigned long cur_rate;
+
+ 	/*
+ 	 * allow a 2 second settle period in the beginning
+ 	 */
+ 	if (mtime_since(&td->start, now) < 2000)
+ 		return 0;
+
+ 	/*
+ 	 * no rate bytes recorded yet means no sample is running
+ 	 */
+ 	if (!td->rate_bytes)
+ 		goto resample;
+
+ 	elapsed = mtime_since(&td->lastrate, now);
+ 	if (elapsed < td->ratecycle)
+ 		return 0;
+
+ 	cur_rate = (td->this_io_bytes[ddir] - td->rate_bytes) / elapsed;
+ 	if (cur_rate < td->ratemin) {
+ 		printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, cur_rate);
+ 		if (rate_quit)
+ 			terminate_threads(td->groupid);
+ 		return 1;
+ 	}
+
+ resample:
+ 	td->rate_bytes = td->this_io_bytes[ddir];
+ 	memcpy(&td->lastrate, now, sizeof(*now));
+ 	return 0;
+ }
+
+ /*
+  * Return 1 when a timeout is configured and at least td->timeout * 1000
+  * milliseconds lie between td->epoch and *t, 0 otherwise.
+  */
+ static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
+ {
+ 	if (td->timeout && mtime_since(&td->epoch, t) >= td->timeout * 1000)
+ 		return 1;
+
+ 	return 0;
+ }
+
+ /*
+  * Fill 'len' bytes at 'p' with pseudo random data drawn from
+  * td->verify_state, sizeof(double) bytes per draw.
+  */
+ static void fill_random_bytes(struct thread_data *td,
+ 			      unsigned char *p, unsigned int len)
+ {
+ 	unsigned int chunk;
+ 	double r;
+
+ 	for (; len; len -= chunk, p += chunk) {
+ 		/*
+ 		 * lrand48_r seems to be broken and only fills the bottom
+ 		 * 32-bits, even on 64-bit archs with 64-bit longs, so the
+ 		 * raw bytes of a drand48_r result are used instead
+ 		 */
+ 		drand48_r(&td->verify_state, &r);
+
+ 		chunk = (len < sizeof(r)) ? len : sizeof(r);
+ 		memcpy(p, &r, chunk);
+ 	}
+ }
+
+ /*
+  * Print 'len' bytes of 'buffer' to stdout as two-digit lowercase hex
+  * values without separators, followed by a newline.
+  */
+ static void hexdump(void *buffer, int len)
+ {
+ 	unsigned char *byte = buffer;
+
+ 	for (; len > 0; len--, byte++)
+ 		printf("%02x", *byte);
+
+ 	printf("\n");
+ }
+
+ /*
+  * Recompute the crc32 over the payload that follows the verify header and
+  * compare it with the value stored in the header.
+  * Returns 0 on a match, 1 (after reporting to stderr) on a mismatch.
+  */
+ static int verify_io_u_crc32(struct verify_header *hdr, struct io_u *io_u)
+ {
+ 	unsigned char *payload = (unsigned char *) io_u->buf;
+ 	unsigned long sum;
+
+ 	payload += sizeof(*hdr);
+ 	sum = crc32(payload, hdr->len - sizeof(*hdr));
+
+ 	if (sum == hdr->crc32)
+ 		return 0;
+
+ 	fprintf(stderr, "crc32: verify failed at %llu/%u\n", io_u->offset, io_u->buflen);
+ 	fprintf(stderr, "crc32: wanted %lx, got %lx\n", hdr->crc32, sum);
+ 	return 1;
+ }
+
+ /*
+  * Recompute the md5 hash over the payload that follows the verify header
+  * and compare it with the digest stored in the header.
+  * Returns the memcmp() result: 0 on a match, non-zero on a mismatch (in
+  * which case the failure and both digests are printed).
+  */
+ static int verify_io_u_md5(struct verify_header *hdr, struct io_u *io_u)
+ {
+ 	unsigned char *payload = (unsigned char *) io_u->buf;
+ 	struct md5_ctx ctx;
+ 	int diff;
+
+ 	memset(&ctx, 0, sizeof(ctx));
+ 	payload += sizeof(*hdr);
+ 	md5_update(&ctx, payload, hdr->len - sizeof(*hdr));
+
+ 	diff = memcmp(hdr->md5_digest, ctx.hash, sizeof(ctx.hash));
+ 	if (!diff)
+ 		return diff;
+
+ 	fprintf(stderr, "md5: verify failed at %llu/%u\n", io_u->offset, io_u->buflen);
+ 	hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
+ 	hexdump(ctx.hash, sizeof(ctx.hash));
+ 	return diff;
+ }
+
+ /*
+  * Verify the contents of a completed read: check the header magic, then
+  * dispatch to the md5 or crc32 check named in the header.
+  * Returns 0 when the data verifies, non-zero otherwise (bad magic, bad
+  * checksum, or unknown verify type).
+  */
+ static int verify_io_u(struct io_u *io_u)
+ {
+ 	struct verify_header *hdr = (struct verify_header *) io_u->buf;
+
+ 	if (hdr->fio_magic != FIO_HDR_MAGIC)
+ 		return 1;
+
+ 	if (hdr->verify_type == VERIFY_MD5)
+ 		return verify_io_u_md5(hdr, io_u);
+
+ 	if (hdr->verify_type == VERIFY_CRC32)
+ 		return verify_io_u_crc32(hdr, io_u);
+
+ 	fprintf(stderr, "Bad verify type %d\n", hdr->verify_type);
+ 	return 1;
+ }
+
+ /*
+ * Store the crc32 of the 'len' bytes at 'p' in the verify header.
+ */
+ static void fill_crc32(struct verify_header *hdr, void *p, unsigned int len)
+ {
+ hdr->crc32 = crc32(p, len);
+ }
+
+ /*
+  * Hash the 'len' bytes at 'p' with md5, starting from a zeroed context,
+  * and copy the resulting hash into the verify header's digest field.
+  */
+ static void fill_md5(struct verify_header *hdr, void *p, unsigned int len)
+ {
+ 	struct md5_ctx ctx;
+
+ 	memset(&ctx, 0, sizeof(ctx));
+ 	md5_update(&ctx, p, len);
+
+ 	memcpy(hdr->md5_digest, ctx.hash, sizeof(ctx.hash));
+ }
+
+/*
+ * fill body of io_u->buf with random data and add a header with the
+ * (eg) sha1sum of that data.
+ */
+static void populate_io_u(struct thread_data *td, struct io_u *io_u)
+{
+ unsigned char *p = (unsigned char *) io_u->buf;
+ struct verify_header hdr;
+
+ hdr.fio_magic = FIO_HDR_MAGIC;
+ hdr.len = io_u->buflen;
+ p += sizeof(hdr);
+ fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
+
+ if (td->verify == VERIFY_MD5) {
+ fill_md5(&hdr, p, io_u->buflen - sizeof(hdr));
+ hdr.verify_type = VERIFY_MD5;
+ } else {
+ fill_crc32(&hdr, p, io_u->buflen - sizeof(hdr));
+ hdr.verify_type = VERIFY_CRC32;
+ }
+
+ memcpy(io_u->buf, &hdr, sizeof(hdr));
+}
+
+ /*
+  * Return an io_u to the free list and drop the current queue depth.
+  */
+ static void put_io_u(struct thread_data *td, struct io_u *io_u)
+ {
+ 	td->cur_depth--;
+
+ 	list_del(&io_u->list);
+ 	list_add(&io_u->list, &td->io_u_freelist);
+ }
+
+ /* true when no free io_u is left, i.e. nothing more can be queued */
+ #define queue_full(td) (list_empty(&(td)->io_u_freelist))
+
+ /*
+  * Take the first io_u off the free list, reset its error and residual
+  * fields, move it to the busy list and bump the current depth.
+  * Returns NULL when the free list is empty.
+  */
+ static struct io_u *__get_io_u(struct thread_data *td)
+ {
+ 	struct io_u *unit;
+
+ 	if (queue_full(td))
+ 		return NULL;
+
+ 	unit = list_entry(td->io_u_freelist.next, struct io_u, list);
+
+ 	list_del(&unit->list);
+ 	list_add(&unit->list, &td->io_u_busylist);
+ 	td->cur_depth++;
+
+ 	unit->resid = 0;
+ 	unit->error = 0;
+ 	return unit;
+ }
+
+ /*
+  * Set the io_u direction (read when 'read' is non-zero, write otherwise)
+  * and run the engine's prep hook if it has one.
+  * Returns 1 when the hook reports failure, 0 otherwise.
+  */
+ static int td_io_prep(struct thread_data *td, struct io_u *io_u, int read)
+ {
+ 	io_u->ddir = read ? DDIR_READ : DDIR_WRITE;
+
+ 	if (!td->io_prep)
+ 		return 0;
+
+ 	return td->io_prep(td, io_u) ? 1 : 0;
+ }
+
+ /*
+  * Get a free io_u and set it up for the next transfer: pick offset and
+  * length, clip the length to the file size, mark the random map for
+  * random io, fill in verify data if requested and let the engine prep it.
+  * The start time is stamped last.
+  *
+  * Returns NULL when no io_u is free or any setup step fails or yields a
+  * zero length; in those cases the io_u goes back to the free list.
+  */
+ static struct io_u *get_io_u(struct thread_data *td)
+ {
+ 	struct io_u *io_u = __get_io_u(td);
+
+ 	if (!io_u)
+ 		return NULL;
+
+ 	if (get_next_offset(td, &io_u->offset))
+ 		goto give_back;
+
+ 	io_u->buflen = get_next_buflen(td);
+ 	if (!io_u->buflen)
+ 		goto give_back;
+
+ 	if (io_u->buflen + io_u->offset > td->file_size)
+ 		io_u->buflen = td->file_size - io_u->offset;
+
+ 	if (!io_u->buflen)
+ 		goto give_back;
+
+ 	if (!td->sequential)
+ 		mark_random_map(td, io_u);
+
+ 	td->last_bytes += io_u->buflen;
+
+ 	if (td->verify != VERIFY_NONE)
+ 		populate_io_u(td, io_u);
+
+ 	if (td_io_prep(td, io_u, td_read(td)))
+ 		goto give_back;
+
+ 	gettimeofday(&io_u->start_time, NULL);
+ 	return io_u;
+
+ give_back:
+ 	put_io_u(td, io_u);
+ 	return NULL;
+ }
+
+ /*
+ * Switch the thread to 'runstate', remembering the previous state in
+ * td->old_runstate.
+ */
+ static inline void td_set_runstate(struct thread_data *td, int runstate)
+ {
+ td->old_runstate = td->runstate;
+ td->runstate = runstate;
+ }
+
+ /*
+  * Pop the first entry off the io history list and hand its offset and
+  * length to the caller; the entry itself is freed.
+  * Returns 0 on success, 1 when the history list is empty.
+  */
+ static int get_next_verify(struct thread_data *td,
+ 			   unsigned long long *offset, unsigned int *len)
+ {
+ 	struct io_piece *piece;
+
+ 	if (list_empty(&td->io_hist_list))
+ 		return 1;
+
+ 	piece = list_entry(td->io_hist_list.next, struct io_piece, list);
+
+ 	*len = piece->len;
+ 	*offset = piece->offset;
+
+ 	list_del(&piece->list);
+ 	free(piece);
+ 	return 0;
+ }
+
+ /*
+  * Drop and free every entry of the io history list.
+  */
+ static void prune_io_piece_log(struct thread_data *td)
+ {
+ 	struct list_head *head = &td->io_hist_list;
+
+ 	while (!list_empty(head)) {
+ 		struct io_piece *piece;
+
+ 		piece = list_entry(head->next, struct io_piece, list);
+ 		list_del(&piece->list);
+ 		free(piece);
+ 	}
+ }
+
+/*
+ * log a succesful write, so we can unwind the log for verify
+ */
+static void log_io_piece(struct thread_data *td, struct io_u *io_u)
+{
+ struct io_piece *ipo = malloc(sizeof(struct io_piece));
+ struct list_head *entry;
+
+ INIT_LIST_HEAD(&ipo->list);
+ ipo->offset = io_u->offset;
+ ipo->len = io_u->buflen;
+
+ /*
+ * for random io where the writes extend the file, it will typically
+ * be laid out with the block scattered as written. it's faster to
+ * read them in in that order again, so don't sort
+ */
+ if (td->sequential || !td->overwrite) {
+ list_add_tail(&ipo->list, &td->io_hist_list);
+ return;
+ }
+
+ /*
+ * for random io, sort the list so verify will run faster
+ */
+ entry = &td->io_hist_list;
+ while ((entry = entry->prev) != &td->io_hist_list) {
+ struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
+
+ if (__ipo->offset < ipo->offset)
+ break;
+ }
+
+ list_add(&ipo->list, entry);
+}
+
+ /*
+  * Run the engine's sync hook and return its result; returns 0 when the
+  * engine has no sync hook.
+  */
+ static int sync_td(struct thread_data *td)
+ {
+ 	if (!td->io_sync)
+ 		return 0;
+
+ 	return td->io_sync(td);
+ }
+
+ /*
+  * Thin wrapper around the engine's getevents hook; the arguments are
+  * passed through unchanged and the hook's result is returned.
+  */
+ static int io_u_getevents(struct thread_data *td, int min, int max,
+ 			  struct timespec *t)
+ {
+ 	int nr_events;
+
+ 	nr_events = td->io_getevents(td, min, max, t);
+ 	return nr_events;
+ }
+
+ /*
+  * Stamp the io_u's issue time and hand it to the engine's queue hook.
+  * Returns the hook's result.
+  */
+ static int io_u_queue(struct thread_data *td, struct io_u *io_u)
+ {
+ 	int rc;
+
+ 	gettimeofday(&io_u->issue_time, NULL);
+
+ 	rc = td->io_queue(td, io_u);
+ 	return rc;
+ }
+
+ /*
+ * Read an iocb's 'data' member as an unsigned long.
+ * NOTE(review): no user of this macro is visible nearby; confirm it is
+ * still needed.
+ */
+ #define iocb_time(iocb) ((unsigned long) (iocb)->data)
+
+ /*
+  * Account one completed io_u. A failed io_u only copies its error into
+  * the completion data. A successful one updates the block and byte
+  * counters, adds completion latency and bandwidth samples, logs the
+  * piece for later verification when this is a write done by a writing
+  * job, and adds the transferred bytes to icd->bytes_done.
+  */
+ static void io_completed(struct thread_data *td, struct io_u *io_u,
+ 			 struct io_completion_data *icd)
+ {
+ 	struct timeval now;
+ 	unsigned long msec;
+ 	int idx;
+
+ 	gettimeofday(&now, NULL);
+
+ 	if (io_u->error) {
+ 		icd->error = io_u->error;
+ 		return;
+ 	}
+
+ 	idx = io_u->ddir;
+
+ 	td->io_blocks[idx]++;
+ 	td->io_bytes[idx] += (io_u->buflen - io_u->resid);
+ 	td->this_io_bytes[idx] += (io_u->buflen - io_u->resid);
+
+ 	msec = mtime_since(&io_u->issue_time, &now);
+
+ 	add_clat_sample(td, io_u->ddir, msec);
+ 	add_bw_sample(td, io_u->ddir);
+
+ 	if (td_write(td) && io_u->ddir == DDIR_WRITE)
+ 		log_io_piece(td, io_u);
+
+ 	icd->bytes_done[idx] += (io_u->buflen - io_u->resid);
+ }
+
+ /*
+  * Process the icd->nr events reported by the engine: reset the error and
+  * byte counters in 'icd', then account each completed io_u and return it
+  * to the free list.
+  */
+ static void ios_completed(struct thread_data *td,struct io_completion_data *icd)
+ {
+ 	int ev;
+
+ 	icd->bytes_done[0] = icd->bytes_done[1] = 0;
+ 	icd->error = 0;
+
+ 	for (ev = 0; ev < icd->nr; ev++) {
+ 		struct io_u *done = td->io_event(td, ev);
+
+ 		io_completed(td, done, icd);
+ 		put_io_u(td, done);
+ 	}
+ }
+
+ /*
+ * Drain io that is still in flight. Three steps, in this order: reap
+ * whatever has already completed, cancel what the engine is able to
+ * cancel, then wait for everything that is left.
+ */
+ static void cleanup_pending_aio(struct thread_data *td)
+ {
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
+ struct list_head *entry, *n;
+ struct io_completion_data icd;
+ struct io_u *io_u;
+ int r;
+
+ /*
+ * get immediately available events, if any
+ */
+ r = io_u_getevents(td, 0, td->cur_depth, &ts);
+ if (r > 0) {
+ icd.nr = r;
+ ios_completed(td, &icd);
+ }
+
+ /*
+ * now cancel remaining active events
+ */
+ if (td->io_cancel) {
+ list_for_each_safe(entry, n, &td->io_u_busylist) {
+ io_u = list_entry(entry, struct io_u, list);
+
+ r = td->io_cancel(td, io_u);
+ if (!r)
+ put_io_u(td, io_u);
+ }
+ }
+
+ /*
+ * whatever could not be cancelled is waited for: min and max are
+ * both the current depth and no timeout is given
+ */
+ if (td->cur_depth) {
+ r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
+ if (r > 0) {
+ icd.nr = r;
+ ios_completed(td, &icd);
+ }
+ }
+ }
+
+ /*
+  * Verify the io_u that *io_u points at, if any, then return it to the
+  * free list and clear *io_u.
+  * Returns the verify result, or 0 when there was nothing to verify.
+  */
+ static int do_io_u_verify(struct thread_data *td, struct io_u **io_u)
+ {
+ 	struct io_u *pending = *io_u;
+ 	int failed;
+
+ 	if (!pending)
+ 		return 0;
+
+ 	failed = verify_io_u(pending);
+ 	put_io_u(td, pending);
+ 	*io_u = NULL;
+
+ 	return failed;
+ }
+
+ /*
+ * Verify pass: read back every piece recorded in the io history list and
+ * check its contents. The check of one completed read (v_io_u) is done
+ * after the next read has been queued. The thread runs as TD_VERIFYING
+ * for the duration and is set to TD_RUNNING at the end.
+ */
+ static void do_verify(struct thread_data *td)
+ {
+ struct timeval t;
+ struct io_u *io_u, *v_io_u = NULL;
+ struct io_completion_data icd;
+ int ret;
+
+ td_set_runstate(td, TD_VERIFYING);
+
+ do {
+ if (td->terminate)
+ break;
+
+ gettimeofday(&t, NULL);
+ if (runtime_exceeded(td, &t))
+ break;
+
+ io_u = __get_io_u(td);
+ if (!io_u)
+ break;
+
+ /* the history list running empty ends the pass */
+ if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
+ put_io_u(td, io_u);
+ break;
+ }
+
+ /* verification always reads, whatever the job direction is */
+ if (td_io_prep(td, io_u, 1)) {
+ put_io_u(td, io_u);
+ break;
+ }
+
+ ret = io_u_queue(td, io_u);
+ if (ret) {
+ put_io_u(td, io_u);
+ td_verror(td, ret);
+ break;
+ }
+
+ /*
+ * we have one pending to verify, do that while
+ * we are doing io on the next one
+ */
+ if (do_io_u_verify(td, &v_io_u))
+ break;
+
+ ret = io_u_getevents(td, 1, 1, NULL);
+ if (ret != 1) {
+ if (ret < 0)
+ td_verror(td, ret);
+ break;
+ }
+
+ v_io_u = td->io_event(td, 0);
+ icd.nr = 1;
+ icd.error = 0;
+ io_completed(td, v_io_u, &icd);
+
+ if (icd.error) {
+ td_verror(td, icd.error);
+ put_io_u(td, v_io_u);
+ v_io_u = NULL;
+ break;
+ }
+
+ td->cur_off = v_io_u->offset + v_io_u->buflen;
+
+ /*
+ * if we can't submit more io, we need to verify now
+ */
+ if (queue_full(td) && do_io_u_verify(td, &v_io_u))
+ break;
+
+ } while (1);
+
+ /* check the last completed read, if one is still pending */
+ do_io_u_verify(td, &v_io_u);
+
+ if (td->cur_depth)
+ cleanup_pending_aio(td);
+
+ td_set_runstate(td, TD_RUNNING);
+ }
+
+ /*
+ * Main io loop of a job: keep queueing io_u's and reaping completions
+ * until td->io_size bytes are done in the job's direction, the thread is
+ * told to terminate, an error occurs, the minimum rate is missed or the
+ * runtime is exceeded. Afterwards pending io is drained and, for jobs
+ * where should_fsync() holds, the file is synced.
+ */
+ static void do_io(struct thread_data *td)
+ {
+ struct io_completion_data icd;
+ struct timeval s, e;
+ unsigned long usec;
+
+ while (td->this_io_bytes[td->ddir] < td->io_size) {
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
+ struct timespec *timeout;
+ int ret, min_evts = 0;
+ struct io_u *io_u;
+
+ if (td->terminate)
+ break;
+
+ io_u = get_io_u(td);
+ if (!io_u)
+ break;
+
+ memcpy(&s, &io_u->start_time, sizeof(s));
+
+ ret = io_u_queue(td, io_u);
+ if (ret) {
+ put_io_u(td, io_u);
+ td_verror(td, ret);
+ break;
+ }
+
+ add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
+
+ /*
+ * below the configured depth: only poll for completions with a
+ * zero timeout. at full depth: wait for at least one event
+ */
+ if (td->cur_depth < td->iodepth) {
+ timeout = &ts;
+ min_evts = 0;
+ } else {
+ timeout = NULL;
+ min_evts = 1;
+ }
+
+ ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
+ if (ret < 0) {
+ td_verror(td, ret);
+ break;
+ } else if (!ret)
+ continue;
+
+ icd.nr = ret;
+ ios_completed(td, &icd);
+ if (icd.error) {
+ td_verror(td, icd.error);
+ break;
+ }
+
+ /*
+ * the rate is batched for now, it should work for batches
+ * of completions except the very first one which may look
+ * a little bursty
+ */
+ gettimeofday(&e, NULL);
+ usec = utime_since(&s, &e);
+
+ rate_throttle(td, usec, icd.bytes_done[td->ddir]);
+
+ /*
+ * NOTE(review): a missed minimum rate is reported as ENOMEM,
+ * which looks like a placeholder error code -- confirm
+ */
+ if (check_min_rate(td, &e)) {
+ td_verror(td, ENOMEM);
+ break;
+ }
+
+ if (runtime_exceeded(td, &e))
+ break;
+
+ if (td->thinktime)
+ usec_sleep(td, td->thinktime);
+
+ /* periodic sync after every fsync_blocks written blocks */
+ if (should_fsync(td) && td->fsync_blocks &&
+ (td->io_blocks[DDIR_WRITE] % td->fsync_blocks) == 0)
+ sync_td(td);
+ }
+
+ if (td->cur_depth)
+ cleanup_pending_aio(td);
+
+ if (should_fsync(td))
+ sync_td(td);
+ }
+
+ /*
+  * Run the engine's cleanup hook, if it has one.
+  */
+ static void cleanup_io(struct thread_data *td)
+ {
+ 	if (!td->io_cleanup)
+ 		return;
+
+ 	td->io_cleanup(td);
+ }
+
+ /*
+  * Initialize the io engine selected by td->io_engine and return the
+  * result of that engine's init function. An unknown engine value is
+  * reported on stderr and yields 1.
+  */
+ static int init_io(struct thread_data *td)
+ {
+ 	if (td->io_engine == FIO_SYNCIO)
+ 		return fio_syncio_init(td);
+
+ 	if (td->io_engine == FIO_MMAPIO)
+ 		return fio_mmapio_init(td);
+
+ 	if (td->io_engine == FIO_LIBAIO)
+ 		return fio_libaio_init(td);
+
+ 	if (td->io_engine == FIO_POSIXAIO)
+ 		return fio_posixaio_init(td);
+
+ 	if (td->io_engine == FIO_SGIO)
+ 		return fio_sgio_init(td);
+
+ 	fprintf(stderr, "bad io_engine %d\n", td->io_engine);
+ 	return 1;
+ }
+
+ /*
+  * Free every io_u on the free list and release the data buffer in the
+  * way that matches how it was obtained (malloc, SysV shared memory or
+  * mmap). td->orig_buffer is cleared afterwards.
+  */
+ static void cleanup_io_u(struct thread_data *td)
+ {
+ 	struct list_head *head = &td->io_u_freelist;
+
+ 	while (!list_empty(head)) {
+ 		struct io_u *unit = list_entry(head->next, struct io_u, list);
+
+ 		list_del(&unit->list);
+ 		free(unit);
+ 	}
+
+ 	if (td->mem_type == MEM_MALLOC) {
+ 		free(td->orig_buffer);
+ 	} else if (td->mem_type == MEM_SHM) {
+ 		struct shmid_ds sbuf;
+
+ 		shmdt(td->orig_buffer);
+ 		shmctl(td->shm_id, IPC_RMID, &sbuf);
+ 	} else if (td->mem_type == MEM_MMAP) {
+ 		munmap(td->orig_buffer, td->orig_buffer_size);
+ 	} else {
+ 		fprintf(stderr, "Bad memory type %d\n", td->mem_type);
+ 	}
+
+ 	td->orig_buffer = NULL;
+ }
+
+ /*
+  * Allocate the data buffer and the io_u's of a thread. One io_u is
+  * created when the engine has the FIO_SYNCIO bit set, td->iodepth of
+  * them otherwise. The buffer has room for max_bs bytes per io_u plus
+  * MASK bytes of slack so that the first unit can be aligned with ALIGN().
+  *
+  * The lists are initialized before anything can fail, so they are in a
+  * defined (possibly partly filled) state on every return path.
+  *
+  * Returns 0 on success, 1 on failure (error recorded through td_verror).
+  */
+ static int init_io_u(struct thread_data *td)
+ {
+ 	struct io_u *io_u;
+ 	int i, max_units;
+ 	char *p;
+
+ 	INIT_LIST_HEAD(&td->io_u_freelist);
+ 	INIT_LIST_HEAD(&td->io_u_busylist);
+ 	INIT_LIST_HEAD(&td->io_hist_list);
+
+ 	if (td->io_engine & FIO_SYNCIO)
+ 		max_units = 1;
+ 	else
+ 		max_units = td->iodepth;
+
+ 	td->orig_buffer_size = td->max_bs * max_units + MASK;
+
+ 	if (td->mem_type == MEM_MALLOC) {
+ 		td->orig_buffer = malloc(td->orig_buffer_size);
+ 		if (!td->orig_buffer) {
+ 			td_verror(td, ENOMEM);
+ 			return 1;
+ 		}
+ 	} else if (td->mem_type == MEM_SHM) {
+ 		td->shm_id = shmget(IPC_PRIVATE, td->orig_buffer_size, IPC_CREAT | 0600);
+ 		if (td->shm_id < 0) {
+ 			td_verror(td, errno);
+ 			perror("shmget");
+ 			return 1;
+ 		}
+
+ 		td->orig_buffer = shmat(td->shm_id, NULL, 0);
+ 		if (td->orig_buffer == (void *) -1) {
+ 			td_verror(td, errno);
+ 			perror("shmat");
+ 			td->orig_buffer = NULL;
+ 			return 1;
+ 		}
+ 	} else if (td->mem_type == MEM_MMAP) {
+ 		td->orig_buffer = mmap(NULL, td->orig_buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | OS_MAP_ANON, 0, 0);
+ 		if (td->orig_buffer == MAP_FAILED) {
+ 			td_verror(td, errno);
+ 			perror("mmap");
+ 			td->orig_buffer = NULL;
+ 			return 1;
+ 		}
+ 	} else {
+ 		/* without a buffer the io_u's would point at garbage */
+ 		fprintf(stderr, "Bad memory type %d\n", td->mem_type);
+ 		td_verror(td, EINVAL);
+ 		return 1;
+ 	}
+
+ 	p = ALIGN(td->orig_buffer);
+ 	for (i = 0; i < max_units; i++) {
+ 		io_u = malloc(sizeof(*io_u));
+ 		if (!io_u) {
+ 			td_verror(td, ENOMEM);
+ 			return 1;
+ 		}
+
+ 		memset(io_u, 0, sizeof(*io_u));
+ 		INIT_LIST_HEAD(&io_u->list);
+
+ 		io_u->buf = p + td->max_bs * i;
+ 		list_add(&io_u->list, &td->io_u_freelist);
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Lay out (extend == 0) or extend (extend != 0) the job's file by writing
+  * 'size' bytes of zeroes in chunks of at most td->max_bs. Writing jobs
+  * that do not ask for overwrite skip this and let normal io extend the
+  * file.
+  *
+  * If the thread is told to terminate while writing, the file is unlinked.
+  * Otherwise it is fsync()ed when td->create_fsync is set.
+  *
+  * The descriptor opened here is always closed again and td->fd reset to
+  * -1, also on the error paths.
+  *
+  * Returns 0 on success and 1 on any failure (missing size, open,
+  * ftruncate, allocation or write error); errors are recorded through
+  * td_verror.
+  */
+ static int create_file(struct thread_data *td, unsigned long long size,
+ 		       int extend)
+ {
+ 	unsigned long long left;
+ 	unsigned int bs;
+ 	int r, oflags, ret = 1;
+ 	char *b;
+
+ 	/*
+ 	 * unless specifically asked for overwrite, let normal io extend it
+ 	 */
+ 	if (td_write(td) && !td->overwrite)
+ 		return 0;
+
+ 	if (!size) {
+ 		fprintf(stderr, "Need size for create\n");
+ 		td_verror(td, EINVAL);
+ 		return 1;
+ 	}
+
+ 	if (!extend) {
+ 		oflags = O_CREAT | O_TRUNC;
+ 		printf("Client%d: Laying out IO file (%LuMiB)\n", td->thread_number, size >> 20);
+ 	} else {
+ 		oflags = O_APPEND;
+ 		printf("Client%d: Extending IO file (%Lu -> %LuMiB)\n", td->thread_number, (td->file_size - size) >> 20, td->file_size >> 20);
+ 	}
+
+ 	td->fd = open(td->file_name, O_WRONLY | oflags, 0644);
+ 	if (td->fd < 0) {
+ 		td_verror(td, errno);
+ 		return 1;
+ 	}
+
+ 	if (!extend && ftruncate(td->fd, td->file_size) == -1) {
+ 		td_verror(td, errno);
+ 		goto out_close;
+ 	}
+
+ 	td->io_size = td->file_size;
+ 	b = malloc(td->max_bs);
+ 	if (!b) {
+ 		td_verror(td, ENOMEM);
+ 		goto out_close;
+ 	}
+ 	memset(b, 0, td->max_bs);
+
+ 	ret = 0;
+ 	left = size;
+ 	while (left && !td->terminate) {
+ 		bs = td->max_bs;
+ 		if (bs > left)
+ 			bs = left;
+
+ 		r = write(td->fd, b, bs);
+
+ 		if (r == (int) bs) {
+ 			left -= bs;
+ 			continue;
+ 		}
+
+ 		/* failed or short write: record it and report failure */
+ 		if (r < 0)
+ 			td_verror(td, errno);
+ 		else
+ 			td_verror(td, EIO);
+
+ 		ret = 1;
+ 		break;
+ 	}
+
+ 	if (td->terminate)
+ 		unlink(td->file_name);
+ 	else if (td->create_fsync)
+ 		fsync(td->fd);
+
+ 	free(b);
+ out_close:
+ 	close(td->fd);
+ 	td->fd = -1;
+ 	return ret;
+ }
+
+ /*
+  * For regular files: when no size was configured (td->file_size is 0),
+  * take the size of the open file from fstat().
+  * Returns 0 on success, 1 (after td_verror) when fstat() fails.
+  */
+ static int file_size(struct thread_data *td)
+ {
+ 	struct stat sb;
+
+ 	if (fstat(td->fd, &sb) != -1) {
+ 		if (!td->file_size)
+ 			td->file_size = sb.st_size;
+ 		return 0;
+ 	}
+
+ 	td_verror(td, errno);
+ 	return 1;
+ }
+
+ /*
+  * For block devices: query the device size and use it when no size was
+  * configured or the configured size is larger than the device (a device
+  * cannot be extended).
+  * Returns 0 on success, 1 (after td_verror) when the query fails.
+  */
+ static int bdev_size(struct thread_data *td)
+ {
+ 	size_t dev_bytes;
+ 	int err;
+
+ 	err = blockdev_size(td->fd, &dev_bytes);
+ 	if (err) {
+ 		td_verror(td, err);
+ 		return 1;
+ 	}
+
+ 	if (!td->file_size || td->file_size > dev_bytes)
+ 		td->file_size = dev_bytes;
+
+ 	return 0;
+ }
+
+ /*
+  * Determine the file or device size and derive the amount of io from
+  * it: io_size is the size minus the start offset, total_io_size is
+  * io_size times the number of loops.
+  * Returns 0 on success, non-zero when the size cannot be determined, the
+  * offset lies beyond the end, or nothing is left to do io on.
+  */
+ static int get_file_size(struct thread_data *td)
+ {
+ 	int err;
+
+ 	err = (td->filetype == FIO_TYPE_FILE) ? file_size(td) : bdev_size(td);
+ 	if (err)
+ 		return err;
+
+ 	if (td->file_offset > td->file_size) {
+ 		fprintf(stderr, "Client%d: offset larger than length (%Lu > %Lu)\n", td->thread_number, td->file_offset, td->file_size);
+ 		return 1;
+ 	}
+
+ 	td->io_size = td->file_size - td->file_offset;
+ 	if (!td->io_size) {
+ 		fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
+ 		td_verror(td, EINVAL);
+ 		return 1;
+ 	}
+
+ 	td->total_io_size = td->io_size * td->loops;
+ 	return 0;
+ }
+
+/*
+ * Map the job's file for the mmap io engine and pass access hints to the
+ * kernel. Readers map read-only; writers map write-only unless verification
+ * is enabled, in which case the mapping is readable too.
+ *
+ * Returns 0 on success, 1 (with the error recorded in td) on failure. On a
+ * failed mmap() td->mmap is reset to NULL.
+ */
+static int setup_file_mmap(struct thread_data *td)
+{
+	int prot, advice;
+
+	if (td_read(td))
+		prot = PROT_READ;
+	else if (td->verify != VERIFY_NONE)
+		prot = PROT_WRITE | PROT_READ;
+	else
+		prot = PROT_WRITE;
+
+	td->mmap = mmap(NULL, td->file_size, prot, MAP_SHARED, td->fd, td->file_offset);
+	if (td->mmap == MAP_FAILED) {
+		td->mmap = NULL;
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (td->invalidate_cache && madvise(td->mmap, td->file_size, MADV_DONTNEED) < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	/* exactly one access-pattern hint is given, chosen by the job type */
+	advice = td->sequential ? MADV_SEQUENTIAL : MADV_RANDOM;
+	if (madvise(td->mmap, td->file_size, advice) < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Pass access hints for the job's file range to the kernel for the
+ * non-mmap io engines: optionally drop cached pages, then declare the
+ * access pattern as sequential or random.
+ *
+ * Returns 0 on success, 1 (with the error recorded in td) on failure.
+ */
+static int setup_file_plain(struct thread_data *td)
+{
+	int advice;
+
+	if (td->invalidate_cache && fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	advice = td->sequential ? POSIX_FADV_SEQUENTIAL : POSIX_FADV_RANDOM;
+	if (fadvise(td->fd, td->file_offset, td->file_size, advice) < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Make sure the job's file exists and is large enough, open it with the
+ * flags the job asks for, work out the IO size and finish with the engine
+ * specific setup (mmap or plain).
+ *
+ * Returns 0 on success, 1 on any failure.
+ */
+static int setup_file(struct thread_data *td)
+{
+	struct stat sb;
+	int oflags;
+
+	if (stat(td->file_name, &sb) == -1) {
+		/* only a missing file is recoverable, and only if we may create it */
+		if (errno != ENOENT) {
+			td_verror(td, errno);
+			return 1;
+		}
+		if (!td->create_file) {
+			td_verror(td, ENOENT);
+			return 1;
+		}
+		if (create_file(td, td->file_size, 0))
+			return 1;
+	} else if (td->filetype == FIO_TYPE_FILE && sb.st_size < td->file_size) {
+		/* existing regular file is too short, append the difference */
+		if (create_file(td, td->file_size - sb.st_size, 1))
+			return 1;
+	}
+
+	oflags = td->odirect ? O_DIRECT : 0;
+
+	if (td_read(td)) {
+		td->fd = open(td->file_name, oflags | O_RDONLY);
+	} else {
+		oflags |= O_RDWR;
+
+		if (td->sync_io)
+			oflags |= O_SYNC;
+
+		if (td->filetype == FIO_TYPE_FILE) {
+			oflags |= O_CREAT;
+
+			if (!td->overwrite)
+				oflags |= O_TRUNC;
+		}
+
+		td->fd = open(td->file_name, oflags, 0600);
+	}
+
+	if (td->fd == -1) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (get_file_size(td))
+		return 1;
+
+	return (td->io_engine == FIO_MMAPIO) ? setup_file_mmap(td) : setup_file_plain(td);
+}
+
+/*
+ * Read a "major:minor" pair from the first line of the file at path and
+ * compare it with dev.
+ *
+ * Returns 0 when the pair matches dev, 1 when it does not or when the file
+ * cannot be opened, read or parsed.
+ */
+static int check_dev_match(dev_t dev, char *path)
+{
+	unsigned int maj, min;
+	char buf[256];
+	int mismatch = 1;
+	FILE *fp;
+
+	fp = fopen(path, "r");
+	if (!fp) {
+		perror("open path");
+		return 1;
+	}
+
+	/*
+	 * NOTE(review): the pair is packed as (major << 8) | minor before the
+	 * comparison; confirm this matches the platform's dev_t encoding.
+	 */
+	if (fgets(buf, sizeof(buf), fp) && sscanf(buf, "%u:%u", &maj, &min) == 2)
+		mismatch = (((maj << 8) | min) != dev);
+
+	fclose(fp);
+	return mismatch;
+}
+
+/*
+ * Recursively search the directory tree rooted at path for a "dev" file
+ * whose contents match dev (see check_dev_match()).
+ *
+ * path is both input and output: when the match is found in a
+ * sub-directory, path is overwritten with that sub-directory's name. The
+ * callers in this file pass 256 byte buffers.
+ *
+ * Entries named ".", ".." and "device" are never visited. Entries whose
+ * full name would not fit the local path buffer are skipped rather than
+ * overflowing it.
+ *
+ * Returns 1 if a matching directory was found, 0 otherwise.
+ */
+static int find_block_dir(dev_t dev, char *path)
+{
+	struct dirent *dir;
+	struct stat st;
+	int found = 0;
+	DIR *D;
+
+	D = opendir(path);
+	if (!D)
+		return 0;
+
+	while ((dir = readdir(D)) != NULL) {
+		char full_path[256];
+		int len;
+
+		if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
+			continue;
+		if (!strcmp(dir->d_name, "device"))
+			continue;
+
+		/* bounded formatting: a truncated name can never match, skip it */
+		len = snprintf(full_path, sizeof(full_path), "%s/%s", path, dir->d_name);
+		if (len < 0 || (size_t) len >= sizeof(full_path))
+			continue;
+
+		if (!strcmp(dir->d_name, "dev")) {
+			if (!check_dev_match(dev, full_path)) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (stat(full_path, &st) == -1) {
+			perror("stat");
+			break;
+		}
+
+		if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
+			continue;
+
+		found = find_block_dir(dev, full_path);
+		if (found) {
+			/* full_path fits in 256 bytes, same as the caller's buffer */
+			strcpy(path, full_path);
+			break;
+		}
+	}
+
+	closedir(D);
+	return found;
+}
+
+/*
+ * Parse the first line of the stat file at du->path into dus. The line
+ * must carry eleven numeric fields; the ninth (in flight) is read but not
+ * stored.
+ *
+ * Returns 0 on success, 1 if the file cannot be opened, read or parsed.
+ */
+static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
+{
+	unsigned in_flight;
+	char buf[256];
+	int ret = 1;
+	FILE *fp;
+
+	fp = fopen(du->path, "r");
+	if (!fp)
+		return 1;
+
+	if (fgets(buf, sizeof(buf), fp)) {
+		int fields = sscanf(buf, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], &dus->merges[0], &dus->sectors[0], &dus->ticks[0], &dus->ios[1], &dus->merges[1], &dus->sectors[1], &dus->ticks[1], &in_flight, &dus->io_ticks, &dus->time_in_queue);
+
+		if (fields == 11)
+			ret = 0;
+	}
+
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Take a fresh stat sample for one disk and add the change since the
+ * previous sample to the running totals in du->dus. Also accumulates the
+ * elapsed time in du->msec and remembers the sample for the next round.
+ * Does nothing if the stat file cannot be read.
+ */
+static void update_io_tick_disk(struct disk_util *du)
+{
+	struct disk_util_stat *total = &du->dus;
+	struct disk_util_stat *prev = &du->last_dus;
+	struct disk_util_stat now;
+	struct timeval tv;
+	int i;
+
+	if (get_io_ticks(du, &now))
+		return;
+
+	/* index 0 and 1 are the two per-direction slots of each counter */
+	for (i = 0; i < 2; i++) {
+		total->sectors[i] += now.sectors[i] - prev->sectors[i];
+		total->ios[i] += now.ios[i] - prev->ios[i];
+		total->merges[i] += now.merges[i] - prev->merges[i];
+		total->ticks[i] += now.ticks[i] - prev->ticks[i];
+	}
+
+	total->io_ticks += now.io_ticks - prev->io_ticks;
+	total->time_in_queue += now.time_in_queue - prev->time_in_queue;
+
+	gettimeofday(&tv, NULL);
+	du->msec += mtime_since(&du->time, &tv);
+	du->time = tv;
+	*prev = now;
+}
+
+/*
+ * Refresh the utilisation counters of every disk on disk_list.
+ */
+static void update_io_ticks(void)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &disk_list)
+		update_io_tick_disk(list_entry(pos, struct disk_util, list));
+}
+
+/*
+ * Tell whether a disk with device number dev is already on disk_list.
+ * Returns 1 if it is, 0 if not.
+ */
+static int disk_util_exists(dev_t dev)
+{
+	struct list_head *pos;
+	int seen = 0;
+
+	list_for_each(pos, &disk_list) {
+		struct disk_util *cur = list_entry(pos, struct disk_util, list);
+
+		if (cur->dev == dev) {
+			seen = 1;
+			break;
+		}
+	}
+
+	return seen;
+}
+
+/*
+ * Start tracking utilisation for the disk whose directory is path:
+ * allocate an entry, remember "<path>/stat" as the file to sample, take a
+ * baseline sample and append the entry to disk_list.
+ *
+ * On allocation failure, or if the stat file name does not fit du->path,
+ * a message is printed and the disk is simply not tracked.
+ */
+static void disk_util_add(dev_t dev, char *path)
+{
+	struct disk_util *du = malloc(sizeof(*du));
+	int len;
+
+	if (!du) {
+		perror("disk util malloc");
+		return;
+	}
+
+	memset(du, 0, sizeof(*du));
+	INIT_LIST_HEAD(&du->list);
+
+	/* bounded formatting: du->path is a fixed size array */
+	len = snprintf(du->path, sizeof(du->path), "%s/stat", path);
+	if (len < 0 || (size_t) len >= sizeof(du->path)) {
+		fprintf(stderr, "disk util path too long\n");
+		free(du);
+		return;
+	}
+
+	/* basename() may modify path, so it is called after path was used */
+	du->name = strdup(basename(path));
+	if (!du->name) {
+		perror("disk util strdup");
+		free(du);
+		return;
+	}
+
+	du->dev = dev;
+
+	/* baseline: later samples are accounted relative to this one */
+	gettimeofday(&du->time, NULL);
+	get_io_ticks(du, &du->last_dus);
+
+	list_add_tail(&du->list, &disk_list);
+}
+
+/*
+ * Register the disk backing td's file for utilisation tracking, unless
+ * tracking is disabled for the job or the disk is already registered.
+ *
+ * The device number comes from the file itself (st_rdev for a block
+ * device, st_dev otherwise) or, if the file cannot be stat'ed, from its
+ * directory. The matching directory is then looked up below /sys/block;
+ * if it has no "queue" entry it is taken to be a partition and its parent
+ * is used instead.
+ */
+static void init_disk_util(struct thread_data *td)
+{
+	struct stat st;
+	char foo[256], tmp[256];
+	dev_t dev;
+	char *p;
+
+	if (!td->do_disk_util)
+		return;
+
+	if (!stat(td->file_name, &st)) {
+		if (S_ISBLK(st.st_mode))
+			dev = st.st_rdev;
+		else
+			dev = st.st_dev;
+	} else {
+		/*
+		 * must be a file, open "." in that path
+		 */
+		snprintf(foo, sizeof(foo), "%s", td->file_name);
+		p = dirname(foo);
+		if (stat(p, &st)) {
+			perror("disk util stat");
+			return;
+		}
+
+		dev = st.st_dev;
+	}
+
+	if (disk_util_exists(dev))
+		return;
+
+	strcpy(foo, "/sys/block");
+	if (!find_block_dir(dev, foo))
+		return;
+
+	/*
+	 * if this is inside a partition dir, jump back to parent
+	 */
+	snprintf(tmp, sizeof(tmp), "%s/queue", foo);
+	if (stat(tmp, &st)) {
+		p = dirname(foo);
+		snprintf(tmp, sizeof(tmp), "%s/queue", p);
+		if (stat(tmp, &st)) {
+			fprintf(stderr, "unknown sysfs layout\n");
+			return;
+		}
+
+		/*
+		 * dirname() may hand back a pointer into foo itself, so the
+		 * copy has to tolerate overlapping source and destination
+		 */
+		memmove(foo, p, strlen(p) + 1);
+	}
+
+	disk_util_add(dev, foo);
+}
+
+/*
+ * Arm the real-time interval timer so that it expires DISK_UTIL_MSEC
+ * milliseconds from now.
+ */
+static void disk_util_timer_arm(void)
+{
+	itimer.it_value.tv_usec = DISK_UTIL_MSEC * 1000;
+	itimer.it_value.tv_sec = 0;
+
+	setitimer(ITIMER_REAL, &itimer, NULL);
+}
+
+/*
+ * Reset the per-pass IO state of a job: rewind the file for the sync
+ * engine, zero the offset and byte counters, and clear the block map if
+ * one is allocated.
+ */
+static void clear_io_state(struct thread_data *td)
+{
+	/* lseek() takes the offset first and the whence value last */
+	if (td->io_engine == FIO_SYNCIO)
+		lseek(td->fd, 0, SEEK_SET);
+
+	td->cur_off = 0;
+	td->last_bytes = 0;
+	td->stat_io_bytes[0] = td->stat_io_bytes[1] = 0;
+	td->this_io_bytes[0] = td->this_io_bytes[1] = 0;
+
+	if (td->file_map)
+		memset(td->file_map, 0, td->num_maps * sizeof(long));
+}
+
+/*
+ * Sample the resource usage of the calling process and add the user time,
+ * system time and context switches consumed since ru_start to the job's
+ * totals. The new sample then becomes ru_start. Nothing is done for a job
+ * with no accumulated runtime.
+ */
+static void update_rusage_stat(struct thread_data *td)
+{
+	long switches_before, switches_after;
+
+	if (td->runtime[0] + td->runtime[1] == 0)
+		return;
+
+	getrusage(RUSAGE_SELF, &td->ru_end);
+
+	td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
+	td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
+
+	/* voluntary plus involuntary context switches */
+	switches_after = td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw;
+	switches_before = td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw;
+	td->ctx += switches_after - switches_before;
+
+	td->ru_start = td->ru_end;
+}
+
+/*
+ * Body of one job, run either as a pthread or inside a forked child.
+ *
+ * Sequence: per-job initialisation, startup handshake with run_threads()
+ * (post startup_sem, then wait on td->mutex until released), file setup
+ * unless that was already done serially, then td->loops passes of do_io()
+ * each optionally followed by do_verify(). On the way out all resources
+ * are released and the job is marked TD_EXITED.
+ *
+ * A job that fails before the handshake still performs it from the error
+ * path, so it only marks itself TD_EXITED after run_threads() released it.
+ * A job that fails after the handshake must not repeat it: it would wait a
+ * second time on td->mutex, which run_threads() posts only once per job.
+ *
+ * Always returns NULL.
+ */
+static void *thread_main(void *data)
+{
+	struct thread_data *td = data;
+	int ret = 1;
+	int released = 0;	/* set once the startup handshake is done */
+
+	if (!td->use_thread)
+		setsid();
+
+	td->pid = getpid();
+
+	if (init_io_u(td))
+		goto err;
+
+	if (fio_setaffinity(td) == -1) {
+		td_verror(td, errno);
+		goto err;
+	}
+
+	if (init_io(td))
+		goto err;
+
+	if (td->ioprio) {
+		if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
+			td_verror(td, errno);
+			goto err;
+		}
+	}
+
+	/* startup handshake: report in, then wait to be released */
+	sem_post(&startup_sem);
+	sem_wait(&td->mutex);
+	released = 1;
+
+	if (!td->create_serialize && setup_file(td))
+		goto err;
+
+	if (init_random_state(td))
+		goto err;
+
+	gettimeofday(&td->epoch, NULL);
+
+	while (td->loops--) {
+		getrusage(RUSAGE_SELF, &td->ru_start);
+		gettimeofday(&td->start, NULL);
+		memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
+
+		if (td->ratemin)
+			memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
+
+		clear_io_state(td);
+		prune_io_piece_log(td);
+
+		do_io(td);
+
+		td->runtime[td->ddir] += mtime_since_now(&td->start);
+		update_rusage_stat(td);
+
+		if (td->error || td->terminate)
+			break;
+
+		if (td->verify == VERIFY_NONE)
+			continue;
+
+		/* verification pass, accounted as read time */
+		clear_io_state(td);
+		gettimeofday(&td->start, NULL);
+
+		do_verify(td);
+
+		td->runtime[DDIR_READ] += mtime_since_now(&td->start);
+
+		if (td->error || td->terminate)
+			break;
+	}
+
+	ret = 0;
+
+	if (td->bw_log)
+		finish_log(td, td->bw_log, "bw");
+	if (td->slat_log)
+		finish_log(td, td->slat_log, "slat");
+	if (td->clat_log)
+		finish_log(td, td->clat_log, "clat");
+
+	if (exitall_on_terminate)
+		terminate_threads(td->groupid);
+
+err:
+	if (td->fd != -1) {
+		close(td->fd);
+		td->fd = -1;
+	}
+	if (td->mmap) {
+		munmap(td->mmap, td->file_size);
+		td->mmap = NULL;
+	}
+	cleanup_io(td);
+	cleanup_io_u(td);
+
+	/* failed before the handshake: perform it now, exactly once */
+	if (ret && !released) {
+		sem_post(&startup_sem);
+		sem_wait(&td->mutex);
+	}
+	td_set_runstate(td, TD_EXITED);
+	return NULL;
+
+}
+
+/*
+ * Entry point of a forked job: attach the shared memory segment shmid,
+ * treat it as an array of struct thread_data, run the job at index offset
+ * and detach again.
+ *
+ * Always returns NULL; an shmat() failure is reported with perror().
+ */
+static void *fork_main(int shmid, int offset)
+{
+	struct thread_data *td;
+	void *data;
+
+	data = shmat(shmid, NULL, 0);
+	if (data == (void *) -1) {
+		perror("shmat");
+		return NULL;
+	}
+
+	/* typed pointer arithmetic; arithmetic on void * is not standard C */
+	td = (struct thread_data *) data + offset;
+	thread_main(td);
+	shmdt(data);
+	return NULL;
+}
+
+/*
+ * Derive min, max, mean and sample standard deviation from an io_stat.
+ *
+ * Assumes is->val holds the sum of all samples and is->val_sq the sum of
+ * their squares (the accumulation code is not in this part of the file;
+ * TODO confirm). With that, the sample variance is
+ *
+ *	(val_sq - val * mean) / (n - 1)
+ *
+ * which is only defined for n > 1; a single sample reports a deviation
+ * of 0. Rounding can push the variance slightly below zero, so it is
+ * clamped before taking the square root.
+ *
+ * Returns 0 if there are no samples or every result is zero (the outputs
+ * are untouched in the former case), 1 otherwise.
+ */
+static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
+		    double *mean, double *dev)
+{
+	double n;
+
+	if (is->samples == 0)
+		return 0;
+
+	*min = is->min_val;
+	*max = is->max_val;
+
+	n = (double) is->samples;
+	*mean = (double) is->val / n;
+
+	if (is->samples > 1) {
+		double var = ((double) is->val_sq - (double) is->val * *mean) / (n - 1);
+
+		*dev = var > 0.0 ? sqrt(var) : 0.0;
+	} else
+		*dev = 0.0;
+
+	if (!(*min + *max) && !(*mean + *dev))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Print the summary for one data direction of a job: amount of IO,
+ * bandwidth and runtime, followed by the submission latency, completion
+ * latency and bandwidth statistics that have samples. Nothing is printed
+ * for a direction with zero runtime.
+ */
+static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs,
+			     int ddir)
+{
+	char *ddir_str[] = { "read ", "write" };
+	unsigned long lo, hi, bw;
+	unsigned long runt = td->runtime[ddir];
+	double avg, sd;
+
+	if (runt == 0)
+		return;
+
+	bw = td->io_bytes[ddir] / runt;
+	printf(" %s: io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", ddir_str[ddir], td->io_bytes[ddir] >> 20, bw, runt);
+
+	if (calc_lat(&td->slat_stat[ddir], &lo, &hi, &avg, &sd))
+		printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", lo, hi, avg, sd);
+
+	if (calc_lat(&td->clat_stat[ddir], &lo, &hi, &avg, &sd))
+		printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", lo, hi, avg, sd);
+
+	if (calc_lat(&td->bw_stat[ddir], &lo, &hi, &avg, &sd)) {
+		/* this job's mean bandwidth as a share of the group aggregate */
+		double share = avg * 100 / (double) rs->agg[ddir];
+
+		printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", lo, hi, share, avg, sd);
+	}
+}
+
+/*
+ * Print the final report of one job: a header line, the per-direction
+ * summaries (the job's own direction first) and the CPU usage relative to
+ * the total runtime. Jobs that moved no data and had no error are skipped.
+ */
+static void show_thread_status(struct thread_data *td,
+			       struct group_run_stats *rs)
+{
+	double usr_cpu = 0, sys_cpu = 0;
+
+	if (!td->error && !(td->io_bytes[0] + td->io_bytes[1]))
+		return;
+
+	printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error);
+
+	show_ddir_status(td, rs, td->ddir);
+	show_ddir_status(td, rs, td->ddir ^ 1);
+
+	if (td->runtime[0] + td->runtime[1]) {
+		double total_runt = td->runtime[0] + td->runtime[1];
+
+		usr_cpu = (double) td->usr_time * 100 / total_runt;
+		sys_cpu = (double) td->sys_time * 100 / total_runt;
+	}
+
+	printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
+}
+
+/*
+ * Refresh this job's character in run_str when its run state changed
+ * since the last call. An unknown state is printed and leaves the
+ * character as it was.
+ */
+static void check_str_update(struct thread_data *td)
+{
+	int slot = td->thread_number - 1;
+	char c;
+
+	if (td->old_runstate == td->runstate)
+		return;
+
+	c = run_str[slot];
+
+	switch (td->runstate) {
+	case TD_NOT_CREATED:
+		c = 'P';
+		break;
+	case TD_CREATED:
+		c = 'C';
+		break;
+	case TD_RUNNING:
+		/* upper case for sequential jobs, lower case for random ones */
+		if (td_read(td))
+			c = td->sequential ? 'R' : 'r';
+		else
+			c = td->sequential ? 'W' : 'w';
+		break;
+	case TD_VERIFYING:
+		c = 'V';
+		break;
+	case TD_EXITED:
+		c = 'E';
+		break;
+	case TD_REAPED:
+		c = '_';
+		break;
+	default:
+		printf("state %d\n", td->runstate);
+	}
+
+	run_str[slot] = c;
+	td->old_runstate = td->runstate;
+}
+
+/*
+ * Print a one-line progress report, terminated by a carriage return: the
+ * number of running jobs, the summed rate settings if any are set, the
+ * per-job state string and the percentage of the planned IO that is done.
+ */
+static void print_thread_status(void)
+{
+	unsigned long long done = 0, total = 0;
+	int running = 0, rate_sum = 0, ratemin_sum = 0;
+	double pct = 0;
+	int idx;
+
+	for (idx = 0; idx < thread_number; idx++) {
+		struct thread_data *td = &threads[idx];
+		int active = td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING;
+
+		if (active) {
+			running++;
+			rate_sum += td->rate;
+			ratemin_sum += td->ratemin;
+		}
+
+		/* a job with verification enabled counts its IO size twice */
+		total += td->total_io_size;
+		if (td->verify)
+			total += td->total_io_size;
+
+		done += td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE];
+
+		check_str_update(td);
+	}
+
+	if (total && done) {
+		pct = (double) 100 * done / (double) total;
+		if (pct > 100.0)
+			pct = 100.0;
+	}
+
+	printf("Threads now running: %d", running);
+	if (ratemin_sum || rate_sum)
+		printf(", commitrate %d/%dKiB/sec", rate_sum, ratemin_sum);
+	printf(" : [%s] [%3.2f%% done]\r", run_str, pct);
+	fflush(stdout);
+}
+
+/*
+ * Collect every job in state TD_EXITED: mark it TD_REAPED, join the
+ * thread or wait for the child process, and remove the job from the
+ * caller's running count and rate sums.
+ */
+static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
+{
+	int idx;
+
+	for (idx = 0; idx < thread_number; idx++) {
+		struct thread_data *td = &threads[idx];
+
+		if (td->runstate == TD_EXITED) {
+			td_set_runstate(td, TD_REAPED);
+
+			if (!td->use_thread) {
+				waitpid(td->pid, NULL, 0);
+			} else {
+				long ret;
+
+				if (pthread_join(td->thread, (void *) &ret))
+					perror("thread_join");
+			}
+
+			*nr_running -= 1;
+			*m_rate -= td->ratemin;
+			*t_rate -= td->rate;
+		}
+	}
+}
+
+/*
+ * Run all jobs to completion.
+ *
+ * First pass: mark every job pending in run_str, register its disk for
+ * utilisation tracking and, for jobs with create_serialize set, set up
+ * the file right here so that layout happens one job at a time.
+ *
+ * Main loop, repeated while jobs remain to be created: create jobs
+ * (TD_NOT_CREATED -> TD_CREATED), release them (TD_CREATED -> TD_RUNNING
+ * by posting td->mutex) and reap the ones that exited. Once everything
+ * has been created the function keeps reaping until no job is running,
+ * then takes a final disk statistics sample.
+ */
+static void run_threads(void)
+{
+ struct timeval genesis;
+ struct thread_data *td;
+ unsigned long spent;
+ int i, todo, nr_running, m_rate, t_rate, nr_started;
+
+ printf("Starting %d thread%s\n", thread_number, thread_number > 1 ? "s" : "");
+ fflush(stdout);
+
+ /* both signals are routed to the same handler */
+ signal(SIGINT, sig_handler);
+ signal(SIGALRM, sig_handler);
+
+ todo = thread_number;
+ nr_running = 0;
+ nr_started = 0;
+ m_rate = t_rate = 0;
+
+ for (i = 0; i < thread_number; i++) {
+ td = &threads[i];
+
+ run_str[td->thread_number - 1] = 'P';
+
+ init_disk_util(td);
+
+ if (!td->create_serialize)
+ continue;
+
+ /*
+ * do file setup here so it happens sequentially,
+ * we don't want X number of threads getting their
+ * client data interspersed on disk
+ */
+ if (setup_file(td)) {
+ td_set_runstate(td, TD_REAPED);
+ todo--;
+ }
+ }
+
+ /* start_delay of each job is measured from this point */
+ gettimeofday(&genesis, NULL);
+
+ while (todo) {
+ /*
+ * create threads (TD_NOT_CREATED -> TD_CREATED)
+ */
+ for (i = 0; i < thread_number; i++) {
+ td = &threads[i];
+
+ if (td->runstate != TD_NOT_CREATED)
+ continue;
+
+ /*
+ * never got a chance to start, killed by other
+ * thread for some reason
+ */
+ if (td->terminate) {
+ todo--;
+ continue;
+ }
+
+ /* start_delay is scaled by 1000 to compare with elapsed msec */
+ if (td->start_delay) {
+ spent = mtime_since_now(&genesis);
+
+ if (td->start_delay * 1000 > spent)
+ continue;
+ }
+
+ /*
+ * a stonewall job is not created, and no job after it is
+ * considered in this round, while others are started or running
+ */
+ if (td->stonewall && (nr_started || nr_running))
+ break;
+
+ td_set_runstate(td, TD_CREATED);
+ sem_init(&startup_sem, 0, 1);
+ todo--;
+ nr_started++;
+
+ /*
+ * NOTE(review): if pthread_create() fails the job stays in
+ * TD_CREATED and is still moved to TD_RUNNING below; a fork()
+ * result of -1 takes the parent branch. Confirm neither case
+ * leaves the loops below waiting on a job that never exits.
+ */
+ if (td->use_thread) {
+ if (pthread_create(&td->thread, NULL, thread_main, td)) {
+ perror("thread_create");
+ nr_started--;
+ }
+ } else {
+ if (fork())
+ sem_wait(&startup_sem);
+ else {
+ fork_main(shm_id, i);
+ exit(0);
+ }
+ }
+ }
+
+ /*
+ * start created threads (TD_CREATED -> TD_RUNNING)
+ */
+ for (i = 0; i < thread_number; i++) {
+ td = &threads[i];
+
+ if (td->runstate != TD_CREATED)
+ continue;
+
+ td_set_runstate(td, TD_RUNNING);
+ nr_running++;
+ nr_started--;
+ m_rate += td->ratemin;
+ t_rate += td->rate;
+ /* releases the job blocked in sem_wait(&td->mutex) */
+ sem_post(&td->mutex);
+ }
+
+ reap_threads(&nr_running, &t_rate, &m_rate);
+
+ if (todo)
+ usleep(100000);
+ }
+
+ /* everything has been created, wait for the remaining jobs */
+ while (nr_running) {
+ reap_threads(&nr_running, &t_rate, &m_rate);
+ usleep(10000);
+ }
+
+ update_io_ticks();
+}
+
+/*
+ * Print the aggregate read and write lines for group id. A direction is
+ * only shown when its maximum runtime is non-zero.
+ */
+static void show_group_stats(struct group_run_stats *rs, int id)
+{
+	const int rd = DDIR_READ, wr = DDIR_WRITE;
+
+	printf("\nRun status group %d (all jobs):\n", id);
+
+	if (rs->max_run[rd] != 0)
+		printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[rd], rs->agg[rd], rs->min_bw[rd], rs->max_bw[rd], rs->min_run[rd], rs->max_run[rd]);
+
+	if (rs->max_run[wr] != 0)
+		printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[wr], rs->agg[wr], rs->min_bw[wr], rs->max_bw[wr], rs->min_run[wr], rs->max_run[wr]);
+}
+
+/*
+ * Print one line of accumulated statistics for every disk on disk_list.
+ * Utilisation is io_ticks as a percentage of the sampled time, capped
+ * at 100%.
+ */
+static void show_disk_util(void)
+{
+	struct list_head *pos;
+
+	printf("\nDisk stats (read/write):\n");
+
+	list_for_each(pos, &disk_list) {
+		struct disk_util *du = list_entry(pos, struct disk_util, list);
+		struct disk_util_stat *s = &du->dus;
+		double util = (double) 100 * s->io_ticks / (double) du->msec;
+
+		if (util > 100.0)
+			util = 100.0;
+
+		printf(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n", du->name, s->ios[0], s->ios[1], s->merges[0], s->merges[1], s->ticks[0], s->ticks[1], s->time_in_queue, util);
+	}
+}
+
+/*
+ * Print the final report: an error line for every failed job, the status
+ * of every job, the aggregate numbers of every group and the disk
+ * utilisation.
+ *
+ * Group statistics are collected in a temporary array with one slot per
+ * group id (0..groupid). It is released before returning; if it cannot be
+ * allocated an error is printed and no report is produced.
+ */
+static void show_run_stats(void)
+{
+	struct group_run_stats *runstats, *rs;
+	struct thread_data *td;
+	int i;
+
+	runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
+	if (!runstats) {
+		perror("malloc");
+		return;
+	}
+
+	for (i = 0; i < groupid + 1; i++) {
+		rs = &runstats[i];
+
+		memset(rs, 0, sizeof(*rs));
+		/* minimums start at the largest value so any sample lowers them */
+		rs->min_bw[0] = rs->min_run[0] = ~0UL;
+		rs->min_bw[1] = rs->min_run[1] = ~0UL;
+	}
+
+	for (i = 0; i < thread_number; i++) {
+		unsigned long rbw, wbw;
+
+		td = &threads[i];
+
+		/* failed jobs are reported but kept out of the group numbers */
+		if (td->error) {
+			printf("Client%d: %s\n", td->thread_number, td->verror);
+			continue;
+		}
+
+		rs = &runstats[td->groupid];
+
+		if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0])
+			rs->min_run[0] = td->runtime[0];
+		if (td->runtime[0] > rs->max_run[0])
+			rs->max_run[0] = td->runtime[0];
+		if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1])
+			rs->min_run[1] = td->runtime[1];
+		if (td->runtime[1] > rs->max_run[1])
+			rs->max_run[1] = td->runtime[1];
+
+		rbw = wbw = 0;
+		if (td->runtime[0])
+			rbw = td->io_bytes[0] / td->runtime[0];
+		if (td->runtime[1])
+			wbw = td->io_bytes[1] / td->runtime[1];
+
+		if (rbw < rs->min_bw[0])
+			rs->min_bw[0] = rbw;
+		if (wbw < rs->min_bw[1])
+			rs->min_bw[1] = wbw;
+		if (rbw > rs->max_bw[0])
+			rs->max_bw[0] = rbw;
+		if (wbw > rs->max_bw[1])
+			rs->max_bw[1] = wbw;
+
+		rs->io_mb[0] += td->io_bytes[0] >> 20;
+		rs->io_mb[1] += td->io_bytes[1] >> 20;
+	}
+
+	for (i = 0; i < groupid + 1; i++) {
+		rs = &runstats[i];
+
+		/* MiB over msec, scaled by 1024 * 1000 */
+		if (rs->max_run[0])
+			rs->agg[0] = (rs->io_mb[0]*1024*1000) / rs->max_run[0];
+		if (rs->max_run[1])
+			rs->agg[1] = (rs->io_mb[1]*1024*1000) / rs->max_run[1];
+	}
+
+	/*
+	 * don't overwrite last signal output
+	 */
+	printf("\n");
+
+	for (i = 0; i < thread_number; i++) {
+		td = &threads[i];
+		rs = &runstats[td->groupid];
+
+		show_thread_status(td, rs);
+	}
+
+	for (i = 0; i < groupid + 1; i++)
+		show_group_stats(&runstats[i], i);
+
+	show_disk_util();
+
+	free(runstats);
+}
+
+/*
+ * Program entry: parse the options, run every configured job and print
+ * the final statistics. Exits with 1 if option parsing fails or no job
+ * was defined, 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int bad_options = parse_options(argc, argv);
+
+	if (bad_options)
+		return 1;
+
+	if (thread_number == 0) {
+		printf("Nothing to do\n");
+		return 1;
+	}
+
+	disk_util_timer_arm();
+	run_threads();
+	show_run_stats();
+
+	return 0;
+}
--- /dev/null
+#ifndef FIO_H
+#define FIO_H
+
+#include <sched.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <semaphore.h>
+
+#include "list.h"
+#include "md5.h"
+#include "crc32.h"
+#include "arch.h"
+#include "os.h"
+
+/*
+ * Running statistics for one measured quantity. calc_lat() reports
+ * min_val/max_val directly and derives the mean as val / samples.
+ */
+struct io_stat {
+ unsigned long val;
+ /* used by calc_lat() for the deviation; presumably the sum of squares */
+ unsigned long val_sq;
+ unsigned long max_val;
+ unsigned long min_val;
+ unsigned long samples;
+};
+
+/*
+ * One entry of an io_log: a value, the time it belongs to and a data
+ * direction.
+ */
+struct io_sample {
+ unsigned long time;
+ unsigned long val;
+ unsigned int ddir;
+};
+
+/*
+ * An array of io_sample entries together with the number of samples
+ * stored and a maximum sample count.
+ */
+struct io_log {
+ unsigned long nr_samples;
+ unsigned long max_samples;
+ struct io_sample *log;
+};
+
+/*
+ * A list node describing one piece of IO by its offset and length.
+ */
+struct io_piece {
+ struct list_head list;
+ unsigned long long offset;
+ unsigned int len;
+};
+
+/*
+ * The io unit
+ *
+ * Starts with an anonymous union holding the control block of whichever
+ * io engines are compiled in (libaio, posix aio, sg), followed by
+ * timestamps, the data buffer with its length and file offset, result
+ * fields and a list node.
+ */
+struct io_u {
+ union {
+#ifdef FIO_HAVE_LIBAIO
+ struct iocb iocb;
+#endif
+#ifdef FIO_HAVE_POSIXAIO
+ struct aiocb aiocb;
+#endif
+#ifdef FIO_HAVE_SGIO
+ struct sg_io_hdr hdr;
+#endif
+ };
+ struct timeval start_time;
+ struct timeval issue_time;
+
+ char *buf;
+ unsigned int buflen;
+ unsigned long long offset;
+
+ unsigned int resid;
+ unsigned int error;
+
+ unsigned char seen;
+ unsigned char ddir;
+
+ struct list_head list;
+};
+
+/* magic value for the fio_magic field of struct verify_header */
+#define FIO_HDR_MAGIC 0xf00baaef
+
+/*
+ * Verification methods. VERIFY_NONE is 0, so the setting can be tested
+ * as a plain truth value.
+ */
+enum {
+ VERIFY_NONE = 0,
+ VERIFY_MD5,
+ VERIFY_CRC32,
+};
+
+/*
+ * Verification header: a magic value, a length, the verify type and the
+ * checksum itself, stored as either an md5 digest or a crc32 value in the
+ * same storage.
+ */
+struct verify_header {
+ unsigned int fio_magic;
+ unsigned int len;
+ unsigned int verify_type;
+ union {
+ char md5_digest[MD5_HASH_WORDS * 4];
+ unsigned long crc32;
+ };
+};
+
+/*
+ * Aggregate results of one job group. Every array has two slots, indexed
+ * by data direction (DDIR_READ, DDIR_WRITE).
+ */
+struct group_run_stats {
+ /* longest and shortest job runtime, printed as msec */
+ unsigned long max_run[2], min_run[2];
+ /* highest and lowest per-job bandwidth */
+ unsigned long max_bw[2], min_bw[2];
+ /* total IO in MiB */
+ unsigned long io_mb[2];
+ /* aggregate bandwidth: io_mb scaled and divided by max_run */
+ unsigned long agg[2];
+};
+
+/*
+ * Everything belonging to one job: its settings, its runtime state and
+ * the statistics it collects. Jobs live in the threads array, which forked
+ * jobs reach through a shared memory segment.
+ */
+struct thread_data {
+ char file_name[256];
+ char directory[256];
+ /* error text written by td_verror() */
+ char verror[80];
+ pthread_t thread;
+ int thread_number;
+ int groupid;
+ /* FIO_TYPE_FILE or FIO_TYPE_BD */
+ int filetype;
+ int error;
+ /* file descriptor of the job's file, -1 when closed */
+ int fd;
+ /* file mapping for the mmap engine, NULL when not mapped */
+ void *mmap;
+ pid_t pid;
+ char *orig_buffer;
+ size_t orig_buffer_size;
+ volatile int terminate;
+ volatile int runstate;
+ volatile int old_runstate;
+ /* DDIR_READ or DDIR_WRITE, see td_read()/td_write() */
+ unsigned int ddir;
+ unsigned int ioprio;
+ unsigned int sequential;
+ unsigned int bs;
+ unsigned int min_bs;
+ unsigned int max_bs;
+ unsigned int odirect;
+ unsigned int thinktime;
+ unsigned int fsync_blocks;
+ unsigned int start_delay;
+ unsigned int timeout;
+ unsigned int io_engine;
+ unsigned int create_file;
+ unsigned int overwrite;
+ unsigned int invalidate_cache;
+ unsigned int bw_avg_time;
+ unsigned int create_serialize;
+ unsigned int create_fsync;
+ /* number of passes over the file */
+ unsigned int loops;
+ unsigned long long file_size;
+ unsigned long long file_offset;
+ unsigned int sync_io;
+ unsigned int mem_type;
+ unsigned int verify;
+ unsigned int stonewall;
+ unsigned int numjobs;
+ unsigned int use_thread;
+ unsigned int iodepth;
+ os_cpu_mask_t cpumask;
+
+ struct drand48_data bsrange_state;
+ struct drand48_data verify_state;
+
+ int shm_id;
+
+ unsigned long long cur_off;
+
+ /* io engine private data and entry points */
+ void *io_data;
+ char io_engine_name[16];
+ int (*io_prep)(struct thread_data *, struct io_u *);
+ int (*io_queue)(struct thread_data *, struct io_u *);
+ int (*io_getevents)(struct thread_data *, int, int, struct timespec *);
+ struct io_u *(*io_event)(struct thread_data *, int);
+ int (*io_cancel)(struct thread_data *, struct io_u *);
+ void (*io_cleanup)(struct thread_data *);
+ int (*io_sync)(struct thread_data *);
+
+ unsigned int cur_depth;
+ struct list_head io_u_freelist;
+ struct list_head io_u_busylist;
+
+ unsigned int rate;
+ unsigned int ratemin;
+ unsigned int ratecycle;
+ unsigned long rate_usec_cycle;
+ long rate_pending_usleep;
+ unsigned long rate_bytes;
+ struct timeval lastrate;
+
+ unsigned long runtime[2]; /* msec */
+ /* file_size minus file_offset */
+ unsigned long long io_size;
+ /* io_size times loops */
+ unsigned long long total_io_size;
+
+ unsigned long io_blocks[2];
+ unsigned long io_bytes[2];
+ unsigned long this_io_bytes[2];
+ unsigned long last_bytes;
+ /* the job blocks on this until run_threads() releases it */
+ sem_t mutex;
+
+ struct drand48_data random_state;
+ unsigned long *file_map;
+ unsigned int num_maps;
+
+ /*
+ * bandwidth and latency stats
+ */
+ struct io_stat clat_stat[2]; /* completion latency */
+ struct io_stat slat_stat[2]; /* submission latency */
+ struct io_stat bw_stat[2]; /* bandwidth stats */
+
+ unsigned long stat_io_bytes[2];
+ struct timeval stat_sample_time[2];
+
+ struct io_log *slat_log;
+ struct io_log *clat_log;
+ struct io_log *bw_log;
+
+ struct timeval start; /* start of this loop */
+ struct timeval epoch; /* time job was started */
+
+ /* resource usage accounting, see update_rusage_stat() */
+ struct rusage ru_start;
+ struct rusage ru_end;
+ unsigned long usr_time;
+ unsigned long sys_time;
+ unsigned long ctx;
+
+ unsigned int do_disk_util;
+ unsigned int override_sync;
+
+ struct list_head io_hist_list;
+};
+
+/*
+ * Record an error on a job: store the error number in td->error and
+ * format a message with source file, line and strerror() text into
+ * td->verror.
+ *
+ * err is evaluated exactly once, before anything else happens. The local
+ * has a name of its own so that an err expression mentioning a variable
+ * called "e" does not end up initialising the local from itself. Every
+ * use of td is parenthesized so any pointer expression can be passed.
+ */
+#define td_verror(td, err) \
+	do { \
+		int td_verror_err_ = (err); \
+		(td)->error = td_verror_err_; \
+		snprintf((td)->verror, sizeof((td)->verror) - 1, "file:%s:%d, error=%s", __FILE__, __LINE__, strerror(td_verror_err_)); \
+	} while (0)
+
+extern int parse_jobs_ini(char *);
+extern int parse_options(int, char **);
+extern void finish_log(struct thread_data *, struct io_log *, const char *);
+extern int init_random_state(struct thread_data *);
+
+extern int rate_quit;
+extern int write_lat_log;
+extern int write_bw_log;
+extern int exitall_on_terminate;
+extern int thread_number;
+extern int shm_id;
+extern int groupid;
+
+extern struct thread_data *threads;
+
+/*
+ * Data direction. The values are used as indices into the two-element
+ * per-direction arrays.
+ */
+enum {
+ DDIR_READ = 0,
+ DDIR_WRITE,
+};
+
+/*
+ * What type of allocation to use for io buffers
+ */
+enum {
+ MEM_MALLOC, /* ordinary malloc */
+ MEM_SHM, /* use shared memory segments */
+ MEM_MMAP, /* use anonymous mmap */
+};
+
+/*
+ * The type of object we are working on: a regular file or a block
+ * device. Numbering starts at 1.
+ */
+enum {
+ FIO_TYPE_FILE = 1,
+ FIO_TYPE_BD,
+};
+
+/*
+ * IO engines, one bit each. FIO_MMAPIO additionally carries the
+ * FIO_SYNCIO bit, so its value is (1 << 1) | FIO_SYNCIO.
+ */
+enum {
+ FIO_SYNCIO = 1 << 0,
+ FIO_MMAPIO = 1 << 1 | FIO_SYNCIO,
+ FIO_LIBAIO = 1 << 2,
+ FIO_POSIXAIO = 1 << 3,
+ FIO_SGIO = 1 << 4,
+};
+
+/* direction tests for a job */
+#define td_read(td) ((td)->ddir == DDIR_READ)
+#define td_write(td) ((td)->ddir == DDIR_WRITE)
+
+/*
+ * Block map helpers. Each long of the map holds BLOCKS_PER_MAP bits.
+ * TO_MAP_BLOCK rebases block number b by the job's file offset expressed
+ * in min_bs sized blocks; RAND_MAP_IDX and RAND_MAP_BIT then give the
+ * word index and the bit position within that word.
+ */
+#define BLOCKS_PER_MAP (8 * sizeof(long))
+#define TO_MAP_BLOCK(td, b) ((b) - ((td)->file_offset / (td)->min_bs))
+#define RAND_MAP_IDX(td, b) (TO_MAP_BLOCK(td, b) / BLOCKS_PER_MAP)
+#define RAND_MAP_BIT(td, b) (TO_MAP_BLOCK(td, b) & (BLOCKS_PER_MAP - 1))
+
+/* NOTE(review): presumably the upper bound on defined jobs; confirm at the use site */
+#define MAX_JOBS (1024)
+
+/*
+ * Counters parsed from a disk's stat file by get_io_ticks(). The two
+ * element arrays hold one value per direction, reported as read/write.
+ */
+struct disk_util_stat {
+ unsigned ios[2];
+ unsigned merges[2];
+ unsigned long long sectors[2];
+ unsigned ticks[2];
+ unsigned io_ticks;
+ unsigned time_in_queue;
+};
+
+/*
+ * Utilisation tracking state for one disk, linked on disk_list.
+ */
+struct disk_util {
+ struct list_head list;
+
+ /* allocated copy of the disk directory's base name */
+ char *name;
+ /* stat file that is sampled */
+ char path[256];
+ dev_t dev;
+
+ /* totals accumulated so far, and the previous raw sample */
+ struct disk_util_stat dus;
+ struct disk_util_stat last_dus;
+
+ /* time covered by the samples, and when the last one was taken */
+ unsigned long msec;
+ struct timeval time;
+};
+
+/*
+ * Argument block for completion handling: nr is filled in by the caller,
+ * error and bytes_done (one slot per direction) are results.
+ */
+struct io_completion_data {
+ int nr; /* input */
+
+ int error; /* output */
+ unsigned long bytes_done[2]; /* output */
+};
+
+/* delay, in milliseconds, used when arming the disk utilisation timer */
+#define DISK_UTIL_MSEC (250)
+
+#endif
--- /dev/null
+#!/bin/bash
+
+# Use gnuplot to generate plots from fio run with -l and/or -w
+#
+# Takes the graph title as its only argument. Every readable *bw.log,
+# *slat.log and *clat.log file in the current directory is plotted, one
+# png per kind, written to <title>-bw.png, <title>-slat.png and
+# <title>-clat.png.
+
+if [ -z "$1" ]; then
+	echo Need title as arg
+	exit 1
+fi
+
+TITLE="$1"
+
+# Collect every readable file ending in $1 into PLOT_LINE, formatted as a
+# gnuplot plot list: 'a' with lines, 'b' with lines
+build_plot_line() {
+	PLOT_LINE=""
+	for i in *"$1"; do
+		if [ ! -r "$i" ]; then
+			continue
+		fi
+		if [ -n "$PLOT_LINE" ]; then
+			PLOT_LINE="$PLOT_LINE, "
+		fi
+
+		PLOT_LINE="$PLOT_LINE'$i' with lines"
+	done
+}
+
+# Feed one plot to gnuplot.
+# $1 = log kind (bw, slat, clat), $2 = title prefix, $3 = y axis label
+make_plot() {
+	echo "set title '$2 - $TITLE'; set xlabel 'time (msec)'; set ylabel '$3'; set terminal png; set output '$TITLE-$1.png'; plot $PLOT_LINE" | gnuplot -
+}
+
+build_plot_line bw.log
+if [ -n "$PLOT_LINE" ]; then
+	echo Making bw logs
+	make_plot bw "Bandwidth" "KiB/sec"
+fi
+
+build_plot_line slat.log
+if [ -n "$PLOT_LINE" ]; then
+	echo "Making slat logs $PLOT_LINE"
+	make_plot slat "Submission latency" "latency (msec)"
+fi
+
+build_plot_line clat.log
+if [ -n "$PLOT_LINE" ]; then
+	echo "Making clat logs $PLOT_LINE"
+	make_plot clat "Completion latency" "latency (msec)"
+fi
--- /dev/null
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Local offsetof: any earlier definition is dropped. The compiler's own
+ * implementation is used when __compiler_offsetof is defined; otherwise
+ * the offset is taken as the address of MEMBER in a TYPE placed at
+ * address 0.
+ */
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/*
+ * container_of - get the structure that contains a given member
+ * @ptr: pointer to the member.
+ * @type: type of the containing structure.
+ * @member: name of the member within the structure.
+ *
+ * The typed temporary makes the compiler check that ptr really points to
+ * something of the member's type. Relies on the statement expression and
+ * typeof extensions.
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/*
+ * Link node, used both as the list head and embedded in every entry. An
+ * empty list is a head whose next and prev point back at itself.
+ */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* static initializer for an empty list head called name */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* define an empty list head variable called name */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* make the head at ptr an empty list at run time */
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Link a new entry in between two entries that are known to be adjacent.
+ *
+ * Internal helper for callers that already hold both neighbours; prev
+ * must be directly followed by next.
+ */
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	/* hook up the new entry's own links first, then its neighbours */
+	new->prev = prev;
+	new->next = next;
+	prev->next = new;
+	next->prev = new;
+}
+
+/**
+ * list_add - insert an entry at the front of a list
+ * @new: entry to insert
+ * @head: list head the entry is placed right after
+ *
+ * The entry becomes the first element, which gives stack (LIFO) ordering
+ * when entries are also taken from the front.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/**
+ * list_add_tail - insert an entry at the end of a list
+ * @new: entry to insert
+ * @head: list head the entry is placed right before
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/*
+ * Unlink whatever sits between prev and next by pointing the two at each
+ * other.
+ *
+ * Internal helper for callers that already hold both neighbours.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/**
+ * list_del - remove an entry from its list
+ * @entry: the element to remove
+ *
+ * The entry's own links are set to NULL afterwards, so list_empty() on
+ * the removed entry does not return true.
+ */
+static inline void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	entry->prev = NULL;
+	entry->next = NULL;
+}
+
+/**
+ * list_empty - check whether a list has no entries
+ * @head: the list head to check
+ *
+ * Returns non-zero when the head's next pointer refers to the head itself.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	const struct list_head *first = head->next;
+
+	return first == head;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Thin wrapper around container_of().
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * pos->next is read after the loop body has run, so the body must not
+ * remove pos from the list; use list_for_each_safe() for that.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * The successor of pos is saved in n before the loop body runs, which is
+ * what allows the body to remove pos.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#endif
--- /dev/null
+/*
+ * Shamelessly lifted from the 2.6 kernel (crypto/md5.c)
+ */
+#include <string.h>
+#include <stdint.h>
+#include "md5.h"
+
+/*
+ * md5_transform - fold one 16-word input block into the 4-word hash state.
+ * @hash: running state, read at entry and updated in place on exit.
+ * @in:   the 16 32-bit words of the block to mix in (read only).
+ *
+ * Runs four rounds of sixteen MD5STEP operations, one round per mixing
+ * function F1..F4, then adds the result back onto @hash.
+ *
+ * No byte-order conversion is done here or in md5_update(): the words of
+ * @in are consumed exactly as stored.
+ * NOTE(review): reference MD5 reads its input as little-endian words, so on
+ * a big-endian host this presumably yields a different digest - confirm
+ * whether callers only ever compare digests produced on the same machine.
+ */
+static void md5_transform(uint32_t *hash, uint32_t const *in)
+{
+ uint32_t a, b, c, d;
+
+ /* Work on local copies so @hash is only written once, at the end. */
+ a = hash[0];
+ b = hash[1];
+ c = hash[2];
+ d = hash[3];
+
+ /* Round 1: F1 */
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+ /* Round 2: F2 */
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+ /* Round 3: F3 */
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+ /* Round 4: F4 */
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+ /* Accumulate this block's contribution into the running state. */
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+/*
+ * md5_update - feed more data into a running MD5 computation.
+ * @mctx: hash context; its block buffer, hash state and byte count are
+ *        all updated.
+ * @data: bytes to add.  May be NULL only when @len is 0.
+ * @len:  number of bytes at @data.
+ *
+ * Input is staged in mctx->block.  Each time the 64-byte block fills up
+ * it is passed to md5_transform(); any remainder shorter than a block is
+ * left in the buffer for the next call.
+ */
+void md5_update(struct md5_ctx *mctx, const uint8_t *data, unsigned int len)
+{
+	/* Free bytes left in the staging block: always in the range 1..64. */
+	const uint32_t avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+	/* First unused byte of the staging block. */
+	char *fill = (char *)mctx->block + (sizeof(mctx->block) - avail);
+
+	/*
+	 * Nothing to add.  Return before @data is handed to memcpy():
+	 * passing a NULL pointer there is undefined even for a zero length.
+	 */
+	if (!len)
+		return;
+
+	mctx->byte_count += len;
+
+	/* Not enough to complete the block: just stash the bytes. */
+	if (avail > len) {
+		memcpy(fill, data, len);
+		return;
+	}
+
+	/* Top up the partially filled block and process it. */
+	memcpy(fill, data, avail);
+	md5_transform(mctx->hash, mctx->block);
+	data += avail;
+	len -= avail;
+
+	/* Process every further complete block. */
+	while (len >= sizeof(mctx->block)) {
+		memcpy(mctx->block, data, sizeof(mctx->block));
+		md5_transform(mctx->hash, mctx->block);
+		data += sizeof(mctx->block);
+		len -= sizeof(mctx->block);
+	}
+
+	/* Keep the tail (possibly empty) for the next call. */
+	memcpy(mctx->block, data, len);
+}
--- /dev/null
+#ifndef MD5_H
+#define MD5_H
+
+#include <stdint.h>
+
+/*
+ * Sizes used by the MD5 code.  The two *_WORDS values dimension the
+ * uint32_t arrays in struct md5_ctx; the two byte values equal those word
+ * counts multiplied by four.
+ */
+#define MD5_DIGEST_SIZE 16
+#define MD5_HMAC_BLOCK_SIZE 64
+#define MD5_BLOCK_WORDS 16
+#define MD5_HASH_WORDS 4
+
+/*
+ * The four MD5 mixing functions, one per round of md5_transform().
+ * Every argument is parenthesised so that compound expressions such as
+ * "a | b" keep their meaning after substitution.  F2 is F1 with its
+ * arguments rotated.
+ */
+#define F1(x, y, z)	((z) ^ ((x) & ((y) ^ (z))))
+#define F2(x, y, z)	F1(z, x, y)
+#define F3(x, y, z)	((x) ^ (y) ^ (z))
+#define F4(x, y, z)	((y) ^ ((x) | ~(z)))
+
+/*
+ * MD5STEP - one step of an MD5 round.
+ * @f:  mixing function to apply (F1..F4).
+ * @w:  32-bit lvalue updated by the step.
+ * @x, @y, @z: the other three state words; passed to @f, and @x is added
+ *      to the rotated result.
+ * @in: value added before rotating (message word plus round constant).
+ * @s:  left-rotate amount; must be in 1..31 so neither shift reaches 32.
+ *
+ * Adds f(x, y, z) + in to @w, rotates @w left by @s bits, then adds @x.
+ * @w and @s are expanded more than once, so pass them without side
+ * effects.
+ */
+#define MD5STEP(f, w, x, y, z, in, s) \
+	((w) += f(x, y, z) + (in), \
+	 (w) = ((w) << (s) | (w) >> (32 - (s))) + (x))
+
+/*
+ * State carried between md5_update() calls.
+ */
+struct md5_ctx {
+ /* Running hash state; md5_transform() updates it once per block. */
+ uint32_t hash[MD5_HASH_WORDS];
+ /* Staging buffer where input collects until a full block is present. */
+ uint32_t block[MD5_BLOCK_WORDS];
+ /* Total bytes passed to md5_update(); its low 6 bits give the fill level of block. */
+ uint64_t byte_count;
+};
+
+/* Add @len bytes starting at @data to the MD5 computation held in @mctx. */
+extern void md5_update(struct md5_ctx *mctx, const uint8_t *data,
+		       unsigned int len);
+
+#endif
--- /dev/null
+#ifndef FIO_OS_FREEBSD_H
+#define FIO_OS_FREEBSD_H
+
+/*
+ * Feature switches for FreeBSD.  os.h tests these after including this
+ * header: FIO_HAVE_POSIXAIO makes it include <aio.h>, and for the
+ * fadvise and CPU-affinity features left undefined here it supplies
+ * no-op replacement macros.
+ */
+#undef FIO_HAVE_LIBAIO
+#define FIO_HAVE_POSIXAIO
+#undef FIO_HAVE_FADVISE
+#undef FIO_HAVE_CPU_AFFINITY
+#undef FIO_HAVE_DISK_UTIL
+#undef FIO_HAVE_SGIO
+
+/* Name of the anonymous-mapping flag on this platform. */
+#define OS_MAP_ANON (MAP_ANON)
+
+/* CPU mask type; FIO_HAVE_CPU_AFFINITY is not enabled above. */
+typedef unsigned long os_cpu_mask_t;
+
+/*
+ * FIXME: querying the size of a block device is not implemented for
+ * FreeBSD.  Neither argument is used; the function always returns 1 and
+ * never writes through @bytes.
+ */
+static inline int blockdev_size(int fd, size_t *bytes)
+{
+	(void) fd;
+	(void) bytes;
+
+	return 1;
+}
+
+#endif
--- /dev/null
+#ifndef FIO_OS_LINUX_H
+#define FIO_OS_LINUX_H
+
+#include <sys/ioctl.h>
+
+/*
+ * Feature switches for Linux.  os.h tests these after including this
+ * header to decide which system headers to pull in and which fallback
+ * macros to define.
+ *
+ * FIO_HAVE_IOPRIO must be defined here: this header provides a real
+ * ioprio_set(), and without the switch os.h would define a same-named
+ * macro expanding to (0), turning every later ioprio_set() call into a
+ * silent no-op.
+ */
+#define FIO_HAVE_LIBAIO
+#define FIO_HAVE_POSIXAIO
+#define FIO_HAVE_FADVISE
+#define FIO_HAVE_CPU_AFFINITY
+#define FIO_HAVE_DISK_UTIL
+#define FIO_HAVE_SGIO
+#define FIO_HAVE_IOPRIO
+
+/* Name of the anonymous-mapping flag on this platform. */
+#define OS_MAP_ANON (MAP_ANONYMOUS)
+
+/* CPU mask type; the same type fio_getaffinity() sizes its request with. */
+typedef cpu_set_t os_cpu_mask_t;
+
+/*
+ * fadvise - forward to posix_fadvise(), casting the offset to off_t.
+ *
+ * we want fadvise64 really, but it's so tangled... later
+ */
+#define fadvise(fd, off, len, advice) \
+ posix_fadvise((fd), (off_t)(off), (len), (advice))
+
+/*
+ * CPU affinity helpers built on sched_setaffinity()/sched_getaffinity().
+ * fio_setaffinity() applies td->cpumask to the process td->pid;
+ * fio_getaffinity() reads the mask of @pid into @ptr, which must point at
+ * storage of at least sizeof(cpu_set_t) bytes.
+ */
+#define fio_setaffinity(td) \
+ sched_setaffinity((td)->pid, sizeof((td)->cpumask), &(td)->cpumask)
+#define fio_getaffinity(pid, ptr) \
+ sched_getaffinity((pid), sizeof(cpu_set_t), (ptr))
+
+/*
+ * ioprio_set - invoke the ioprio_set system call directly via syscall().
+ * The three arguments are passed through unchanged and the result of
+ * syscall() is returned as an int.
+ */
+static inline int ioprio_set(int which, int who, int ioprio)
+{
+	const long rc = syscall(__NR_ioprio_set, which, who, ioprio);
+
+	return rc;
+}
+
+/*
+ * Selector constants for use with ioprio_set().
+ * NOTE(review): per ioprio_set(2) these are values for its "which"
+ * argument - confirm against the callers.
+ */
+enum {
+	IOPRIO_WHO_PROCESS	= 1,
+	IOPRIO_WHO_PGRP		= 2,
+	IOPRIO_WHO_USER		= 3,
+};
+
+/*
+ * NOTE(review): per ioprio_set(2) the priority class sits above this bit
+ * position in an ioprio value - confirm against the callers.
+ */
+#define IOPRIO_CLASS_SHIFT	13
+
+/* Fallback request code for when the system headers do not provide one. */
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#endif
+
+/*
+ * blockdev_size - query the size of the block device open on @fd.
+ * @fd:    descriptor of the device.
+ * @bytes: receives the size on success; left untouched on failure.
+ *
+ * Returns 0 on success, the errno of the failed ioctl() otherwise, or
+ * EOVERFLOW when the device size cannot be represented in a size_t.
+ */
+static inline int blockdev_size(int fd, size_t *bytes)
+{
+	/*
+	 * BLKGETSIZE64 stores a 64-bit value regardless of the size_t in
+	 * its request encoding.  Receive it in a 64-bit local so that a
+	 * 32-bit size_t is never overrun.
+	 */
+	unsigned long long size = 0;
+
+	if (ioctl(fd, BLKGETSIZE64, &size))
+		return errno;
+
+	/* Do not hand back a silently truncated size. */
+	if (size > (size_t) -1)
+		return EOVERFLOW;
+
+	*bytes = size;
+	return 0;
+}
+
+#endif
--- /dev/null
+#ifndef FIO_OS_H
+#define FIO_OS_H
+
+#if defined(__linux__)
+#include "os-linux.h"
+#elif defined(__FreeBSD__)
+#include "os-freebsd.h"
+#else
+#error "unsupported os"
+#endif
+
+#ifdef FIO_HAVE_LIBAIO
+#include <libaio.h>
+#endif
+
+#ifdef FIO_HAVE_POSIXAIO
+#include <aio.h>
+#endif
+
+#ifdef FIO_HAVE_SGIO
+#include <linux/fs.h>
+#include <scsi/sg.h>
+#endif
+
+/*
+ * Platforms without FIO_HAVE_FADVISE: fadvise() expands to the constant 0
+ * without evaluating its arguments, and the advice names are defined (as
+ * 0) so that call sites still compile.
+ */
+#ifndef FIO_HAVE_FADVISE
+#define fadvise(fd, off, len, advice) (0)
+
+#define POSIX_FADV_DONTNEED (0)
+#define POSIX_FADV_SEQUENTIAL (0)
+#define POSIX_FADV_RANDOM (0)
+#endif /* FIO_HAVE_FADVISE */
+
+/*
+ * Platforms without FIO_HAVE_CPU_AFFINITY: both affinity helpers expand
+ * to the constant 0 and do not evaluate their arguments.
+ */
+#ifndef FIO_HAVE_CPU_AFFINITY
+#define fio_setaffinity(td) (0)
+#define fio_getaffinity(pid, mask) (0)
+#endif
+
+/*
+ * Platforms without FIO_HAVE_IOPRIO: ioprio_set() expands to the constant
+ * 0.  A platform header that supplies a real ioprio_set() function must
+ * define FIO_HAVE_IOPRIO before this point; otherwise this macro replaces
+ * every later call to that function.
+ */
+#ifndef FIO_HAVE_IOPRIO
+#define ioprio_set(which, who, prio) (0)
+#endif
+
+/*
+ * Init entry points, one per I/O method, each taking the thread's
+ * struct thread_data and returning an int.
+ * NOTE(review): presumably 0 means success and the definitions live with
+ * the individual engines - confirm against the implementations.
+ */
+struct thread_data;
+extern int fio_libaio_init(struct thread_data *);
+extern int fio_posixaio_init(struct thread_data *);
+extern int fio_syncio_init(struct thread_data *);
+extern int fio_mmapio_init(struct thread_data *);
+extern int fio_sgio_init(struct thread_data *);
+
+#endif