Update libhdfs engine documentation and options

author Manish Mandlik <manishm@fb.com>

Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)

committer Jens Axboe <axboe@fb.com>

Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)
author Manish Mandlik <manishm@fb.com>
Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)
committer Jens Axboe <axboe@fb.com>
Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)
diff --git a/HOWTO b/HOWTO

index d7283535db0d8ddff7908422185fe5cdb9480b76..a0b89c8071d15b45aaef71222611625e38022178 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -694,7 +694,21 @@ ioengine=str       Defines how the job issues io to the file. The following
                                 having to go through FUSE. This ioengine
                                 defines engine specific options.
  
-                       hdfs    Read and write through Hadoop (HDFS).
+                       libhdfs Read and write through Hadoop (HDFS).
+                               The 'filename' option is used to specify host,
+                               port of the hdfs name-node to connect. This
+                               engine interprets offsets a little
+                               differently. In HDFS, files once created
+                               cannot be modified. So random writes are not
+                               possible. To imitate this, libhdfs engine
+                               expects bunch of small files to be created
+                               over HDFS, and engine will randomly pick a
+                               file out of those files based on the offset
+                               generated by fio backend. (see the example
+                               job file to create such files, use rw=write
+                               option). Please note, you might want to set
+                               necessary environment variables to work with
+                               hdfs/libhdfs properly.
  
                         external Prefix to specify loading an external
                                 IO engine object file. Append the engine
diff --git a/examples/libhdfs.fio b/examples/libhdfs.fio

new file mode 100644 (file)

index 0000000..d5c0ba6
--- /dev/null
+++ b/examples/libhdfs.fio
@@ -0,0 +1,8 @@
+[global]
+runtime=300
+
+[hdfs]
+filename=dfs-perftest-base.dfs-perftest-base,9000
+ioengine=libhdfs
+rw=read
+bs=256k
diff --git a/fio.1 b/fio.1

index b5ff3ccbc4633dd0cf92ad42a59efe660d64e848..c61948bb52814a2a6bac248c5db0063406225a48 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -613,8 +613,16 @@ Using Glusterfs libgfapi async interface to direct access to Glusterfs volumes w
  having to go through FUSE. This ioengine defines engine specific
  options.
  .TP
-.B hdfs
-Read and write through Hadoop (HDFS)
+.B libhdfs
+Read and write through Hadoop (HDFS).  The \fBfilename\fR option is used to
+specify host,port of the hdfs name-node to connect. This engine interprets
+offsets a little differently. In HDFS, files once created cannot be modified.
+So random writes are not possible. To imitate this, libhdfs engine expects
+bunch of small files to be created over HDFS, and engine will randomly pick a
+file out of those files based on the offset generated by fio backend. (see the
+example job file to create such files, use rw=write option). Please note, you
+might want to set necessary environment variables to work with hdfs/libhdfs
+properly.
  .RE
  .P
  .RE
diff --git a/options.c b/options.c

index 484efc1a2ebe8dd438dceff5a2a1307c8eaf7c0b..3acfdc86f197a0db37df2e91a3feae477ba25d91 100644 (file)
--- a/options.c
+++ b/options.c
@@ -672,7 +672,7 @@ static int str_numa_mpol_cb(void *data, char *input)
                 }
                 td->o.numa_memnodes = strdup(nodelist);
                 numa_free_nodemask(verify_bitmask);
-                
+
                 break;
         case MPOL_LOCAL:
         case MPOL_DEFAULT:
@@ -1542,7 +1542,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                           },
  #endif
  #ifdef CONFIG_LIBHDFS
-                         { .ival = "hdfs",
+                         { .ival = "libhdfs",
                             .help = "Hadoop Distributed Filesystem (HDFS) engine"
                           },
  #endif
author	Manish Mandlik <manishm@fb.com>
	Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)
committer	Jens Axboe <axboe@fb.com>
	Thu, 14 Aug 2014 17:45:16 +0000 (11:45 -0600)
HOWTO		patch \| blob \| blame \| history
examples/libhdfs.fio	[new file with mode: 0644]	patch \| blob
fio.1		patch \| blob \| blame \| history
options.c		patch \| blob \| blame \| history