Add intel hardware assisted crc32c support
authorJens Axboe <jens.axboe@oracle.com>
Mon, 4 Aug 2008 13:35:26 +0000 (15:35 +0200)
committerJens Axboe <jens.axboe@oracle.com>
Mon, 4 Aug 2008 13:35:26 +0000 (15:35 +0200)
Not tested, since I don't have the hardware.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
HOWTO
Makefile
crc/crc32c-intel.c [new file with mode: 0644]
crc/crc32c.h
fio.h
verify.c

diff --git a/HOWTO b/HOWTO
index f569f5622de5db7d28d0fb3b7319b6aee9545ef1..7a65aa1540338c32bc2d43ef5b862b6c68b5b7ab 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -664,6 +664,9 @@ verify=str  If writing to a file, fio can verify the file contents
                        crc32c  Use a crc32c sum of the data area and store
                                it in the header of each block.
 
                        crc32c  Use a crc32c sum of the data area and store
                                it in the header of each block.
 
+                       crc32c-intel Use hardware assisted crc32c calculation
+                               provided on SSE4.2 enabled processors.
+
                        crc32   Use a crc32 sum of the data area and store
                                it in the header of each block.
 
                        crc32   Use a crc32 sum of the data area and store
                                it in the header of each block.
 
index 11bb17e7387b3b1bf54d36fc455596370d4a4a4e..c95c3dc050d79538f0febe67ce4576e5180d77fc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ OBJS += crc/crc7.o
 OBJS += crc/crc16.o
 OBJS += crc/crc32.o
 OBJS += crc/crc32c.o
 OBJS += crc/crc16.o
 OBJS += crc/crc32.o
 OBJS += crc/crc32c.o
+OBJS += crc/crc32c-intel.o
 OBJS += crc/crc64.o
 OBJS += crc/sha256.o
 OBJS += crc/sha512.o
 OBJS += crc/crc64.o
 OBJS += crc/sha256.o
 OBJS += crc/sha512.o
diff --git a/crc/crc32c-intel.c b/crc/crc32c-intel.c
new file mode 100644 (file)
index 0000000..c0abe73
--- /dev/null
@@ -0,0 +1,66 @@
+#include <inttypes.h>
+
+/*
+ * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
+ *
+ * Use the hardware-provided CRC32 instruction to accelerate CRC32C calculation.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+
+#if BITS_PER_LONG == 64
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
+                                unsigned long length)
+{
+       while (length--) {      /* fold one input byte into crc per iteration */
+               __asm__ __volatile__(
+                       ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"     /* raw opcode bytes: crc32b %cl, %esi */
+                       :"=S"(crc)      /* updated crc is read back from esi */
+                       :"0"(crc), "c"(*data)   /* running crc shares esi; data byte is placed in ecx */
+               );
+               data++;
+       }
+
+       return crc;     /* no final inversion is applied here */
+}
+
+/*
+ * Compute a crc32c over 'length' bytes starting at 'data' with the
+ * hardware CRC32 instruction instead of a lookup table. The buffer is
+ * consumed in SCALE_F-byte words; any trailing bytes are handed to
+ * crc32c_intel_le_hw_byte(). The crc is seeded with ~0 and returned
+ * as-is, without a final inversion.
+ */
+uint32_t crc32c_intel(unsigned char const *data, unsigned long length)
+{
+       unsigned int iquotient = length / SCALE_F;      /* whole words; NOTE(review): narrower than 'length' where long is 64-bit */
+       unsigned int iremainder = length % SCALE_F;     /* leftover bytes after the last whole word */
+#if BITS_PER_LONG == 64 /* NOTE(review): BITS_PER_LONG is not defined in this file - confirm the build provides it */
+       uint64_t *ptmp = (uint64_t *) data;
+#else
+       uint32_t *ptmp = (uint32_t *) data;
+#endif
+       uint32_t crc = ~0;
+
+       while (iquotient--) {
+               __asm__ __volatile__(
+                       ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" /* raw opcode bytes: crc32 with word source in (e/r)cx, crc in (e/r)si; REX_PRE adds the 0x48 prefix on 64-bit */
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*ptmp)
+               );
+               ptmp++;
+       }
+
+       if (iremainder) /* finish the tail bytes one at a time */
+               crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
+                                iremainder);
+
+       return crc;
+}
index 1498db91a4d072ec77d2ec9ef488b81bd861c1ba..cf1713692b76e82efeb516740d0a1275875c70d1 100644 (file)
@@ -19,5 +19,6 @@
 #define CRC32C_H
 
 extern uint32_t crc32c(unsigned char const *, unsigned long);
 #define CRC32C_H
 
 extern uint32_t crc32c(unsigned char const *, unsigned long);
+extern uint32_t crc32c_intel(unsigned char const *, unsigned long);
 
 #endif
 
 #endif
diff --git a/fio.h b/fio.h
index ec8d593519dc0cfb4478c0c40f1dd92acc1419ed..22aaf345ae8daa093df160a7f10523702d0c5e1c 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -196,6 +196,7 @@ enum {
        VERIFY_CRC64,                   /* crc64 sum data blocks */
        VERIFY_CRC32,                   /* crc32 sum data blocks */
        VERIFY_CRC32C,                  /* crc32c sum data blocks */
        VERIFY_CRC64,                   /* crc64 sum data blocks */
        VERIFY_CRC32,                   /* crc32 sum data blocks */
        VERIFY_CRC32C,                  /* crc32c sum data blocks */
+       VERIFY_CRC32C_INTEL,            /* crc32c sum data blocks with hw */
        VERIFY_CRC16,                   /* crc16 sum data blocks */
        VERIFY_CRC7,                    /* crc7 sum data blocks */
        VERIFY_SHA256,                  /* sha256 sum data blocks */
        VERIFY_CRC16,                   /* crc16 sum data blocks */
        VERIFY_CRC7,                    /* crc7 sum data blocks */
        VERIFY_SHA256,                  /* sha256 sum data blocks */
index afb4cb7d4afbda3a896c34ab4da805c52238a26e..2aa5ab40e5674c4bb1f10f07697adaa05492ff56 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -128,6 +128,7 @@ static inline unsigned int __hdr_size(int verify_type)
                break;
        case VERIFY_CRC32C:
        case VERIFY_CRC32:
                break;
        case VERIFY_CRC32C:
        case VERIFY_CRC32:
+       case VERIFY_CRC32C_INTEL:
                len = sizeof(struct vhdr_crc32);
                break;
        case VERIFY_CRC16:
                len = sizeof(struct vhdr_crc32);
                break;
        case VERIFY_CRC16:
@@ -337,7 +338,10 @@ static int verify_io_u_crc32c(struct verify_header *hdr, struct io_u *io_u,
 
        dprint(FD_VERIFY, "crc32c verify io_u %p, len %u\n", io_u, hdr->len);
 
 
        dprint(FD_VERIFY, "crc32c verify io_u %p, len %u\n", io_u, hdr->len);
 
-       c = crc32c(p, hdr->len - hdr_size(hdr));
+       if (hdr->verify_type == VERIFY_CRC32C_INTEL)
+               c = crc32c_intel(p, hdr->len - hdr_size(hdr));
+       else
+               c = crc32c(p, hdr->len - hdr_size(hdr));
 
        if (c != vh->crc32) {
                log_err("crc32c: verify failed at %llu/%u\n",
 
        if (c != vh->crc32) {
                log_err("crc32c: verify failed at %llu/%u\n",
@@ -466,6 +470,7 @@ int verify_io_u(struct thread_data *td, struct io_u *io_u)
                        ret = verify_io_u_crc64(hdr, io_u, hdr_num);
                        break;
                case VERIFY_CRC32C:
                        ret = verify_io_u_crc64(hdr, io_u, hdr_num);
                        break;
                case VERIFY_CRC32C:
+               case VERIFY_CRC32C_INTEL:
                        ret = verify_io_u_crc32c(hdr, io_u, hdr_num);
                        break;
                case VERIFY_CRC32:
                        ret = verify_io_u_crc32c(hdr, io_u, hdr_num);
                        break;
                case VERIFY_CRC32:
@@ -557,7 +562,10 @@ static void fill_crc32c(struct verify_header *hdr, void *p, unsigned int len)
 {
        struct vhdr_crc32 *vh = hdr_priv(hdr);
 
 {
        struct vhdr_crc32 *vh = hdr_priv(hdr);
 
-       vh->crc32 = crc32c(p, len);
+       if (hdr->verify_type == VERIFY_CRC32C_INTEL)
+               vh->crc32 = crc32c_intel(p, len);
+       else
+               vh->crc32 = crc32c(p, len);
 }
 
 static void fill_crc64(struct verify_header *hdr, void *p, unsigned int len)
 }
 
 static void fill_crc64(struct verify_header *hdr, void *p, unsigned int len)
@@ -618,6 +626,7 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u)
                        fill_crc64(hdr, data, data_len);
                        break;
                case VERIFY_CRC32C:
                        fill_crc64(hdr, data, data_len);
                        break;
                case VERIFY_CRC32C:
+               case VERIFY_CRC32C_INTEL:
                        dprint(FD_VERIFY, "fill crc32c io_u %p, len %u\n",
                                                        io_u, hdr->len);
                        fill_crc32c(hdr, data, data_len);
                        dprint(FD_VERIFY, "fill crc32c io_u %p, len %u\n",
                                                        io_u, hdr->len);
                        fill_crc32c(hdr, data, data_len);