From 3845591fadea480177223e28c9d1c03642d34f0e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 Aug 2008 15:35:26 +0200 Subject: [PATCH] Add intel hardware assisted crc32c support Not tested, since I don't have the hardware. Signed-off-by: Jens Axboe --- HOWTO | 3 +++ Makefile | 1 + crc/crc32c-intel.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++ crc/crc32c.h | 1 + fio.h | 1 + verify.c | 13 +++++++-- 6 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 crc/crc32c-intel.c diff --git a/HOWTO b/HOWTO index f569f562..7a65aa15 100644 --- a/HOWTO +++ b/HOWTO @@ -664,6 +664,9 @@ verify=str If writing to a file, fio can verify the file contents crc32c Use a crc32c sum of the data area and store it in the header of each block. + crc32c-intel Use hardware assisted crc32c calculation + provided on SSE4.2 enabled processors. + crc32 Use a crc32 sum of the data area and store it in the header of each block. diff --git a/Makefile b/Makefile index 11bb17e7..c95c3dc0 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ OBJS += crc/crc7.o OBJS += crc/crc16.o OBJS += crc/crc32.o OBJS += crc/crc32c.o +OBJS += crc/crc32c-intel.o OBJS += crc/crc64.o OBJS += crc/sha256.o OBJS += crc/sha512.o diff --git a/crc/crc32c-intel.c b/crc/crc32c-intel.c new file mode 100644 index 00000000..c0abe73a --- /dev/null +++ b/crc/crc32c-intel.c @@ -0,0 +1,66 @@ +#include <inttypes.h> + +/* + * Based on a posting to lkml by Austin Zhang + * + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. 
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + */ + +#if BITS_PER_LONG == 64 +#define REX_PRE "0x48, " +#define SCALE_F 8 +#else +#define REX_PRE +#define SCALE_F 4 +#endif + +uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data, + unsigned long length) +{ + while (length--) { + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + :"=S"(crc) + :"0"(crc), "c"(*data) + ); + data++; + } + + return crc; +} + +/* + * Steps through buffer SCALE_F bytes at a time using the hardware CRC32 + * instruction; any trailing bytes are handled one byte at a time. + */ +uint32_t crc32c_intel(unsigned char const *data, unsigned long length) +{ + unsigned int iquotient = length / SCALE_F; + unsigned int iremainder = length % SCALE_F; +#if BITS_PER_LONG == 64 + uint64_t *ptmp = (uint64_t *) data; +#else + uint32_t *ptmp = (uint32_t *) data; +#endif + uint32_t crc = ~0; + + while (iquotient--) { + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (iremainder) + crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} diff --git a/crc/crc32c.h b/crc/crc32c.h index 1498db91..cf171369 100644 --- a/crc/crc32c.h +++ b/crc/crc32c.h @@ -19,5 +19,6 @@ #define CRC32C_H extern uint32_t crc32c(unsigned char const *, unsigned long); +extern uint32_t crc32c_intel(unsigned char const *, unsigned long); #endif diff --git a/fio.h b/fio.h index ec8d5935..22aaf345 100644 --- a/fio.h +++ b/fio.h @@ -196,6 +196,7 @@ enum { VERIFY_CRC64, /* crc64 sum data blocks */ VERIFY_CRC32, /* crc32 sum data blocks */ VERIFY_CRC32C, /* crc32c sum data blocks */ + VERIFY_CRC32C_INTEL, /* crc32c sum data blocks with hw */ VERIFY_CRC16, /* crc16 sum data blocks */ VERIFY_CRC7, /* 
crc7 sum data blocks */ VERIFY_SHA256, /* sha256 sum data blocks */ diff --git a/verify.c b/verify.c index afb4cb7d..2aa5ab40 100644 --- a/verify.c +++ b/verify.c @@ -128,6 +128,7 @@ static inline unsigned int __hdr_size(int verify_type) break; case VERIFY_CRC32C: case VERIFY_CRC32: + case VERIFY_CRC32C_INTEL: len = sizeof(struct vhdr_crc32); break; case VERIFY_CRC16: @@ -337,7 +338,10 @@ static int verify_io_u_crc32c(struct verify_header *hdr, struct io_u *io_u, dprint(FD_VERIFY, "crc32c verify io_u %p, len %u\n", io_u, hdr->len); - c = crc32c(p, hdr->len - hdr_size(hdr)); + if (hdr->verify_type == VERIFY_CRC32C_INTEL) + c = crc32c_intel(p, hdr->len - hdr_size(hdr)); + else + c = crc32c(p, hdr->len - hdr_size(hdr)); if (c != vh->crc32) { log_err("crc32c: verify failed at %llu/%u\n", @@ -466,6 +470,7 @@ int verify_io_u(struct thread_data *td, struct io_u *io_u) ret = verify_io_u_crc64(hdr, io_u, hdr_num); break; case VERIFY_CRC32C: + case VERIFY_CRC32C_INTEL: ret = verify_io_u_crc32c(hdr, io_u, hdr_num); break; case VERIFY_CRC32: @@ -557,7 +562,10 @@ static void fill_crc32c(struct verify_header *hdr, void *p, unsigned int len) { struct vhdr_crc32 *vh = hdr_priv(hdr); - vh->crc32 = crc32c(p, len); + if (hdr->verify_type == VERIFY_CRC32C_INTEL) + vh->crc32 = crc32c_intel(p, len); + else + vh->crc32 = crc32c(p, len); } static void fill_crc64(struct verify_header *hdr, void *p, unsigned int len) @@ -618,6 +626,7 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u) fill_crc64(hdr, data, data_len); break; case VERIFY_CRC32C: + case VERIFY_CRC32C_INTEL: dprint(FD_VERIFY, "fill crc32c io_u %p, len %u\n", io_u, hdr->len); fill_crc32c(hdr, data, data_len); -- 2.25.1