Not tested, since I don't have the hardware.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
crc32c Use a crc32c sum of the data area and store
it in the header of each block.
+ crc32c-intel Use hardware assisted crc32c calculation
+ provided on SSE4.2 enabled processors.
+
crc32 Use a crc32 sum of the data area and store
it in the header of each block.
OBJS += crc/crc16.o
OBJS += crc/crc32.o
OBJS += crc/crc32c.o
+OBJS += crc/crc32c-intel.o
OBJS += crc/crc64.o
OBJS += crc/sha256.o
OBJS += crc/sha512.o
--- /dev/null
+#include <inttypes.h>
+
+/*
+ * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
+ *
+ * Uses the hardware-provided CRC32 instruction to accelerate CRC32C computation.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+
+/*
+ * REX_PRE is the optional prefix byte (0x48, REX.W) spliced into the
+ * .byte-encoded CRC32 instruction to select a 64-bit source operand;
+ * SCALE_F is the number of buffer bytes consumed per instruction.
+ *
+ * NOTE(review): BITS_PER_LONG is not defined by anything visible in this
+ * file; presumably the build provides it. If it is undefined, #if treats
+ * it as 0 and the 32-bit variant is selected -- confirm.
+ */
+#if BITS_PER_LONG == 64
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+/*
+ * Fold 'length' bytes starting at 'data' into the running value 'crc',
+ * one byte per hardware CRC32 instruction, and return the updated crc.
+ * A length of zero returns 'crc' unchanged.
+ */
+uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
+ unsigned long length)
+{
+ while (length--) {
+ /*
+ * Hand-encoded "crc32 %cl, %esi" (F2 0F 38 F0 /r, ModRM 0xf1),
+ * emitted as raw bytes so the assembler need not know SSE4.2.
+ * The constraints pin the operands to match the encoding:
+ * "S" keeps crc in ESI (input and output), "c" puts the data
+ * byte in ECX.
+ */
+ __asm__ __volatile__(
+ ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+ :"=S"(crc)
+ :"0"(crc), "c"(*data)
+ );
+ data++;
+ }
+
+ return crc;
+}
+
+/*
+ * Compute a crc32c over 'length' bytes at 'data' using the hardware CRC32
+ * instruction. The bulk of the buffer is consumed SCALE_F bytes per
+ * instruction; any trailing bytes (length % SCALE_F) are folded in one at
+ * a time by crc32c_intel_le_hw_byte(). The crc is seeded with all ones
+ * and returned as-is, without a final inversion.
+ */
+uint32_t crc32c_intel(unsigned char const *data, unsigned long length)
+{
+ /*
+ * NOTE(review): length is unsigned long but the word count is stored
+ * in an unsigned int, so it would truncate for very large buffers --
+ * confirm callers never pass such lengths.
+ */
+ unsigned int iquotient = length / SCALE_F;
+ unsigned int iremainder = length % SCALE_F;
+ /*
+ * The buffer is read through a word-sized pointer with no alignment
+ * check. NOTE(review): relies on the target tolerating unaligned and
+ * type-punned loads -- confirm.
+ */
+#if BITS_PER_LONG == 64
+ uint64_t *ptmp = (uint64_t *) data;
+#else
+ uint32_t *ptmp = (uint32_t *) data;
+#endif
+ uint32_t crc = ~0;
+
+ while (iquotient--) {
+ /*
+ * Hand-encoded word-sized CRC32 (F2 [REX.W] 0F 38 F1 /r, ModRM
+ * 0xf1): crc lives in ESI/RSI ("S", input and output) and the
+ * data word in ECX/RCX ("c"). REX_PRE supplies the REX.W byte
+ * on 64-bit builds and expands to nothing otherwise.
+ */
+ __asm__ __volatile__(
+ ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+ :"=S"(crc)
+ :"0"(crc), "c"(*ptmp)
+ );
+ ptmp++;
+ }
+
+ /* ptmp now points just past the last full word; finish bytewise */
+ if (iremainder)
+ crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
+ iremainder);
+
+ return crc;
+}
#define CRC32C_H
extern uint32_t crc32c(unsigned char const *, unsigned long);
+extern uint32_t crc32c_intel(unsigned char const *, unsigned long);
#endif
VERIFY_CRC64, /* crc64 sum data blocks */
VERIFY_CRC32, /* crc32 sum data blocks */
VERIFY_CRC32C, /* crc32c sum data blocks */
+ VERIFY_CRC32C_INTEL, /* crc32c sum data blocks with hw */
VERIFY_CRC16, /* crc16 sum data blocks */
VERIFY_CRC7, /* crc7 sum data blocks */
VERIFY_SHA256, /* sha256 sum data blocks */
break;
case VERIFY_CRC32C:
case VERIFY_CRC32:
+ case VERIFY_CRC32C_INTEL:
len = sizeof(struct vhdr_crc32);
break;
case VERIFY_CRC16:
dprint(FD_VERIFY, "crc32c verify io_u %p, len %u\n", io_u, hdr->len);
- c = crc32c(p, hdr->len - hdr_size(hdr));
+ if (hdr->verify_type == VERIFY_CRC32C_INTEL)
+ c = crc32c_intel(p, hdr->len - hdr_size(hdr));
+ else
+ c = crc32c(p, hdr->len - hdr_size(hdr));
if (c != vh->crc32) {
log_err("crc32c: verify failed at %llu/%u\n",
ret = verify_io_u_crc64(hdr, io_u, hdr_num);
break;
case VERIFY_CRC32C:
+ case VERIFY_CRC32C_INTEL:
ret = verify_io_u_crc32c(hdr, io_u, hdr_num);
break;
case VERIFY_CRC32:
{
struct vhdr_crc32 *vh = hdr_priv(hdr);
- vh->crc32 = crc32c(p, len);
+ if (hdr->verify_type == VERIFY_CRC32C_INTEL)
+ vh->crc32 = crc32c_intel(p, len);
+ else
+ vh->crc32 = crc32c(p, len);
}
static void fill_crc64(struct verify_header *hdr, void *p, unsigned int len)
fill_crc64(hdr, data, data_len);
break;
case VERIFY_CRC32C:
+ case VERIFY_CRC32C_INTEL:
dprint(FD_VERIFY, "fill crc32c io_u %p, len %u\n",
io_u, hdr->len);
fill_crc32c(hdr, data, data_len);