back to regular software crc32c, if not
supported by the system.
+ crc32c-arm64 Use hardware assisted crc32c calculation
+		provided on CRC-enabled 64-bit ARM processors.
+ Falls back to regular software crc32c, if not
+ supported by the system.
+
crc32 Use a crc32 sum of the data area and store
it in the header of each block.
T_DEDUPE_OBJS = t/dedupe.o
T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
lib/memalign.o lib/bloom.o t/debug.o crc/xxhash.o t/arch.o \
- crc/murmur3.o crc/crc32c.o crc/crc32c-intel.o crc/fnv.o
+ crc/murmur3.o crc/crc32c.o crc/crc32c-intel.o crc/crc32c-arm64.o crc/fnv.o
T_DEDUPE_PROGS = t/fio-dedupe
-T_VS_OBJS = t/verify-state.o t/log.o crc/crc32c.o crc/crc32c-intel.o t/debug.o
+T_VS_OBJS = t/verify-state.o t/log.o crc/crc32c.o crc/crc32c-intel.o crc/crc32c-arm64.o t/debug.o
T_VS_PROGS = t/fio-verify-state
T_PIPE_ASYNC_OBJS = t/read-to-pipe-async.o
#define ARCH_HAVE_FFZ
+#ifdef ARCH_HAVE_CRC_CRYPTO
+#define ARCH_HAVE_ARM64_CRC_CRYPTO
+#endif
+
#endif
fi
elif check_define __arm__ ; then
cpu="arm"
+elif check_define __aarch64__ ; then
+ cpu="aarch64"
elif check_define __hppa__ ; then
cpu="hppa"
else
armv*b|armv*l|arm)
cpu="arm"
;;
+ aarch64)
+ cpu="arm64"
+ ;;
hppa|parisc|parisc64)
cpu="hppa"
;;
fi
echo "bool $have_bool"
+##########################################
+# check march=armv8-a+crc+crypto
+# NOTE(review): this appends the flag (and -DARCH_HAVE_CRC_CRYPTO) to the
+# global CFLAGS, so the compiler may emit CRC/crypto instructions in ANY
+# translation unit; the runtime HWCAP probe in crc/crc32c-arm64.c then
+# cannot guarantee a safe software fallback on CPUs lacking these
+# extensions.  Consider scoping the flag to crc/crc32c-arm64.o only.
+march_armv8_a_crc_crypto="no"
+if test "$cpu" = "arm64" ; then
+  cat > $TMPC <<EOF
+int main(void)
+{
+  return 0;
+}
+EOF
+  if compile_prog "-march=armv8-a+crc+crypto" "" ""; then
+    march_armv8_a_crc_crypto="yes"
+    CFLAGS="$CFLAGS -march=armv8-a+crc+crypto -DARCH_HAVE_CRC_CRYPTO"
+  fi
+fi
+echo "march_armv8_a_crc_crypto $march_armv8_a_crc_crypto"
+
+
#############################################################################
if test "$wordsize" = "64" ; then
--- /dev/null
+#include "crc32c.h"
+
+/*
+ * CRC one 8-byte word per lane, at word offset ITR into each of the
+ * three 42-word (336-byte) lanes of the current 1024-byte block.
+ * crc0 is intentionally updated last (see caller comment: "Process
+ * crc0 last to avoid dependency with above").
+ */
+#define CRC32C3X8(ITR) \
+	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
+	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
+	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));
+
+/* Unrolled group of 7 x CRC32C3X8: 7 words advanced in each lane. */
+#define CRC32C7X3X8(ITR) do {\
+	CRC32C3X8((ITR)*7+0) \
+	CRC32C3X8((ITR)*7+1) \
+	CRC32C3X8((ITR)*7+2) \
+	CRC32C3X8((ITR)*7+3) \
+	CRC32C3X8((ITR)*7+4) \
+	CRC32C3X8((ITR)*7+5) \
+	CRC32C3X8((ITR)*7+6) \
+	} while(0)
+
+/* AT_HWCAP bit for the ARMv8 CRC32 instructions, supplied here for
+   toolchains whose <sys/auxv.h>/hwcap headers do not define it. */
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif /* HWCAP_CRC32 */
+
+/* Set by crc32c_arm64_probe(); defined outside the #ifdef below so
+   builds without hardware support can still reference it (it simply
+   stays 0). */
+int crc32c_arm64_available = 0;
+
+#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
+
+#include <sys/auxv.h>
+#include <arm_acle.h>
+#include <arm_neon.h>
+
+/* Guard so the HWCAP lookup in crc32c_arm64_probe() runs only once. */
+static int crc32c_probed;
+
+/*
+ * Hardware-assisted reflected crc32c using the ARMv8 CRC and PMULL
+ * instructions; crc done "by 3" for a fixed input block size of 1024
+ * bytes.  Each block is split into three independent 42-word lanes
+ * accumulated in parallel (crc0/crc1/crc2) to hide crc32 instruction
+ * latency, then the lanes are folded together with two carry-less
+ * multiplies.  The tail (< 1024 bytes) is finished 8/4/2/1 bytes at a
+ * time.  The CRC is seeded with ~0 and returned without a final
+ * inversion.
+ */
+uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
+{
+	signed long len = length;
+	uint32_t crc = ~0;
+	uint32_t crc0, crc1, crc2;
+
+	/*
+	 * Load two consts: K1 and K2 — CRC folding constants used below
+	 * to shift crc0/crc1 across the lane distance they skipped
+	 * (values taken on trust for the 336-byte lane spacing —
+	 * TODO(review): verify against the CRC32C polynomial).
+	 */
+	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
+	uint64_t t0, t1;
+
+	while ((len -= 1024) >= 0) {
+		/* Do first 8 bytes here for better pipelining */
+		crc0 = __crc32cd(crc, *(const uint64_t *)data);
+		crc1 = 0;
+		crc2 = 0;
+		data += sizeof(uint64_t);
+
+		/* Process block inline
+		   Process crc0 last to avoid dependency with above */
+		CRC32C7X3X8(0);
+		CRC32C7X3X8(1);
+		CRC32C7X3X8(2);
+		CRC32C7X3X8(3);
+		CRC32C7X3X8(4);
+		CRC32C7X3X8(5);
+
+		data += 42*3*sizeof(uint64_t);
+
+		/* Merge crc0 and crc1 into crc2
+		   crc1 multiply by K2
+		   crc0 multiply by K1 */
+
+		t1 = (uint64_t)vmull_p64(crc1, k2);
+		t0 = (uint64_t)vmull_p64(crc0, k1);
+		crc = __crc32cd(crc2, *(const uint64_t *)data);
+		crc1 = __crc32cd(0, t1);
+		crc ^= crc1;
+		crc0 = __crc32cd(0, t0);
+		crc ^= crc0;
+
+		data += sizeof(uint64_t);
+	}
+
+	/* Undo the final subtraction; done if the input was an exact
+	   multiple of 1024 bytes. */
+	if(!(len += 1024))
+		return crc;
+
+	while ((len -= sizeof(uint64_t)) >= 0) {
+		crc = __crc32cd(crc, *(const uint64_t *)data);
+		data += sizeof(uint64_t);
+	}
+
+	/* The following is more efficient than the straight loop.
+	   len is now in [-8, -1], but its low three bits equal those of
+	   the remaining byte count, so the masks below still select the
+	   correct 4/2/1-byte tail. */
+	if (len & sizeof(uint32_t)) {
+		crc = __crc32cw(crc, *(const uint32_t *)data);
+		data += sizeof(uint32_t);
+	}
+	if (len & sizeof(uint16_t)) {
+		crc = __crc32ch(crc, *(const uint16_t *)data);
+		data += sizeof(uint16_t);
+	}
+	if (len & sizeof(uint8_t)) {
+		crc = __crc32cb(crc, *(const uint8_t *)data);
+	}
+
+	return crc;
+}
+
+/*
+ * One-shot runtime probe: read the kernel-reported HWCAP bits and set
+ * crc32c_arm64_available if the CPU implements the CRC32 extension.
+ * NOTE(review): the crc32c_probed check-then-set is not synchronized;
+ * concurrent first calls may both probe, which is benign since both
+ * write the same values.
+ */
+void crc32c_arm64_probe(void)
+{
+	unsigned long hwcap;
+	if (!crc32c_probed) {
+		hwcap = getauxval(AT_HWCAP);
+		if (hwcap & HWCAP_CRC32) {
+			crc32c_arm64_available = 1;
+		}
+		crc32c_probed = 1;
+	}
+}
+
+#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
#include "../arch/arch.h"
extern uint32_t crc32c_sw(unsigned char const *, unsigned long);
+extern int crc32c_arm64_available;
extern int crc32c_intel_available;
+/*
+ * With ARM64 CRC/crypto support compiled in, expose the hardware
+ * implementation and its probe; otherwise alias crc32c_arm64 to the
+ * software implementation and make the probe a no-op, so callers need
+ * no further #ifdefs.
+ */
+#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
+extern uint32_t crc32c_arm64(unsigned char const *, unsigned long);
+extern void crc32c_arm64_probe(void);
+#else
+#define crc32c_arm64 crc32c_sw
+static inline void crc32c_arm64_probe(void)
+{
+}
+#endif
+
#ifdef ARCH_HAVE_SSE4_2
extern uint32_t crc32c_intel(unsigned char const *, unsigned long);
extern void crc32c_intel_probe(void);
static inline uint32_t fio_crc32c(unsigned char const *buf, unsigned long len)
{
+ if (crc32c_arm64_available) {
+ return crc32c_arm64(buf, len);
+ }
+
if (crc32c_intel_available)
return crc32c_intel(buf, len);
int i, first = 1;
void *buf;
+ crc32c_arm64_probe();
crc32c_intel_probe();
if (!type)
struct bloom *b;
size_t no_uints;
+ crc32c_arm64_probe();
crc32c_intel_probe();
b = malloc(sizeof(*b));
.oval = VERIFY_CRC32C,
.help = "Use crc32c checksums for verification (hw assisted, if available)",
},
+ { .ival = "crc32c-arm64",
+ .oval = VERIFY_CRC32C,
+ .help = "Use crc32c checksums for verification (hw assisted, if available)",
+ },
{ .ival = "crc32c",
.oval = VERIFY_CRC32C,
.help = "Use crc32c checksums for verification (hw assisted, if available)",
void fio_verify_init(struct thread_data *td)
{
	if (td->o.verify == VERIFY_CRC32C_INTEL ||
+	    td->o.verify == VERIFY_CRC32C_ARM64 ||
	    td->o.verify == VERIFY_CRC32C) {
+		/* Both probes may be called unconditionally: on builds
+		   without the matching hardware support the arm64 probe
+		   is a no-op stub (see crc32c.h). */
+		crc32c_arm64_probe();
		crc32c_intel_probe();
	}
}
VERIFY_CRC64, /* crc64 sum data blocks */
VERIFY_CRC32, /* crc32 sum data blocks */
VERIFY_CRC32C, /* crc32c sum data blocks */
+ VERIFY_CRC32C_ARM64, /* crc32c sum data blocks with hw */
VERIFY_CRC32C_INTEL, /* crc32c sum data blocks with hw */
VERIFY_CRC16, /* crc16 sum data blocks */
VERIFY_CRC7, /* crc7 sum data blocks */