bloom: if we're not setting bits, break after first failed mask check
[fio.git] / crc / crc32c-arm64.c
CommitLineData
214e2d56 1#include "crc32c.h"
2
/*
 * CRC32C3X8(ITR): feed one 64-bit word into each of three independent CRC
 * accumulators.
 *
 * Expects these locals in the expansion scope:
 *   data             - byte pointer to the start of the three lanes
 *   crc0, crc1, crc2 - running CRC of lane 0, 1 and 2
 *
 * The lanes are laid out back to back, 42 64-bit words each, so lane N
 * starts at word offset 42*N and ITR selects the word within the lane.
 * crc0 is updated last so that it does not immediately follow an earlier
 * write to crc0 made by the caller.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe as the body of an unbraced if/else; callers terminate it with ';'.
 */
#define CRC32C3X8(ITR) do {\
	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));\
} while (0)

/*
 * CRC32C7X3X8(ITR): seven consecutive CRC32C3X8 steps, covering words
 * ITR*7 .. ITR*7+6 of every lane. Invoking it for ITR = 0..5 consumes all
 * 42 words of each of the three lanes.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8((ITR)*7+0);\
	CRC32C3X8((ITR)*7+1);\
	CRC32C3X8((ITR)*7+2);\
	CRC32C3X8((ITR)*7+3);\
	CRC32C3X8((ITR)*7+4);\
	CRC32C3X8((ITR)*7+5);\
	CRC32C3X8((ITR)*7+6);\
} while (0)
17
/*
 * Fallback definition for builds whose headers do not provide HWCAP_CRC32.
 * This is the bit crc32c_arm64_probe() tests in getauxval(AT_HWCAP).
 */
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif /* HWCAP_CRC32 */

/*
 * Set to 1 by crc32c_arm64_probe() when AT_HWCAP has HWCAP_CRC32 set.
 * Stays 0 otherwise, including when ARCH_HAVE_ARM64_CRC_CRYPTO is not
 * defined and the probe is not compiled in.
 */
int crc32c_arm64_available = 0;
23
24#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
25
26#include <sys/auxv.h>
27#include <arm_acle.h>
28#include <arm_neon.h>
29
/* Nonzero once crc32c_arm64_probe() has queried the hwcaps, so it does so only once */
static int crc32c_probed;
31
32/*
33 * Function to calculate reflected crc with PMULL Instruction
34 * crc done "by 3" for fixed input block size of 1024 bytes
35 */
36uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
37{
38 signed long len = length;
39 uint32_t crc = ~0;
40 uint32_t crc0, crc1, crc2;
41
42 /* Load two consts: K1 and K2 */
43 const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
44 uint64_t t0, t1;
45
46 while ((len -= 1024) >= 0) {
47 /* Do first 8 bytes here for better pipelining */
48 crc0 = __crc32cd(crc, *(const uint64_t *)data);
49 crc1 = 0;
50 crc2 = 0;
51 data += sizeof(uint64_t);
52
53 /* Process block inline
54 Process crc0 last to avoid dependency with above */
55 CRC32C7X3X8(0);
56 CRC32C7X3X8(1);
57 CRC32C7X3X8(2);
58 CRC32C7X3X8(3);
59 CRC32C7X3X8(4);
60 CRC32C7X3X8(5);
61
62 data += 42*3*sizeof(uint64_t);
63
64 /* Merge crc0 and crc1 into crc2
65 crc1 multiply by K2
66 crc0 multiply by K1 */
67
68 t1 = (uint64_t)vmull_p64(crc1, k2);
69 t0 = (uint64_t)vmull_p64(crc0, k1);
70 crc = __crc32cd(crc2, *(const uint64_t *)data);
71 crc1 = __crc32cd(0, t1);
72 crc ^= crc1;
73 crc0 = __crc32cd(0, t0);
74 crc ^= crc0;
75
76 data += sizeof(uint64_t);
77 }
78
847d544c 79 if (!(len += 1024))
214e2d56 80 return crc;
81
82 while ((len -= sizeof(uint64_t)) >= 0) {
83 crc = __crc32cd(crc, *(const uint64_t *)data);
84 data += sizeof(uint64_t);
85 }
86
87 /* The following is more efficient than the straight loop */
88 if (len & sizeof(uint32_t)) {
89 crc = __crc32cw(crc, *(const uint32_t *)data);
90 data += sizeof(uint32_t);
91 }
92 if (len & sizeof(uint16_t)) {
93 crc = __crc32ch(crc, *(const uint16_t *)data);
94 data += sizeof(uint16_t);
95 }
96 if (len & sizeof(uint8_t)) {
97 crc = __crc32cb(crc, *(const uint8_t *)data);
98 }
99
100 return crc;
101}
102
103void crc32c_arm64_probe(void)
104{
105 unsigned long hwcap;
847d544c 106
214e2d56 107 if (!crc32c_probed) {
108 hwcap = getauxval(AT_HWCAP);
847d544c 109 if (hwcap & HWCAP_CRC32)
214e2d56 110 crc32c_arm64_available = 1;
214e2d56 111 crc32c_probed = 1;
112 }
113}
114
115#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */