Commit | Line | Data |
---|---|---|
214e2d56 | 1 | #include "crc32c.h" |
58828d07 SW |
2 | #include "../os/os.h" |
3 | ||
4 | bool crc32c_arm64_available = false; | |
5 | ||
6 | #ifdef ARCH_HAVE_CRC_CRYPTO | |
214e2d56 | 7 | |
/*
 * CRC32C3X8(ITR) - advance the three CRC lanes by one 64-bit word each.
 *
 * Expects `data`, `crc0`, `crc1` and `crc2` to be visible at the point of
 * use.  `data` is viewed as an array of 64-bit words; lane N reads word
 * (N * 42 + ITR).  Lane 0 is updated last.
 *
 * Expands to three complete statements (trailing semicolon included), so
 * callers may invoke it without adding one.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, ((const uint64_t *)data)[1 * 42 + (ITR)]); \
	crc2 = __crc32cd(crc2, ((const uint64_t *)data)[2 * 42 + (ITR)]); \
	crc0 = __crc32cd(crc0, ((const uint64_t *)data)[0 * 42 + (ITR)]);
12 | ||
/*
 * CRC32C7X3X8(ITR) - run seven consecutive CRC32C3X8 steps.
 *
 * Group ITR covers word offsets 7 * ITR through 7 * ITR + 6 of each lane.
 * Wrapped in do { } while (0) so it can be used as a single statement.
 */
#define CRC32C7X3X8(ITR) \
	do { \
		CRC32C3X8(7 * (ITR) + 0) \
		CRC32C3X8(7 * (ITR) + 1) \
		CRC32C3X8(7 * (ITR) + 2) \
		CRC32C3X8(7 * (ITR) + 3) \
		CRC32C3X8(7 * (ITR) + 4) \
		CRC32C3X8(7 * (ITR) + 5) \
		CRC32C3X8(7 * (ITR) + 6) \
	} while (0)
22 | ||
214e2d56 | 23 | #include <arm_acle.h> |
24 | #include <arm_neon.h> | |
25 | ||
97211af3 | 26 | static bool crc32c_probed; |
214e2d56 | 27 | |
28 | /* | |
29 | * Function to calculate reflected crc with PMULL Instruction | |
30 | * crc done "by 3" for fixed input block size of 1024 bytes | |
31 | */ | |
32 | uint32_t crc32c_arm64(unsigned char const *data, unsigned long length) | |
33 | { | |
34 | signed long len = length; | |
35 | uint32_t crc = ~0; | |
36 | uint32_t crc0, crc1, crc2; | |
37 | ||
38 | /* Load two consts: K1 and K2 */ | |
39 | const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014; | |
40 | uint64_t t0, t1; | |
41 | ||
42 | while ((len -= 1024) >= 0) { | |
43 | /* Do first 8 bytes here for better pipelining */ | |
44 | crc0 = __crc32cd(crc, *(const uint64_t *)data); | |
45 | crc1 = 0; | |
46 | crc2 = 0; | |
47 | data += sizeof(uint64_t); | |
48 | ||
49 | /* Process block inline | |
50 | Process crc0 last to avoid dependency with above */ | |
51 | CRC32C7X3X8(0); | |
52 | CRC32C7X3X8(1); | |
53 | CRC32C7X3X8(2); | |
54 | CRC32C7X3X8(3); | |
55 | CRC32C7X3X8(4); | |
56 | CRC32C7X3X8(5); | |
57 | ||
58 | data += 42*3*sizeof(uint64_t); | |
59 | ||
60 | /* Merge crc0 and crc1 into crc2 | |
61 | crc1 multiply by K2 | |
62 | crc0 multiply by K1 */ | |
63 | ||
64 | t1 = (uint64_t)vmull_p64(crc1, k2); | |
65 | t0 = (uint64_t)vmull_p64(crc0, k1); | |
66 | crc = __crc32cd(crc2, *(const uint64_t *)data); | |
67 | crc1 = __crc32cd(0, t1); | |
68 | crc ^= crc1; | |
69 | crc0 = __crc32cd(0, t0); | |
70 | crc ^= crc0; | |
71 | ||
72 | data += sizeof(uint64_t); | |
73 | } | |
74 | ||
847d544c | 75 | if (!(len += 1024)) |
214e2d56 | 76 | return crc; |
77 | ||
78 | while ((len -= sizeof(uint64_t)) >= 0) { | |
79 | crc = __crc32cd(crc, *(const uint64_t *)data); | |
80 | data += sizeof(uint64_t); | |
81 | } | |
82 | ||
83 | /* The following is more efficient than the straight loop */ | |
84 | if (len & sizeof(uint32_t)) { | |
85 | crc = __crc32cw(crc, *(const uint32_t *)data); | |
86 | data += sizeof(uint32_t); | |
87 | } | |
88 | if (len & sizeof(uint16_t)) { | |
89 | crc = __crc32ch(crc, *(const uint16_t *)data); | |
90 | data += sizeof(uint16_t); | |
91 | } | |
92 | if (len & sizeof(uint8_t)) { | |
93 | crc = __crc32cb(crc, *(const uint8_t *)data); | |
94 | } | |
95 | ||
96 | return crc; | |
97 | } | |
98 | ||
99 | void crc32c_arm64_probe(void) | |
100 | { | |
214e2d56 | 101 | if (!crc32c_probed) { |
58828d07 | 102 | crc32c_arm64_available = os_cpu_has(CPU_ARM64_CRC32C); |
97211af3 | 103 | crc32c_probed = true; |
214e2d56 | 104 | } |
105 | } | |
106 | ||
58828d07 | 107 | #endif /* ARCH_HAVE_CRC_CRYPTO */ |