214e2d56 |
1 | #include "crc32c.h" |
2 | |
/*
 * Advance three CRC32C accumulators by one 64-bit word each.
 *
 * Expands to three statements and relies on the expansion site providing
 * `data` (byte pointer to the start of the region) and the accumulators
 * crc0, crc1 and crc2.  The three streams start 42 words apart; ITR is the
 * word index within each stream.  crc0 is updated last.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, ((const uint64_t *)data)[42 * 1 + (ITR)]); \
	crc2 = __crc32cd(crc2, ((const uint64_t *)data)[42 * 2 + (ITR)]); \
	crc0 = __crc32cd(crc0, ((const uint64_t *)data)[42 * 0 + (ITR)]);
7 | |
/*
 * Run seven consecutive CRC32C3X8() rounds, covering word indices
 * 7*ITR through 7*ITR+6 of each stream.  Wrapped in do/while(0) so the
 * expansion behaves as a single statement.
 */
#define CRC32C7X3X8(ITR) do { \
	CRC32C3X8(7 * (ITR) + 0) \
	CRC32C3X8(7 * (ITR) + 1) \
	CRC32C3X8(7 * (ITR) + 2) \
	CRC32C3X8(7 * (ITR) + 3) \
	CRC32C3X8(7 * (ITR) + 4) \
	CRC32C3X8(7 * (ITR) + 5) \
	CRC32C3X8(7 * (ITR) + 6) \
} while (0)
17 | |
/*
 * Fallback for builds where no earlier header has defined HWCAP_CRC32,
 * the bit crc32c_arm64_probe() tests in the getauxval(AT_HWCAP) result.
 */
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif /* HWCAP_CRC32 */

/*
 * Starts at 0; crc32c_arm64_probe() sets it to 1 when its hardware
 * capability check passes.  Defined outside the
 * ARCH_HAVE_ARM64_CRC_CRYPTO guard, so it exists (and stays 0 as far as
 * this file is concerned) even when that support is compiled out.
 */
int crc32c_arm64_available = 0;
23 | |
24 | #ifdef ARCH_HAVE_ARM64_CRC_CRYPTO |
25 | |
26 | #include <sys/auxv.h> |
27 | #include <arm_acle.h> |
28 | #include <arm_neon.h> |
29 | |
/* Non-zero once crc32c_arm64_probe() has run, so it queries hwcap only once */
static int crc32c_probed;
31 | |
/*
 * Compute a reflected CRC32C over `length` bytes at `data` using the ARMv8
 * CRC32 instructions, with PMULL used to combine partial results.
 *
 * Each full 1024-byte block is split into three streams that are
 * checksummed independently ("by 3") and then folded into one value.
 * Whatever remains after the last full block is consumed 8, 4, 2 and 1
 * bytes at a time.
 *
 * The running value starts at ~0 and is returned as-is; no final inversion
 * is applied here.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	/* Folding constants: K1 is applied to crc0, K2 to crc1 */
	const poly64_t fold_k1 = 0xe417f38a;
	const poly64_t fold_k2 = 0x8f158014;
	signed long remaining = length;
	/* These three names are fixed: CRC32C3X8() refers to them directly */
	uint32_t crc0, crc1, crc2;
	uint32_t result = ~0;
	uint64_t prod0, prod1;

	for (remaining -= 1024; remaining >= 0; remaining -= 1024) {
		/* Seed crc0 with the leading 8 bytes early, for better pipelining */
		crc0 = __crc32cd(result, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/*
		 * 6 x 7 = 42 rounds, each advancing all three streams by one
		 * 64-bit word.  crc0 is handled last within every round so it
		 * does not wait on the seed computed just above.
		 */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		data += 42 * 3 * sizeof(uint64_t);

		/*
		 * Fold the streams together: crc1 is multiplied by K2 and
		 * crc0 by K1, crc2 absorbs the block's trailing 8 bytes, and
		 * the reduced products are XORed in.
		 */
		prod1 = (uint64_t)vmull_p64(crc1, fold_k2);
		prod0 = (uint64_t)vmull_p64(crc0, fold_k1);
		result = __crc32cd(crc2, *(const uint64_t *)data);
		result ^= __crc32cd(0, prod1);
		result ^= __crc32cd(0, prod0);

		data += sizeof(uint64_t);
	}

	/* Undo the loop's final subtraction to get the leftover byte count */
	remaining += 1024;
	if (remaining == 0)
		return result;

	while (remaining >= (signed long)sizeof(uint64_t)) {
		result = __crc32cd(result, *(const uint64_t *)data);
		data += sizeof(uint64_t);
		remaining -= sizeof(uint64_t);
	}

	/*
	 * Fewer than 8 bytes are left: bits 2, 1 and 0 of the count select a
	 * 4-, 2- and 1-byte step, which is cheaper than a byte-wise loop.
	 */
	if (remaining & sizeof(uint32_t)) {
		result = __crc32cw(result, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (remaining & sizeof(uint16_t)) {
		result = __crc32ch(result, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (remaining & sizeof(uint8_t))
		result = __crc32cb(result, *(const uint8_t *)data);

	return result;
}
102 | |
103 | void crc32c_arm64_probe(void) |
104 | { |
105 | unsigned long hwcap; |
847d544c |
106 | |
214e2d56 |
107 | if (!crc32c_probed) { |
108 | hwcap = getauxval(AT_HWCAP); |
847d544c |
109 | if (hwcap & HWCAP_CRC32) |
214e2d56 |
110 | crc32c_arm64_available = 1; |
214e2d56 |
111 | crc32c_probed = 1; |
112 | } |
113 | } |
114 | |
115 | #endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */ |