aarch64: refactor HW ARM CRC32c detection
[fio.git] / crc / crc32c-arm64.c
CommitLineData
214e2d56 1#include "crc32c.h"
58828d07
SW
2#include "../os/os.h"
3
/*
 * Set by crc32c_arm64_probe() (when ARCH_HAVE_CRC_CRYPTO is defined) to the
 * result of the CPU feature query; stays false otherwise.
 */
bool crc32c_arm64_available = false;
5
6#ifdef ARCH_HAVE_CRC_CRYPTO
214e2d56 7
/*
 * One step of the three-way interleaved CRC.
 *
 * Feeds the 64-bit word at index ITR of each of three consecutive 42-word
 * streams (starting at data + 0, data + 42 and data + 84 words) into crc0,
 * crc1 and crc2 respectively.  crc0 is updated last.
 *
 * The macro relies on `data`, `crc0`, `crc1` and `crc2` being in scope at the
 * expansion site.  It expands to three complete statements (each with its own
 * semicolon), so it must only be used where a statement sequence is valid;
 * CRC32C7X3X8 wraps it in a do { } while (0) for that reason.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));
12
/*
 * Seven consecutive CRC32C3X8 steps, covering word indices
 * ITR*7 + 0 .. ITR*7 + 6 of each stream.
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single statement
 * and the caller supplies the trailing semicolon.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8((ITR)*7+0) \
	CRC32C3X8((ITR)*7+1) \
	CRC32C3X8((ITR)*7+2) \
	CRC32C3X8((ITR)*7+3) \
	CRC32C3X8((ITR)*7+4) \
	CRC32C3X8((ITR)*7+5) \
	CRC32C3X8((ITR)*7+6) \
	} while(0)
22
214e2d56 23#include <arm_acle.h>
24#include <arm_neon.h>
25
/* True once crc32c_arm64_probe() has run its CPU feature query. */
static bool crc32c_probed;
214e2d56 27
28/*
29 * Function to calculate reflected crc with PMULL Instruction
30 * crc done "by 3" for fixed input block size of 1024 bytes
31 */
32uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
33{
34 signed long len = length;
35 uint32_t crc = ~0;
36 uint32_t crc0, crc1, crc2;
37
38 /* Load two consts: K1 and K2 */
39 const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
40 uint64_t t0, t1;
41
42 while ((len -= 1024) >= 0) {
43 /* Do first 8 bytes here for better pipelining */
44 crc0 = __crc32cd(crc, *(const uint64_t *)data);
45 crc1 = 0;
46 crc2 = 0;
47 data += sizeof(uint64_t);
48
49 /* Process block inline
50 Process crc0 last to avoid dependency with above */
51 CRC32C7X3X8(0);
52 CRC32C7X3X8(1);
53 CRC32C7X3X8(2);
54 CRC32C7X3X8(3);
55 CRC32C7X3X8(4);
56 CRC32C7X3X8(5);
57
58 data += 42*3*sizeof(uint64_t);
59
60 /* Merge crc0 and crc1 into crc2
61 crc1 multiply by K2
62 crc0 multiply by K1 */
63
64 t1 = (uint64_t)vmull_p64(crc1, k2);
65 t0 = (uint64_t)vmull_p64(crc0, k1);
66 crc = __crc32cd(crc2, *(const uint64_t *)data);
67 crc1 = __crc32cd(0, t1);
68 crc ^= crc1;
69 crc0 = __crc32cd(0, t0);
70 crc ^= crc0;
71
72 data += sizeof(uint64_t);
73 }
74
847d544c 75 if (!(len += 1024))
214e2d56 76 return crc;
77
78 while ((len -= sizeof(uint64_t)) >= 0) {
79 crc = __crc32cd(crc, *(const uint64_t *)data);
80 data += sizeof(uint64_t);
81 }
82
83 /* The following is more efficient than the straight loop */
84 if (len & sizeof(uint32_t)) {
85 crc = __crc32cw(crc, *(const uint32_t *)data);
86 data += sizeof(uint32_t);
87 }
88 if (len & sizeof(uint16_t)) {
89 crc = __crc32ch(crc, *(const uint16_t *)data);
90 data += sizeof(uint16_t);
91 }
92 if (len & sizeof(uint8_t)) {
93 crc = __crc32cb(crc, *(const uint8_t *)data);
94 }
95
96 return crc;
97}
98
99void crc32c_arm64_probe(void)
100{
214e2d56 101 if (!crc32c_probed) {
58828d07 102 crc32c_arm64_available = os_cpu_has(CPU_ARM64_CRC32C);
97211af3 103 crc32c_probed = true;
214e2d56 104 }
105}
106
58828d07 107#endif /* ARCH_HAVE_CRC_CRYPTO */