#include "crc32c.h"

#include <string.h>

#include "../os/os.h"
| 3 | |
/*
 * Result of os_cpu_has(CPU_ARM64_CRC32C) as recorded by
 * crc32c_arm64_probe(); stays false until that probe has run.
 */
bool crc32c_arm64_available = false;
| 5 | |
| 6 | #ifdef ARCH_HAVE_CRC_CRYPTO |
| 7 | |
/*
 * One CRC32C step on each of three interleaved streams.
 *
 * Expects these names in scope at the point of use: `data`, pointing at
 * the first byte of stream 0, and the accumulators crc0, crc1 and crc2.
 * Stream N begins 42*N 64-bit words after `data`; ITR selects the word
 * within each stream.
 *
 * The words are fetched with memcpy() rather than by dereferencing a
 * casted uint64_t pointer, so `data` does not have to be 8-byte aligned.
 * The update order (crc1, crc2, then crc0) is kept deliberately.
 *
 * The expansion carries its own trailing ';', so it is invoked without one.
 */
#define CRC32C3X8(ITR) do {\
	const unsigned char *c3x8_src_ = (const unsigned char *)(data) +\
					 sizeof(uint64_t) * (ITR);\
	uint64_t c3x8_w0_, c3x8_w1_, c3x8_w2_;\
	memcpy(&c3x8_w1_, c3x8_src_ + sizeof(uint64_t) * (42*1), sizeof(c3x8_w1_));\
	memcpy(&c3x8_w2_, c3x8_src_ + sizeof(uint64_t) * (42*2), sizeof(c3x8_w2_));\
	memcpy(&c3x8_w0_, c3x8_src_ + sizeof(uint64_t) * (42*0), sizeof(c3x8_w0_));\
	crc1 = __crc32cd(crc1, c3x8_w1_);\
	crc2 = __crc32cd(crc2, c3x8_w2_);\
	crc0 = __crc32cd(crc0, c3x8_w0_);\
} while (0);
| 12 | |
/*
 * Seven back-to-back CRC32C3X8 steps covering words 7*ITR .. 7*ITR + 6 of
 * each stream. CRC32C3X8 terminates its own statements, which is why the
 * invocations below carry no semicolons.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8(7*(ITR) + 0) \
	CRC32C3X8(7*(ITR) + 1) \
	CRC32C3X8(7*(ITR) + 2) \
	CRC32C3X8(7*(ITR) + 3) \
	CRC32C3X8(7*(ITR) + 4) \
	CRC32C3X8(7*(ITR) + 5) \
	CRC32C3X8(7*(ITR) + 6) \
} while (0)
| 22 | |
| 23 | #include <arm_acle.h> |
| 24 | #include <arm_neon.h> |
| 25 | |
/* Set by crc32c_arm64_probe() after its first run so the CPU check is not repeated. */
static bool crc32c_probed;
| 27 | |
| 28 | /* |
| 29 | * Function to calculate reflected crc with PMULL Instruction |
| 30 | * crc done "by 3" for fixed input block size of 1024 bytes |
| 31 | */ |
| 32 | uint32_t crc32c_arm64(unsigned char const *data, unsigned long length) |
| 33 | { |
| 34 | signed long len = length; |
| 35 | uint32_t crc = ~0; |
| 36 | uint32_t crc0, crc1, crc2; |
| 37 | |
| 38 | /* Load two consts: K1 and K2 */ |
| 39 | const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014; |
| 40 | uint64_t t0, t1; |
| 41 | |
| 42 | while ((len -= 1024) >= 0) { |
| 43 | /* Do first 8 bytes here for better pipelining */ |
| 44 | crc0 = __crc32cd(crc, *(const uint64_t *)data); |
| 45 | crc1 = 0; |
| 46 | crc2 = 0; |
| 47 | data += sizeof(uint64_t); |
| 48 | |
| 49 | /* Process block inline |
| 50 | Process crc0 last to avoid dependency with above */ |
| 51 | CRC32C7X3X8(0); |
| 52 | CRC32C7X3X8(1); |
| 53 | CRC32C7X3X8(2); |
| 54 | CRC32C7X3X8(3); |
| 55 | CRC32C7X3X8(4); |
| 56 | CRC32C7X3X8(5); |
| 57 | |
| 58 | data += 42*3*sizeof(uint64_t); |
| 59 | |
| 60 | /* Merge crc0 and crc1 into crc2 |
| 61 | crc1 multiply by K2 |
| 62 | crc0 multiply by K1 */ |
| 63 | |
| 64 | t1 = (uint64_t)vmull_p64(crc1, k2); |
| 65 | t0 = (uint64_t)vmull_p64(crc0, k1); |
| 66 | crc = __crc32cd(crc2, *(const uint64_t *)data); |
| 67 | crc1 = __crc32cd(0, t1); |
| 68 | crc ^= crc1; |
| 69 | crc0 = __crc32cd(0, t0); |
| 70 | crc ^= crc0; |
| 71 | |
| 72 | data += sizeof(uint64_t); |
| 73 | } |
| 74 | |
| 75 | if (!(len += 1024)) |
| 76 | return crc; |
| 77 | |
| 78 | while ((len -= sizeof(uint64_t)) >= 0) { |
| 79 | crc = __crc32cd(crc, *(const uint64_t *)data); |
| 80 | data += sizeof(uint64_t); |
| 81 | } |
| 82 | |
| 83 | /* The following is more efficient than the straight loop */ |
| 84 | if (len & sizeof(uint32_t)) { |
| 85 | crc = __crc32cw(crc, *(const uint32_t *)data); |
| 86 | data += sizeof(uint32_t); |
| 87 | } |
| 88 | if (len & sizeof(uint16_t)) { |
| 89 | crc = __crc32ch(crc, *(const uint16_t *)data); |
| 90 | data += sizeof(uint16_t); |
| 91 | } |
| 92 | if (len & sizeof(uint8_t)) { |
| 93 | crc = __crc32cb(crc, *(const uint8_t *)data); |
| 94 | } |
| 95 | |
| 96 | return crc; |
| 97 | } |
| 98 | |
| 99 | void crc32c_arm64_probe(void) |
| 100 | { |
| 101 | if (!crc32c_probed) { |
| 102 | crc32c_arm64_available = os_cpu_has(CPU_ARM64_CRC32C); |
| 103 | crc32c_probed = true; |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | #endif /* ARCH_HAVE_CRC_CRYPTO */ |