Merge branch 'master' of https://github.com/celestinechen/fio
[fio.git] / crc / crc32c-intel.c
1 #include "crc32c.h"
2
3 /*
4  * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
5  *
 * Uses the hardware-provided CRC32 instruction to accelerate CRC32C computation.
7  * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
8  * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
9  * http://www.intel.com/products/processor/manuals/
10  * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
11  * Volume 2A: Instruction Set Reference, A-M
12  */
13
/*
 * Set by crc32c_intel_probe() when CPUID reports the feature bit it tests.
 * Nothing else in this file writes it, so it stays false if the probe never
 * runs or the file is built without ARCH_HAVE_SSE4_2.
 */
bool crc32c_intel_available = false;

#ifdef ARCH_HAVE_SSE4_2

/*
 * Word-size dependent pieces of the hand-encoded CRC32 instruction used by
 * crc32c_intel():
 *   REX_PRE - extra opcode byte spliced into the .byte sequence on 64-bit
 *             builds (0x48 is the x86-64 REX.W prefix); empty otherwise.
 *   SCALE_F - number of bytes consumed per instruction in the main loop.
 */
#if BITS_PER_LONG == 64
#define REX_PRE "0x48, "
#define SCALE_F 8
#else
#define REX_PRE
#define SCALE_F 4
#endif

/* True once crc32c_intel_probe() has run, so CPUID is only queried once. */
static bool crc32c_probed;
27
/*
 * Fold 'length' bytes starting at 'data' into the running CRC value 'crc',
 * issuing one hardware CRC32 instruction per byte, and return the updated
 * value. With length == 0 the input crc is returned untouched.
 */
static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
                                        unsigned long length)
{
        unsigned long i;

        for (i = 0; i < length; i++) {
                /*
                 * Hand-encoded byte-wide CRC32: accumulator lives in %esi
                 * ("S" constraint), the data byte is supplied in %cl ("c").
                 */
                __asm__ __volatile__(
                        ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
                        :"=S"(crc)
                        :"0"(crc), "c"(data[i])
                );
        }

        return crc;
}
42
43 /*
44  * Steps through buffer one byte at at time, calculates reflected 
45  * crc using table.
46  */
47 uint32_t crc32c_intel(unsigned char const *data, unsigned long length)
48 {
49         unsigned int iquotient = length / SCALE_F;
50         unsigned int iremainder = length % SCALE_F;
51 #if BITS_PER_LONG == 64
52         uint64_t *ptmp = (uint64_t *) data;
53 #else
54         uint32_t *ptmp = (uint32_t *) data;
55 #endif
56         uint32_t crc = ~0;
57
58         while (iquotient--) {
59                 __asm__ __volatile__(
60                         ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
61                         :"=S"(crc)
62                         :"0"(crc), "c"(*ptmp)
63                 );
64                 ptmp++;
65         }
66
67         if (iremainder)
68                 crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
69                                  iremainder);
70
71         return crc;
72 }
73
74 void crc32c_intel_probe(void)
75 {
76         if (!crc32c_probed) {
77                 unsigned int eax, ebx, ecx = 0, edx;
78
79                 eax = 1;
80
81                 do_cpuid(&eax, &ebx, &ecx, &edx);
82                 crc32c_intel_available = (ecx & (1 << 20)) != 0;
83                 crc32c_probed = true;
84         }
85 }
86
87 #endif /* ARCH_HAVE_SSE4_2 */