/*
 * fio: fix aio trim completion latencies
 * [fio.git] / crc / crc32c-intel.c
 * CommitLineData
 */
419484b9 1#include "crc32c.h"
3845591f
JA
2
3/*
4 * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
5 *
6 * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
7 * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
8 * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
9 * http://www.intel.com/products/processor/manuals/
10 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
11 * Volume 2A: Instruction Set Reference, A-M
12 */
13
97211af3 14bool crc32c_intel_available = false;
e3aaafc4 15
2f68124f 16#ifdef ARCH_HAVE_SSE4_2
419484b9 17
3845591f
JA
18#if BITS_PER_LONG == 64
19#define REX_PRE "0x48, "
20#define SCALE_F 8
21#else
22#define REX_PRE
23#define SCALE_F 4
24#endif
25
97211af3 26static bool crc32c_probed;
e3aaafc4 27
cc62ea70
JA
/*
 * Fold 'length' bytes starting at 'data' into the running value 'crc', one
 * byte per CRC32 instruction, and return the updated value.
 *
 * No seed or final inversion is applied here; the caller supplies 'crc'.
 * A 'length' of zero returns 'crc' unchanged.
 */
static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
					unsigned long length)
{
	while (length--) {
		/*
		 * Hand-encoded byte-operand CRC32 instruction. The constraints
		 * pin the accumulator to ESI ("S") and the input byte to
		 * CL ("c"), matching the registers encoded in the last byte.
		 */
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
		data++;
	}

	return crc;
}

43/*
44 * Steps through buffer one byte at at time, calculates reflected
45 * crc using table.
46 */
47uint32_t crc32c_intel(unsigned char const *data, unsigned long length)
48{
49 unsigned int iquotient = length / SCALE_F;
50 unsigned int iremainder = length % SCALE_F;
51#if BITS_PER_LONG == 64
52 uint64_t *ptmp = (uint64_t *) data;
53#else
54 uint32_t *ptmp = (uint32_t *) data;
55#endif
56 uint32_t crc = ~0;
57
58 while (iquotient--) {
59 __asm__ __volatile__(
60 ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
61 :"=S"(crc)
62 :"0"(crc), "c"(*ptmp)
63 );
64 ptmp++;
65 }
66
67 if (iremainder)
68 crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
69 iremainder);
70
71 return crc;
72}
419484b9 73
e3aaafc4 74void crc32c_intel_probe(void)
5d7c5d34 75{
e3aaafc4 76 if (!crc32c_probed) {
267339ff 77 unsigned int eax, ebx, ecx = 0, edx;
419484b9 78
e3aaafc4 79 eax = 1;
e0ab5f97 80
e3aaafc4
JA
81 do_cpuid(&eax, &ebx, &ecx, &edx);
82 crc32c_intel_available = (ecx & (1 << 20)) != 0;
97211af3 83 crc32c_probed = true;
e3aaafc4 84 }
5d7c5d34
JA
85}
86
09c81e13 87#endif /* ARCH_HAVE_SSE4_2 */