summaryrefslogtreecommitdiff
path: root/crc/crc32c-intel.c
blob: 77d6df411973510298342d79e190db4b3a447ffa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "crc32c.h"

/*
 * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
 *
 * Uses the hardware-provided CRC32 instruction to accelerate CRC32C computation.
 * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
 * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2A: Instruction Set Reference, A-M
 */

#ifdef ARCH_HAVE_SSE

/*
 * Word-size parameters for the word-at-a-time loop in crc32c_intel():
 * REX_PRE is spliced into the hand-assembled opcode bytes (0x48 is the
 * REX.W prefix, which selects a 64-bit source operand; it expands to
 * nothing on 32-bit builds), and SCALE_F is the number of input bytes
 * consumed by each word-sized CRC32 instruction.
 */
#if BITS_PER_LONG == 64
#define REX_PRE "0x48, "
#define SCALE_F 8
#else
#define REX_PRE
#define SCALE_F 4
#endif

/*
 * Folds 'length' bytes starting at 'data' into the running value 'crc',
 * issuing one hardware CRC32 instruction per byte, and returns the updated
 * value. A 'length' of zero returns 'crc' unchanged. No initial or final
 * inversion is applied here; that is left to the caller.
 */
uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
				 unsigned long length)
{
	unsigned long pos;

	for (pos = 0; pos < length; pos++) {
		/*
		 * Hand-assembled CRC32 with a byte source operand: the
		 * accumulator is pinned to ESI ("S") and the input byte to
		 * CL ("c"), matching the registers encoded in the opcode.
		 */
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(data[pos])
		);
	}

	return crc;
}

/*
 * Computes a CRC32C over 'length' bytes at 'data' using the hardware CRC32
 * instruction. The running value starts at ~0 and is returned as-is (no
 * final inversion is applied here). The buffer is consumed one machine
 * word (SCALE_F bytes) per instruction; any bytes left over are handed to
 * crc32c_intel_le_hw_byte().
 */
uint32_t crc32c_intel(unsigned char const *data, unsigned long length)
{
	/*
	 * Keep the counts as wide as 'length' so that the number of words is
	 * not truncated for very large buffers.
	 */
	unsigned long nr_words = length / SCALE_F;
	unsigned long nr_tail = length % SCALE_F;
#if BITS_PER_LONG == 64
	uint64_t word;
#else
	uint32_t word;
#endif
	uint32_t crc = ~0;

	while (nr_words--) {
		/*
		 * Load through memcpy() rather than a casted pointer: 'data'
		 * carries no alignment guarantee and is const-qualified, so
		 * dereferencing it as a wider integer type is not portable C.
		 * Compilers turn this fixed-size copy into a single load.
		 */
		memcpy(&word, data, sizeof(word));

		/*
		 * Hand-assembled word-sized CRC32: accumulator in ESI ("S"),
		 * source word in (R/E)CX ("c"). REX_PRE widens the source
		 * operand to 64 bits on 64-bit builds.
		 */
		__asm__ __volatile__(
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(word)
		);
		data += sizeof(word);
	}

	if (nr_tail)
		crc = crc32c_intel_le_hw_byte(crc, data, nr_tail);

	return crc;
}

/*
 * Executes the CPUID instruction.
 *
 * On entry *eax selects the leaf to query; the sub-leaf (ECX) is always
 * passed as zero, and the incoming values of *ebx, *ecx and *edx are not
 * read. On return *eax, *ebx, *ecx and *edx hold the values CPUID left in
 * the corresponding registers.
 */
static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
		     unsigned int *edx)
{
	/*
	 * Bind every operand to the register CPUID itself uses instead of
	 * copying through extra "r" registers with eax..edx clobbered: the
	 * latter needs more general-purpose registers than 32-bit x86 has.
	 * __asm__ is used (as elsewhere in this file) because the plain
	 * 'asm' keyword is not available in strict ISO C modes.
	 */
	__asm__ __volatile__(
		"cpuid"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (*eax), "2" (0U)
	);
}

/*
 * Reports whether the CPU advertises the feature that provides the CRC32
 * instruction used in this file. Queries CPUID leaf 1 and returns 1 if
 * bit 20 of ECX (the SSE4.2 feature flag) is set, 0 otherwise.
 */
int crc32c_intel_works(void)
{
	const unsigned int sse42_mask = 1U << 20;
	unsigned int eax = 1;	/* CPUID leaf 1 */
	unsigned int ebx, ecx, edx;

	do_cpuid(&eax, &ebx, &ecx, &edx);

	if (ecx & sse42_mask)
		return 1;

	return 0;
}

#endif /* ARCH_HAVE_SSE */