/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)

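/*
 * Each BLOCK(i, reg) expansion below streams 32 bytes through one ymm
 * register, so BLOCK16() covers 16 * 32 = 512 bytes per loop pass; that is
 * why the functions compute lines = bytes >> 9 and advance every pointer by
 * 512 per iteration.  kernel_fpu_begin()/kernel_fpu_end() bracket the loops
 * because kernel code may not touch the ymm state otherwise.  vxorps is used
 * for the XOR itself since 256-bit vpxor would require AVX2, while this file
 * only assumes AVX.
 */
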
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16();

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

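/*
 * The template is consumed by the generic code in crypto/xor.c: xor_speed()
 * benchmarks it at boot against the other x86 candidates, and xor_blocks()
 * then dispatches to do_2 ... do_5 depending on how many source buffers are
 * folded into the destination.  A rough sketch of a call that would end up
 * in xor_avx_2() (buffer names are illustrative; buffers must be 32-byte
 * aligned because of the vmovdqa accesses above, and the length a multiple
 * of 512):
 *
 *	void *srcs[] = { src_buf };
 *	xor_blocks(1, PAGE_SIZE, dst_buf, srcs);	// dst_buf ^= src_buf
 */
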
#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

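/*
 * These hooks are pulled in by the x86 xor template headers (see
 * arch/x86/include/asm/xor_32.h and xor_64.h), roughly along the lines of:
 *
 *	#define XOR_TRY_TEMPLATES \
 *	do { \
 *		AVX_XOR_SPEED; \
 *		xor_speed(&xor_block_sse); \
 *	} while (0)
 *
 *	#define XOR_SELECT_TEMPLATE(FASTEST)	AVX_SELECT(FASTEST)
 *
 * The OSXSAVE check matters because the AVX registers are only usable once
 * the OS has enabled XSAVE-managed ymm state; CPU support for AVX alone is
 * not sufficient.
 */
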
#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif /* CONFIG_AS_AVX */
#endif /* _ASM_X86_XOR_AVX_H */