Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
ef1313de | 2 | /* |
ef1313de AB |
3 | * |
4 | * Copyright (C) IBM Corporation, 2012 | |
5 | * | |
6 | * Author: Anton Blanchard <anton@au.ibm.com> | |
7 | */ | |
8fe08885 DA |
8 | |
/*
 * Sparse (as at v0.5.0) gets very, very confused by this file.
 * Make it a bit simpler for it.
 *
 * When __CHECKER__ is defined, <altivec.h> is skipped and two minimal
 * stand-ins are supplied instead: vec_xor() becomes a plain '^' and
 * "vector" becomes the generic 16-byte vector_size attribute.
 */
#if !defined(__CHECKER__)
#include <altivec.h>
#else
/* Fully parenthesised so the expansion is safe inside larger expressions. */
#define vec_xor(a, b) ((a) ^ (b))
#define vector __attribute__((vector_size(16)))
#endif
ef1313de | 19 | |
f718d426 | 20 | #include "xor_vmx.h" |
ef1313de AB |
21 | |
22 | typedef vector signed char unative_t; | |
23 | ||
24 | #define DEFINE(V) \ | |
25 | unative_t *V = (unative_t *)V##_in; \ | |
26 | unative_t V##_0, V##_1, V##_2, V##_3 | |
27 | ||
/*
 * LOAD(buf) - copy the four elements buf[0]..buf[3] into the locals
 * buf##_0..buf##_3 (as declared by DEFINE(buf)).
 */
#define LOAD(buf)				\
	do {					\
		buf##_0 = (buf)[0];		\
		buf##_1 = (buf)[1];		\
		buf##_2 = (buf)[2];		\
		buf##_3 = (buf)[3];		\
	} while (0)
35 | ||
/*
 * STORE(buf) - write the locals buf##_0..buf##_3 back to
 * buf[0]..buf[3].
 */
#define STORE(buf)				\
	do {					\
		(buf)[0] = buf##_0;		\
		(buf)[1] = buf##_1;		\
		(buf)[2] = buf##_2;		\
		(buf)[3] = buf##_3;		\
	} while (0)
43 | ||
/*
 * XOR(dst, src) - fold src's four locals into dst's four locals:
 * dst##_i = vec_xor(dst##_i, src##_i) for i = 0..3.
 * Only dst's locals are modified; nothing is written to memory here.
 */
#define XOR(dst, src)					\
	do {						\
		dst##_0 = vec_xor(dst##_0, src##_0);	\
		dst##_1 = vec_xor(dst##_1, src##_1);	\
		dst##_2 = vec_xor(dst##_2, src##_2);	\
		dst##_3 = vec_xor(dst##_3, src##_3);	\
	} while (0)
51 | ||
297565aa AB |
52 | void __xor_altivec_2(unsigned long bytes, |
53 | unsigned long * __restrict v1_in, | |
54 | const unsigned long * __restrict v2_in) | |
ef1313de AB |
55 | { |
56 | DEFINE(v1); | |
57 | DEFINE(v2); | |
58 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; | |
59 | ||
ef1313de AB |
60 | do { |
61 | LOAD(v1); | |
62 | LOAD(v2); | |
63 | XOR(v1, v2); | |
64 | STORE(v1); | |
65 | ||
66 | v1 += 4; | |
67 | v2 += 4; | |
68 | } while (--lines > 0); | |
ef1313de | 69 | } |
ef1313de | 70 | |
297565aa AB |
71 | void __xor_altivec_3(unsigned long bytes, |
72 | unsigned long * __restrict v1_in, | |
73 | const unsigned long * __restrict v2_in, | |
74 | const unsigned long * __restrict v3_in) | |
ef1313de AB |
75 | { |
76 | DEFINE(v1); | |
77 | DEFINE(v2); | |
78 | DEFINE(v3); | |
79 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; | |
80 | ||
ef1313de AB |
81 | do { |
82 | LOAD(v1); | |
83 | LOAD(v2); | |
84 | LOAD(v3); | |
85 | XOR(v1, v2); | |
86 | XOR(v1, v3); | |
87 | STORE(v1); | |
88 | ||
89 | v1 += 4; | |
90 | v2 += 4; | |
91 | v3 += 4; | |
92 | } while (--lines > 0); | |
ef1313de | 93 | } |
ef1313de | 94 | |
297565aa AB |
95 | void __xor_altivec_4(unsigned long bytes, |
96 | unsigned long * __restrict v1_in, | |
97 | const unsigned long * __restrict v2_in, | |
98 | const unsigned long * __restrict v3_in, | |
99 | const unsigned long * __restrict v4_in) | |
ef1313de AB |
100 | { |
101 | DEFINE(v1); | |
102 | DEFINE(v2); | |
103 | DEFINE(v3); | |
104 | DEFINE(v4); | |
105 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; | |
106 | ||
ef1313de AB |
107 | do { |
108 | LOAD(v1); | |
109 | LOAD(v2); | |
110 | LOAD(v3); | |
111 | LOAD(v4); | |
112 | XOR(v1, v2); | |
113 | XOR(v3, v4); | |
114 | XOR(v1, v3); | |
115 | STORE(v1); | |
116 | ||
117 | v1 += 4; | |
118 | v2 += 4; | |
119 | v3 += 4; | |
120 | v4 += 4; | |
121 | } while (--lines > 0); | |
ef1313de | 122 | } |
ef1313de | 123 | |
297565aa AB |
124 | void __xor_altivec_5(unsigned long bytes, |
125 | unsigned long * __restrict v1_in, | |
126 | const unsigned long * __restrict v2_in, | |
127 | const unsigned long * __restrict v3_in, | |
128 | const unsigned long * __restrict v4_in, | |
129 | const unsigned long * __restrict v5_in) | |
ef1313de AB |
130 | { |
131 | DEFINE(v1); | |
132 | DEFINE(v2); | |
133 | DEFINE(v3); | |
134 | DEFINE(v4); | |
135 | DEFINE(v5); | |
136 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; | |
137 | ||
ef1313de AB |
138 | do { |
139 | LOAD(v1); | |
140 | LOAD(v2); | |
141 | LOAD(v3); | |
142 | LOAD(v4); | |
143 | LOAD(v5); | |
144 | XOR(v1, v2); | |
145 | XOR(v3, v4); | |
146 | XOR(v1, v5); | |
147 | XOR(v1, v3); | |
148 | STORE(v1); | |
149 | ||
150 | v1 += 4; | |
151 | v2 += 4; | |
152 | v3 += 4; | |
153 | v4 += 4; | |
154 | v5 += 4; | |
155 | } while (--lines > 0); | |
ef1313de | 156 | } |