Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
f68e556e LT |
2 | #ifndef _ASM_WORD_AT_A_TIME_H |
3 | #define _ASM_WORD_AT_A_TIME_H | |
4 | ||
44696908 DM |
5 | #include <linux/kernel.h> |
6 | ||
f68e556e LT |
/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */
36126f8f LT |
/*
 * Constants consumed by has_zero(): one_bits is subtracted from the
 * word under test and high_bits selects the per-byte result bit.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

/*
 * Initializer for a struct word_at_a_time.
 * NOTE(review): REPEAT_BYTE() is defined outside this file; presumably it
 * replicates the given byte into every byte of an unsigned long - confirm.
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
f68e556e LT |
19 | |
20 | #ifdef CONFIG_64BIT | |
21 | ||
/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 *
 * 64-bit variant. @mask is expected to be a byte mask whose N lowest
 * bytes are 0xff and whose remaining bytes are zero (N in 0..7); the
 * multiply accumulates the byte count into the top byte, which the
 * shift then extracts, so the function returns N.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}
32 | ||
33 | #else /* 32-bit case */ | |
34 | ||
/*
 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
 *
 * @mask is expected to be one of 0x000000, 0x0000ff, 0x00ffff or
 * 0xffffff; the function returns the number of 0xff bytes (0..3).
 */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}
43 | ||
44 | #endif | |
45 | ||
36126f8f LT |
46 | /* Return nonzero if it has a zero */ |
47 | static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) | |
48 | { | |
49 | unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; | |
50 | *bits = mask; | |
51 | return mask; | |
52 | } | |
53 | ||
/*
 * Prepare the has_zero() result for create_zero_mask().
 *
 * In this implementation no adjustment is needed: @bits is returned
 * unchanged and @a and @c are unused.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}
58 | ||
/*
 * Turn a "high bit per zero byte" value into a byte mask.
 *
 * (bits - 1) & ~bits keeps only the bits below the lowest set bit of
 * @bits; shifting right by 7 then drops the seven low bits of the
 * marked byte, leaving 0xff in every byte below it.  E.g. 0x8000 ->
 * 0x7fff -> 0xff.
 */
static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}
64 | ||
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) ((mask))
67 | ||
/*
 * Return the byte count encoded by @mask, as computed by
 * count_masked_bytes() for the current word size.
 */
static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}
72 | ||
e419b4cc LT |
73 | /* |
74 | * Load an unaligned word from kernel space. | |
75 | * | |
76 | * In the (very unlikely) case of the word being a page-crosser | |
77 | * and the next page not being mapped, take the exception and | |
78 | * return zeroes in the non-existing part. | |
79 | */ | |
80 | static inline unsigned long load_unaligned_zeropad(const void *addr) | |
81 | { | |
82 | unsigned long ret, dummy; | |
83 | ||
84 | asm( | |
85 | "1:\tmov %2,%0\n" | |
86 | "2:\n" | |
87 | ".section .fixup,\"ax\"\n" | |
88 | "3:\t" | |
89 | "lea %2,%1\n\t" | |
90 | "and %3,%1\n\t" | |
91 | "mov (%1),%0\n\t" | |
92 | "leal %2,%%ecx\n\t" | |
93 | "andl %4,%%ecx\n\t" | |
94 | "shll $3,%%ecx\n\t" | |
95 | "shr %%cl,%0\n\t" | |
96 | "jmp 2b\n" | |
97 | ".previous\n" | |
98 | _ASM_EXTABLE(1b, 3b) | |
99 | :"=&r" (ret),"=&c" (dummy) | |
100 | :"m" (*(unsigned long *)addr), | |
101 | "i" (-sizeof(unsigned long)), | |
102 | "i" (sizeof(unsigned long)-1)); | |
103 | return ret; | |
104 | } | |
105 | ||
f68e556e | 106 | #endif /* _ASM_WORD_AT_A_TIME_H */ |