Commit | Line | Data |
---|---|---|
9412b234 CL |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | |
3 | * strlen() for PPC32 | |
4 | * | |
5 | * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information. | |
6 | * | |
7 | * Inspired from glibc implementation | |
8 | */ | |
39326182 | 9 | #include <linux/export.h> |
9412b234 | 10 | #include <asm/ppc_asm.h> |
9412b234 CL |
11 | #include <asm/cache.h> |
12 | ||
13 | .text | |
14 | ||
15 | /* | |
16 | * Algorithm: | |
17 | * | |
18 | * 1) Given a word 'x', we can test to see if it contains any 0 bytes | |
19 | * by subtracting 0x01010101, and seeing if any of the high bits of each | |
20 | * byte changed from 0 to 1. This works because the least significant | |
21 | * 0 byte must have had no incoming carry (otherwise it's not the least | |
22 | * significant), so it is 0x00 - 0x01 == 0xff. For all other | |
23 | * byte values, either they have the high bit set initially, or when | |
24 | * 1 is subtracted you get a value in the range 0x00-0x7f, none of which | |
25 | * have their high bit set. The expression here is | |
26 | * ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when | |
27 | * there were no 0x00 bytes in the word. You get 0x80 in bytes that | |
28 | * match, but possibly false 0x80 matches in the next more significant | |
29 | * byte to a true match due to carries. For little-endian this is | |
30 | * of no consequence since the least significant match is the one | |
31 | * we're interested in, but big-endian needs method 2 to find which | |
32 | * byte matches. | |
33 | * 2) Given a word 'x', we can test to see _which_ byte was zero by | |
34 | * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080). | |
35 | * This produces 0x80 in each byte that was zero, and 0x00 in all | |
36 | * the other bytes. The '| ~0x80808080' clears the low 7 bits in each | |
37 | * byte, and the '| x' part ensures that bytes with the high bit set | |
38 | * produce 0x00. The subtraction (i.e. adding 0x7f7f7f7f modulo 2^32) will | |
39 | * carry into the high bit of each byte iff that byte had one of its low 7 | |
40 | * bits set. We can then count the leading zeros to find the most significant | |
41 | * bit set and divide that count by 8 to find how many to add to the index. | |
42 | * This is from the book 'The PowerPC Compiler Writer's Guide', | |
43 | * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. | |
44 | */ | |
45 | ||
46 | _GLOBAL(strlen) /* In: r3 = s, address of the string. Out: r3 = number of bytes before the first 0 byte. Scratch: r0, r6-r10, CR0, XER[CA]. */ | |
47 | andi. r0, r3, 3 /* r0 = s & 3; CR0 is "not equal" when s is not word aligned */ | |
48 | lis r7, 0x0101 /* r7 = 0x01010000 */ | |
49 | addi r10, r3, -4 /* r10 = s - 4, pre-biased for the update-form load in the loop */ | |
50 | addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */ | |
51 | rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */ | |
52 | bne- 3f /* CR0 still holds the andi. result: unaligned start takes the fix-up path */ | |
53 | .balign IFETCH_ALIGN_BYTES /* align the loop head; IFETCH_ALIGN_BYTES is not defined in this file (presumably from asm/cache.h - confirm) */ | |
54 | 1: lwzu r9, 4(r10) /* r10 += 4, then r9 = x = word at r10 */ | |
55 | 2: subf r8, r7, r9 /* r8 = x - lomagic */ | |
56 | and. r8, r8, r6 /* keep only the high bit of each byte */ | |
57 | beq+ 1b /* no high bit set after the subtraction: no 0 byte, next word */ | |
58 | andc. r8, r8, r9 /* r8 &= ~x, completing the method 1 test from the header comment */ | |
59 | beq+ 1b /* every set high bit was already set in x: no 0 byte, next word */ | |
60 | andc r8, r9, r6 /* method 2: r8 = x & ~himagic */ | |
61 | orc r9, r9, r6 /* r9 = x OR ~himagic */ | |
62 | subfe r8, r6, r8 /* r8 = r8 - himagic - 1 + CA; CA is still 0 from the addic, nothing in between writes it */ | |
63 | nor r8, r8, r9 /* r8 = NOT(r8 OR r9): 0x80 in each byte of x that was 0, 0x00 elsewhere */ | |
64 | cntlzw r8, r8 /* number of zero bits above the most significant 0x80 marker */ | |
65 | subf r3, r3, r10 /* r3 = address of the current word - s */ | |
66 | srwi r8, r8, 3 /* bits / 8 = index of the 0 byte, counted from the most significant byte */ | |
67 | add r3, r3, r8 /* length = word offset + byte index */ | |
68 | blr | |
69 | ||
70 | /* Misaligned string: force the bytes that precede the string in its first word to be non-zero so they cannot match */ | |
71 | 3: xor r10, r10, r0 /* s - 4 has the same low two bits as s, so this clears them: r10 = (s rounded down to a word) - 4 */ | |
72 | orc r8, r8, r8 /* r8 OR NOT r8 = 0xffffffff, whatever r8 held */ | |
73 | lwzu r9, 4(r10) /* r10 += 4, then r9 = aligned word containing the first byte of s */ | |
74 | slwi r0, r0, 3 /* r0 = 8 * (s & 3) = number of bits that precede the string in this word */ | |
75 | srw r8, r8, r0 /* r8 = 0xffffffff >> r0: zeros in the top r0 bits */ | |
76 | orc r9, r9, r8 /* r9 = r9 OR NOT r8: the (s & 3) most significant bytes become 0xff */ | |
77 | b 2b /* join the main loop after its load, with the patched word in r9 */ | |
78 | EXPORT_SYMBOL(strlen) |