Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) | |
3 | * | |
4 | * Finds length of a 0-terminated string. Optimized for the | |
5 | * Alpha architecture: | |
6 | * | |
7 | * - memory accessed as aligned quadwords only | |
8 | * - uses cmpbge to compare 8 bytes in parallel | |
9 | * - does binary search to find 0 byte in last | |
10 | * quadword (HAKMEM needed 12 instructions to | |
11 | * do this instead of the 9 instructions that | |
12 | * binary search needs). | |
13 | */ | |
14 | ||
15 | .set noreorder /* ask the assembler to keep the instructions in the order written */ | |
16 | .set noat /* do not let the assembler use the $at temporary register on its own */ | |
17 | ||
18 | .align 3 /* start on a 2^3 = 8-byte boundary */ | |
19 | ||
20 | .globl strlen /* make the symbol visible to other object files */ | |
21 | .ent strlen /* beginning of procedure strlen; closed by .end at the bottom */ | |
22 | ||
23 | strlen: /* in: $16 = address of the string; out: $0 = length in bytes; scratch: $1, $2, $3 */ | |
24 | ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) | |
25 | lda $2, -1($31) /* $2 = -1, i.e. all bits set ($31 always reads as zero) */ | |
26 | insqh $2, $16, $2 /* $2 = 0xff in each of the ($16 & 7) low bytes, i.e. the bytes located before the string start */ | |
27 | andnot $16, 7, $0 /* $0 = $16 rounded down to an 8-byte boundary = address of the quadword just loaded */ | |
28 | or $2, $1, $1 /* force the bytes before the string to non-zero so they cannot be taken for the terminator */ | |
29 | cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 | |
30 | bne $2, found /* the terminator is already inside the first quadword */ | |
31 | ||
32 | loop: ldq $1, 8($0) /* fetch the next aligned quadword */ | |
33 | addq $0, 8, $0 # addr += 8 | |
34 | nop # helps dual issue last two insns | |
35 | cmpbge $31, $1, $2 /* same zero-byte bitmask as for the first quadword */ | |
36 | beq $2, loop /* no zero byte in this quadword: keep scanning */ | |
37 | ||
38 | found: blbs $2, done # make aligned case fast | |
39 | negq $2, $3 /* $3 = -$2 */ | |
40 | and $2, $3, $2 /* $2 & -$2 keeps only the lowest set bit, which marks the first zero byte */ | |
41 | ||
42 | and $2, 0x0f, $1 /* binary search, step 1: non-zero iff the zero byte is one of bytes 0..3 */ | |
43 | addq $0, 4, $3 | |
44 | cmoveq $1, $3, $0 /* otherwise it is in bytes 4..7: addr += 4 */ | |
45 | ||
46 | and $2, 0x33, $1 /* step 2: non-zero iff the byte index is 0, 1, 4 or 5 */ | |
47 | addq $0, 2, $3 | |
48 | cmoveq $1, $3, $0 /* otherwise the index is 2, 3, 6 or 7: addr += 2 */ | |
49 | ||
50 | and $2, 0x55, $1 /* step 3: non-zero iff the byte index is even */ | |
51 | addq $0, 1, $3 | |
52 | cmoveq $1, $3, $0 /* otherwise the index is odd: addr += 1 */ | |
53 | ||
54 | done: subq $0, $16, $0 /* $0 now addresses the terminator: length = terminator address - string address */ | |
55 | ret $31, ($26) /* jump to the return address held in $26; the result stays in $0 */ | |
56 | ||
57 | .end strlen |