From: Linus Torvalds
Date: Sat, 28 May 2016 23:15:25 +0000 (-0700)
Subject: Merge branch 'hash' of git://ftp.sciencehorizons.net/linux
X-Git-Tag: v4.7-rc1~8
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=7e0fb73c52c4037b4d5ef9ff56c7296a3151bd92;hp=4e8440b3b6b801953b2e53c55491cf98fc8f6c01;p=linux-2.6-block.git

Merge branch 'hash' of git://ftp.sciencehorizons.net/linux

Pull string hash improvements from George Spelvin:
 "This series does several related things:

   - Makes the dcache hash (fs/namei.c) useful for general kernel use.
     (Thanks to Bruce for noticing the zero-length corner case)

   - Converts the string hashes in <linux/sunrpc/svcauth.h> to use the above.

   - Avoids 64-bit multiplies in hash_64() on 32-bit platforms.  Two
     32-bit multiplies will do well enough.

   - Rids the world of the bad hash multipliers in hash_32().

     This finishes the job started in commit 689de1d6ca95 ("Minimal
     fix-up of bad hashing behavior of hash_64()").

     The vast majority of Linux architectures have hardware support for
     32x32-bit multiply and so derive no benefit from "simplified"
     multipliers.  The few processors that do not (68000, h8/300 and
     some models of Microblaze) have arch-specific implementations
     added.  Those patches are last in the series.

   - Overhauls the dcache hash mixing.

     The patch in commit 0fed3ac866ea ("namei: Improve hash mixing if
     CONFIG_DCACHE_WORD_ACCESS") was an off-the-cuff suggestion.  It has
     been replaced with a much more careful design that's simultaneously
     faster and better.  (My own invention, as there was nothing
     suitable in the literature I could find.  Comments welcome!)

   - Modifies the hash_name() loop to skip the initial HASH_MIX().  This
     would let us salt the hash if we ever wanted to.

   - Sorts out partial_name_hash().

     The hash function is declared as using a long state, even though
     it's truncated to 32 bits at the end and the extra internal state
     contributes nothing to the result.  And some callers do odd things:

      - fs/hfs/string.c only allocates 32 bits of state
      - fs/hfsplus/unicode.c uses it to hash 16-bit unicode symbols, not bytes

   - Modifies bytemask_from_count() to handle inputs of 1..sizeof(long)
     rather than 0..sizeof(long)-1.  This would simplify users other
     than full_name_hash"

  Special thanks to Bruce Fields for testing and finding bugs in v1.  (I
  learned some humbling lessons about "obviously correct" code.)

  On the arch-specific front: the m68k assembly has been tested in a
  standalone test harness; I've been in contact with the Microblaze
  maintainers, who mostly don't care, as the hardware multiplier is never
  omitted in real-world applications; and I haven't heard anything from
  the H8/300 world"

* 'hash' of git://ftp.sciencehorizons.net/linux:
  h8300: Add <asm/hash.h>
  microblaze: Add <asm/hash.h>
  m68k: Add <asm/hash.h>
  <linux/hash.h>: Add support for architecture-specific functions
  fs/namei.c: Improve dcache hash function
  Eliminate bad hash multipliers from hash_32() and hash_64()
  Change hash_64() return value to 32 bits
  <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string()
  fs/namei.c: Add hashlen_string() function
  Pull out string hash to <linux/stringhash.h>
---

diff --git a/arch/Kconfig b/arch/Kconfig index b16e74e4b5af..d794384a0404 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -598,6 +598,14 @@ config HAVE_STACK_VALIDATION Architecture supports the 'objtool check' host tool command, which performs compile-time stack metadata validation. +config HAVE_ARCH_HASH + bool + default n + help + If this is set, the architecture provides an <asm/hash.h> + file which provides platform-specific implementations of some + functions in <linux/hash.h> or fs/namei.c.
+ # # ABI hall of shame # diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index aa232de2d4bc..3ae852507e57 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -20,6 +20,7 @@ config H8300 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_ARCH_KGDB + select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS config RWSEM_GENERIC_SPINLOCK diff --git a/arch/h8300/include/asm/hash.h b/arch/h8300/include/asm/hash.h new file mode 100644 index 000000000000..04cfbd2bd850 --- /dev/null +++ b/arch/h8300/include/asm/hash.h @@ -0,0 +1,53 @@ +#ifndef _ASM_HASH_H +#define _ASM_HASH_H + +/* + * The later H8SX models have a 32x32-bit multiply, but the H8/300H + * and H8S have only 16x16->32. Since it's tolerably compact, this is + * basically an inlined version of the __mulsi3 code. Since the inputs + * are not expected to be small, it's also simplfied by skipping the + * early-out checks. + * + * (Since neither CPU has any multi-bit shift instructions, a + * shift-and-add version is a non-starter.) + * + * TODO: come up with an arch-specific version of the hashing in fs/namei.c, + * since that is heavily dependent on rotates. Which, as mentioned, suck + * horribly on H8. + */ + +#if defined(CONFIG_CPU_H300H) || defined(CONFIG_CPU_H8S) + +#define HAVE_ARCH__HASH_32 1 + +/* + * Multiply by k = 0x61C88647. Fitting this into three registers requires + * one extra instruction, but reducing register pressure will probably + * make that back and then some. + * + * GCC asm note: %e1 is the high half of operand %1, while %f1 is the + * low half. So if %1 is er4, then %e1 is e4 and %f1 is r4. + * + * This has been designed to modify x in place, since that's the most + * common usage, but preserve k, since hash_64() makes two calls in + * quick succession. + */ +static inline u32 __attribute_const__ __hash_32(u32 x) +{ + u32 temp; + + asm( "mov.w %e1,%f0" + "\n mulxu.w %f2,%0" /* klow * xhigh */ + "\n mov.w %f0,%e1" /* The extra instruction */ + "\n mov.w %f1,%f0" + "\n mulxu.w %e2,%0" /* khigh * xlow */ + "\n add.w %e1,%f0" + "\n mulxu.w %f2,%1" /* klow * xlow */ + "\n add.w %f0,%e1" + : "=&r" (temp), "=r" (x) + : "%r" (GOLDEN_RATIO_32), "1" (x)); + return x; +} + +#endif +#endif /* _ASM_HASH_H */ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 8ace920ca24a..967260f2eb1c 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -41,6 +41,7 @@ config M68000 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM select CPU_NO_EFFICIENT_FFS + select HAVE_ARCH_HASH help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as diff --git a/arch/m68k/include/asm/hash.h b/arch/m68k/include/asm/hash.h new file mode 100644 index 000000000000..6407af84a994 --- /dev/null +++ b/arch/m68k/include/asm/hash.h @@ -0,0 +1,59 @@ +#ifndef _ASM_HASH_H +#define _ASM_HASH_H + +/* + * If CONFIG_M68000=y (original mc68000/010), this file is #included + * to work around the lack of a MULU.L instruction. + */ + +#define HAVE_ARCH__HASH_32 1 +/* + * While it would be legal to substitute a different hash operation + * entirely, let's keep it simple and just use an optimized multiply + * by GOLDEN_RATIO_32 = 0x61C88647. + * + * The best way to do that appears to be to multiply by 0x8647 with + * shifts and adds, and use mulu.w to multiply the high half by 0x61C8. + * + * Because the 68000 has multi-cycle shifts, this addition chain is + * chosen to minimise the shift distances. 
+ * + * Despite every attempt to spoon-feed it simple operations, GCC + * 6.1.1 doggedly insists on doing annoying things like converting + * "lsl.l #2," (12 cycles) to two adds (8+8 cycles). + * + * It also likes to notice two shifts in a row, like "a = x << 2" and + * "a <<= 7", and convert that to "a = x << 9". But shifts longer + * than 8 bits are extra-slow on m68k, so that's a lose. + * + * Since the 68000 is a very simple in-order processor with no + * instruction scheduling effects on execution time, we can safely + * take it out of GCC's hands and write one big asm() block. + * + * Without calling overhead, this operation is 30 bytes (14 instructions + * plus one immediate constant) and 166 cycles. + * + * (Because %2 is fetched twice, it can't be postincrement, and thus it + * can't be a fully general "g" or "m". Register is preferred, but + * offsettable memory or immediate will work.) + */ +static inline u32 __attribute_const__ __hash_32(u32 x) +{ + u32 a, b; + + asm( "move.l %2,%0" /* a = x * 0x0001 */ + "\n lsl.l #2,%0" /* a = x * 0x0004 */ + "\n move.l %0,%1" + "\n lsl.l #7,%0" /* a = x * 0x0200 */ + "\n add.l %2,%0" /* a = x * 0x0201 */ + "\n add.l %0,%1" /* b = x * 0x0205 */ + "\n add.l %0,%0" /* a = x * 0x0402 */ + "\n add.l %0,%1" /* b = x * 0x0607 */ + "\n lsl.l #5,%0" /* a = x * 0x8040 */ + : "=&d,d" (a), "=&r,r" (b) + : "r,roi?" (x)); /* a+b = x*0x8647 */ + + return ((u16)(x*0x61c8) << 16) + a + b; +} + +#endif /* _ASM_HASH_H */ diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index f17c3a4fb697..636e0720fb20 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -16,6 +16,7 @@ config MICROBLAZE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select HAVE_ARCH_HASH select HAVE_ARCH_KGDB select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG diff --git a/arch/microblaze/include/asm/hash.h b/arch/microblaze/include/asm/hash.h new file mode 100644 index 000000000000..753513ae8cb0 --- /dev/null +++ b/arch/microblaze/include/asm/hash.h @@ -0,0 +1,81 @@ +#ifndef _ASM_HASH_H +#define _ASM_HASH_H + +/* + * Fortunately, most people who want to run Linux on Microblaze enable + * both multiplier and barrel shifter, but omitting them is technically + * a supported configuration. + * + * With just a barrel shifter, we can implement an efficient constant + * multiply using shifts and adds. GCC can find a 9-step solution, but + * this 6-step solution was found by Yevgen Voronenko's implementation + * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html. + * + * That software is really not designed for a single multiplier this large, + * but if you run it enough times with different seeds, it'll find several + * 6-shift, 6-add sequences for computing x * 0x61C88647. They are all + * c = (x << 19) + x; + * a = (x << 9) + c; + * b = (x << 23) + a; + * return (a<<11) + (b<<6) + (c<<3) - b; + * with variations on the order of the final add. + * + * Without even a shifter, it's hopless; any hash function will suck. 
+ */ + +#if CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL == 0 + +#define HAVE_ARCH__HASH_32 1 + +/* Multiply by GOLDEN_RATIO_32 = 0x61C88647 */ +static inline u32 __attribute_const__ __hash_32(u32 a) +{ +#if CONFIG_XILINX_MICROBLAZE0_USE_BARREL + unsigned int b, c; + + /* Phase 1: Compute three intermediate values */ + b = a << 23; + c = (a << 19) + a; + a = (a << 9) + c; + b += a; + + /* Phase 2: Compute (a << 11) + (b << 6) + (c << 3) - b */ + a <<= 5; + a += b; /* (a << 5) + b */ + a <<= 3; + a += c; /* (a << 8) + (b << 3) + c */ + a <<= 3; + return a - b; /* (a << 11) + (b << 6) + (c << 3) - b */ +#else + /* + * "This is really going to hurt." + * + * Without a barrel shifter, left shifts are implemented as + * repeated additions, and the best we can do is an optimal + * addition-subtraction chain. This one is not known to be + * optimal, but at 37 steps, it's decent for a 31-bit multiplier. + * + * Question: given its size (37*4 = 148 bytes per instance), + * and slowness, is this worth having inline? + */ + unsigned int b, c, d; + + b = a << 4; /* 4 */ + c = b << 1; /* 1 5 */ + b += a; /* 1 6 */ + c += b; /* 1 7 */ + c <<= 3; /* 3 10 */ + c -= a; /* 1 11 */ + d = c << 7; /* 7 18 */ + d += b; /* 1 19 */ + d <<= 8; /* 8 27 */ + d += a; /* 1 28 */ + d <<= 1; /* 1 29 */ + d += b; /* 1 30 */ + d <<= 6; /* 6 36 */ + return d + c; /* 1 37 total instructions*/ +#endif +} + +#endif /* !CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL */ +#endif /* _ASM_HASH_H */ diff --git a/drivers/media/usb/dvb-usb-v2/af9015.c b/drivers/media/usb/dvb-usb-v2/af9015.c index 95a7388e89d4..09e0f58f6bb7 100644 --- a/drivers/media/usb/dvb-usb-v2/af9015.c +++ b/drivers/media/usb/dvb-usb-v2/af9015.c @@ -398,6 +398,8 @@ error: } #define AF9015_EEPROM_SIZE 256 +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME_32 0x9e370001UL /* hash (and dump) eeprom */ static int af9015_eeprom_hash(struct dvb_usb_device *d) diff --git a/fs/dcache.c b/fs/dcache.c index c622872c12c5..ad4a542e9bab 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1670,8 +1670,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) struct qstr q; q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); + q.hash_len = hashlen_string(name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); diff --git a/fs/namei.c b/fs/namei.c index 15b124c18ed8..e7bf99d387d0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1797,74 +1798,144 @@ static int walk_component(struct nameidata *nd, int flags) #include -#ifdef CONFIG_64BIT +#ifdef HASH_MIX -static inline unsigned int fold_hash(unsigned long hash) -{ - return hash_64(hash, 32); -} +/* Architecture provides HASH_MIX and fold_hash() in */ +#elif defined(CONFIG_64BIT) /* - * This is George Marsaglia's XORSHIFT generator. - * It implements a maximum-period LFSR in only a few - * instructions. It also has the property (required - * by hash_name()) that mix_hash(0) = 0. + * Register pressure in the mixing function is an issue, particularly + * on 32-bit x86, but almost any function requires one state value and + * one temporary. Instead, use a function designed for two state values + * and no temporaries. + * + * This function cannot create a collision in only two iterations, so + * we have two iterations to achieve avalanche. In those two iterations, + * we have six layers of mixing, which is enough to spread one bit's + * influence out to 2^6 = 64 state bits. 
+ * + * Rotate constants are scored by considering either 64 one-bit input + * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the + * probability of that delta causing a change to each of the 128 output + * bits, using a sample of random initial states. + * + * The Shannon entropy of the computed probabilities is then summed + * to produce a score. Ideally, any input change has a 50% chance of + * toggling any given output bit. + * + * Mixing scores (in bits) for (12,45): + * Input delta: 1-bit 2-bit + * 1 round: 713.3 42542.6 + * 2 rounds: 2753.7 140389.8 + * 3 rounds: 5954.1 233458.2 + * 4 rounds: 7862.6 256672.2 + * Perfect: 8192 258048 + * (64*128) (64*63/2 * 128) */ -static inline unsigned long mix_hash(unsigned long hash) +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol64(x,12),\ + x += y, y = rol64(y,45),\ + y *= 9 ) + +/* + * Fold two longs into one 32-bit hash value. This must be fast, but + * latency isn't quite as critical, as there is a fair bit of additional + * work done before the hash value is used. + */ +static inline unsigned int fold_hash(unsigned long x, unsigned long y) { - hash ^= hash << 13; - hash ^= hash >> 7; - hash ^= hash << 17; - return hash; + y ^= x * GOLDEN_RATIO_64; + y *= GOLDEN_RATIO_64; + return y >> 32; } #else /* 32-bit case */ -#define fold_hash(x) (x) +/* + * Mixing scores (in bits) for (7,20): + * Input delta: 1-bit 2-bit + * 1 round: 330.3 9201.6 + * 2 rounds: 1246.4 25475.4 + * 3 rounds: 1907.1 31295.1 + * 4 rounds: 2042.3 31718.6 + * Perfect: 2048 31744 + * (32*64) (32*31/2 * 64) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol32(x, 7),\ + x += y, y = rol32(y,20),\ + y *= 9 ) -static inline unsigned long mix_hash(unsigned long hash) +static inline unsigned int fold_hash(unsigned long x, unsigned long y) { - hash ^= hash << 13; - hash ^= hash >> 17; - hash ^= hash << 5; - return hash; + /* Use arch-optimized multiply if one exists */ + return __hash_32(y ^ __hash_32(x)); } #endif -unsigned int full_name_hash(const unsigned char *name, unsigned int len) +/* + * Return the hash of a string of known length. This is carfully + * designed to match hash_name(), which is the more critical function. + * In particular, we must end by hashing a final word containing 0..7 + * payload bytes, to match the way that hash_name() iterates until it + * finds the delimiter after the name. 
+ */ +unsigned int full_name_hash(const char *name, unsigned int len) { - unsigned long a, hash = 0; + unsigned long a, x = 0, y = 0; for (;;) { + if (!len) + goto done; a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; - hash = mix_hash(hash + a); + HASH_MIX(x, y, a); name += sizeof(unsigned long); len -= sizeof(unsigned long); - if (!len) - goto done; } - hash += a & bytemask_from_count(len); + x ^= a & bytemask_from_count(len); done: - return fold_hash(hash); + return fold_hash(x, y); } EXPORT_SYMBOL(full_name_hash); +/* Return the "hash_len" (hash and length) of a null-terminated string */ +u64 hashlen_string(const char *name) +{ + unsigned long a = 0, x = 0, y = 0, adata, mask, len; + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + + len = -sizeof(unsigned long); + do { + HASH_MIX(x, y, a); + len += sizeof(unsigned long); + a = load_unaligned_zeropad(name+len); + } while (!has_zero(a, &adata, &constants)); + + adata = prep_zero_mask(a, adata, &constants); + mask = create_zero_mask(adata); + x ^= a & zero_bytemask(mask); + + return hashlen_create(fold_hash(x, y), len + find_zero(mask)); +} +EXPORT_SYMBOL(hashlen_string); + /* * Calculate the length and hash of the path component, and * return the "hash_len" as the result. */ static inline u64 hash_name(const char *name) { - unsigned long a, b, adata, bdata, mask, hash, len; + unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - hash = a = 0; len = -sizeof(unsigned long); do { - hash = mix_hash(hash + a); + HASH_MIX(x, y, a); len += sizeof(unsigned long); a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); @@ -1872,25 +1943,40 @@ static inline u64 hash_name(const char *name) adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); - mask = create_zero_mask(adata | bdata); + x ^= a & zero_bytemask(mask); - hash += a & zero_bytemask(mask); - len += find_zero(mask); - return hashlen_create(fold_hash(hash), len); + return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } -#else +#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ -unsigned int full_name_hash(const unsigned char *name, unsigned int len) +/* Return the hash of a string of known length */ +unsigned int full_name_hash(const char *name, unsigned int len) { unsigned long hash = init_name_hash(); while (len--) - hash = partial_name_hash(*name++, hash); + hash = partial_name_hash((unsigned char)*name++, hash); return end_name_hash(hash); } EXPORT_SYMBOL(full_name_hash); +/* Return the "hash_len" (hash and length) of a null-terminated string */ +u64 hash_string(const char *name) +{ + unsigned long hash = init_name_hash(); + unsigned long len = 0, c; + + c = (unsigned char)*name; + do { + len++; + hash = partial_name_hash(c, hash); + c = (unsigned char)name[len]; + } while (c); + return hashlen_create(end_name_hash(hash), len); +} +EXPORT_SYMBOL(hash_string); + /* * We know there's a real path component here of at least * one character. 
@@ -1934,7 +2020,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) int type; err = may_lookup(nd); - if (err) + if (err) return err; hash_len = hash_name(name); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f8506e8dd4d4..484c8792da82 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -10,6 +10,7 @@ #include #include #include +#include struct path; struct vfsmount; @@ -52,9 +53,6 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define hashlen_hash(hashlen) ((u32) (hashlen)) -#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) -#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash)) struct dentry_stat_t { long nr_dentry; @@ -65,29 +63,6 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; -/* Name hashing routines. Initial hash value */ -/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ -#define init_name_hash() 0 - -/* partial hash update function. Assume roughly 4 bits per character */ -static inline unsigned long -partial_name_hash(unsigned long c, unsigned long prevhash) -{ - return (prevhash + (c << 4) + (c >> 4)) * 11; -} - -/* - * Finally: cut down the number of bits to a int value (and try to avoid - * losing bits) - */ -static inline unsigned long end_name_hash(unsigned long hash) -{ - return (unsigned int) hash; -} - -/* Compute the hash for a name string. */ -extern unsigned int full_name_hash(const unsigned char *, unsigned int); - /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the diff --git a/include/linux/hash.h b/include/linux/hash.h index 79c52fa81cac..ad6fa21d977b 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -3,92 +3,94 @@ /* Fast hashing routine for ints, longs and pointers. (C) 2002 Nadia Yvette Chambers, IBM */ -/* - * Knuth recommends primes in approximately golden ratio to the maximum - * integer representable by a machine word for multiplicative hashing. - * Chuck Lever verified the effectiveness of this technique: - * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf - * - * These primes are chosen to be bit-sparse, that is operations on - * them can use shifts and additions instead of multiplications for - * machines where multiplications are slow. - */ - #include #include -/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ -#define GOLDEN_RATIO_PRIME_32 0x9e370001UL -/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ -#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL - +/* + * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and + * fs/inode.c. It's not actually prime any more (the previous primes + * were actively bad for hashing), but the name remains. + */ #if BITS_PER_LONG == 32 -#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32 #define hash_long(val, bits) hash_32(val, bits) #elif BITS_PER_LONG == 64 #define hash_long(val, bits) hash_64(val, bits) -#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64 #else #error Wordsize not 32 or 64 #endif /* - * The above primes are actively bad for hashing, since they are - * too sparse. The 32-bit one is mostly ok, the 64-bit one causes - * real problems. Besides, the "prime" part is pointless for the - * multiplicative hash. + * This hash multiplies the input by a large odd number and takes the + * high bits. 
Since multiplication propagates changes to the most + * significant end only, it is essential that the high bits of the + * product be used for the hash value. + * + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf * * Although a random odd number will do, it turns out that the golden * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice - * properties. + * properties. (See Knuth vol 3, section 6.4, exercise 9.) * - * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. - * (See Knuth vol 3, section 6.4, exercise 9.) + * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2, + * which is very slightly easier to multiply by and makes no + * difference to the hash distribution. */ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull -static __always_inline u64 hash_64(u64 val, unsigned int bits) -{ - u64 hash = val; +#ifdef CONFIG_HAVE_ARCH_HASH +/* This header may use the GOLDEN_RATIO_xx constants */ +#include +#endif -#if BITS_PER_LONG == 64 - hash = hash * GOLDEN_RATIO_64; -#else - /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ - u64 n = hash; - n <<= 18; - hash -= n; - n <<= 33; - hash -= n; - n <<= 3; - hash += n; - n <<= 3; - hash -= n; - n <<= 4; - hash += n; - n <<= 2; - hash += n; +/* + * The _generic versions exist only so lib/test_hash.c can compare + * the arch-optimized versions with the generic. + * + * Note that if you change these, any that aren't updated + * to match need to have their HAVE_ARCH_* define values updated so the + * self-test will not false-positive. + */ +#ifndef HAVE_ARCH__HASH_32 +#define __hash_32 __hash_32_generic #endif +static inline u32 __hash_32_generic(u32 val) +{ + return val * GOLDEN_RATIO_32; +} +#ifndef HAVE_ARCH_HASH_32 +#define hash_32 hash_32_generic +#endif +static inline u32 hash_32_generic(u32 val, unsigned int bits) +{ /* High bits are more random, so use them. */ - return hash >> (64 - bits); + return __hash_32(val) >> (32 - bits); } -static inline u32 hash_32(u32 val, unsigned int bits) +#ifndef HAVE_ARCH_HASH_64 +#define hash_64 hash_64_generic +#endif +static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) { - /* On some cpus multiply is faster, on others gcc will do shifts */ - u32 hash = val * GOLDEN_RATIO_PRIME_32; - - /* High bits are more random, so use them. */ - return hash >> (32 - bits); +#if BITS_PER_LONG == 64 + /* 64x64-bit multiply is efficient on all 64-bit processors */ + return val * GOLDEN_RATIO_64 >> (64 - bits); +#else + /* Hash 64 bits using only 32x32-bit multiply. */ + return hash_32((u32)val ^ __hash_32(val >> 32), bits); +#endif } -static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) +static inline u32 hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } +/* This really should be called fold32_ptr; it does no hashing to speak of. */ static inline u32 hash32_ptr(const void *ptr) { unsigned long val = (unsigned long)ptr; diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h new file mode 100644 index 000000000000..451771d9b9c0 --- /dev/null +++ b/include/linux/stringhash.h @@ -0,0 +1,76 @@ +#ifndef __LINUX_STRINGHASH_H +#define __LINUX_STRINGHASH_H + +#include /* For __pure */ +#include /* For u32, u64 */ + +/* + * Routines for hashing strings of bytes to a 32-bit hash value. 
+ * + * These hash functions are NOT GUARANTEED STABLE between kernel + * versions, architectures, or even repeated boots of the same kernel. + * (E.g. they may depend on boot-time hardware detection or be + * deliberately randomized.) + * + * They are also not intended to be secure against collisions caused by + * malicious inputs; much slower hash functions are required for that. + * + * They are optimized for pathname components, meaning short strings. + * Even if a majority of files have longer names, the dynamic profile of + * pathname components skews short due to short directory names. + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) + */ + +/* + * Version 1: one byte at a time. Example of use: + * + * unsigned long hash = init_name_hash; + * while (*p) + * hash = partial_name_hash(tolower(*p++), hash); + * hash = end_name_hash(hash); + * + * Although this is designed for bytes, fs/hfsplus/unicode.c + * abuses it to hash 16-bit values. + */ + +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define init_name_hash() 0 + +/* partial hash update function. Assume roughly 4 bits per character */ +static inline unsigned long +partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits) + */ +static inline unsigned long end_name_hash(unsigned long hash) +{ + return (unsigned int)hash; +} + +/* + * Version 2: One word (32 or 64 bits) at a time. + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning + * exists, which describes major Linux platforms like x86 and ARM), then + * this computes a different hash function much faster. + * + * If not set, this falls back to a wrapper around the preceding. + */ +extern unsigned int __pure full_name_hash(const char *, unsigned int); + +/* + * A hash_len is a u64 with the hash of a string in the low + * half and the length in the high half. + */ +#define hashlen_hash(hashlen) ((u32)(hashlen)) +#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) + +/* Return the "hash_len" (hash and length) of a null-terminated string */ +extern u64 __pure hashlen_string(const char *name); + +#endif /* __LINUX_STRINGHASH_H */ diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c00f53a4ccdd..91d5a5d6f52b 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -16,6 +16,7 @@ #include #include #include +#include #include struct svc_cred { @@ -165,41 +166,18 @@ extern int svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); -static inline unsigned long hash_str(char *name, int bits) +/* + * The functions are good enough that we don't need to + * use hash_32() on them; just extracting the high bits is enough. 
+ */ +static inline unsigned long hash_str(char const *name, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (unlikely(!(c = *name++))) { - c = (char)len; len = -1; - } - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return hashlen_hash(hashlen_string(name)) >> (32 - bits); } -static inline unsigned long hash_mem(char *buf, int length, int bits) +static inline unsigned long hash_mem(char const *buf, int length, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (len == length) { - c = (char)len; len = -1; - } else - c = *buf++; - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return full_name_hash(buf, length) >> (32 - bits); } #endif /* __KERNEL__ */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e707ab3e1991..77d7d034bac3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1849,6 +1849,17 @@ config TEST_RHASHTABLE If unsure, say N. +config TEST_HASH + tristate "Perform selftest on hash functions" + default n + help + Enable this option to test the kernel's integer () + and string () hash functions on boot + (or module load). + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. + endmenu # runtime tests config PROVIDE_OHCI1394_DMA_INIT diff --git a/lib/Makefile b/lib/Makefile index 42b69185f963..499fb354d627 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_HASH) += test_hash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o diff --git a/lib/test_hash.c b/lib/test_hash.c new file mode 100644 index 000000000000..c9549c8b4909 --- /dev/null +++ b/lib/test_hash.c @@ -0,0 +1,250 @@ +/* + * Test cases for and + * This just verifies that various ways of computing a hash + * produce the same thing and, for cases where a k-bit hash + * value is requested, is of the requested size. + * + * We fill a buffer with a 255-byte null-terminated string, + * and use both full_name_hash() and hashlen_string() to hash the + * substrings from i to j, where 0 <= i < j < 256. + * + * The returned values are used to check that __hash_32() and + * __hash_32_generic() compute the same thing. Likewise hash_32() + * and hash_64(). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n" + +#include +#include +#include +#include +#include +#include + +/* 32-bit XORSHIFT generator. Seed must not be zero. */ +static u32 __init __attribute_const__ +xorshift(u32 seed) +{ + seed ^= seed << 13; + seed ^= seed >> 17; + seed ^= seed << 5; + return seed; +} + +/* Given a non-zero x, returns a non-zero byte. */ +static u8 __init __attribute_const__ +mod255(u32 x) +{ + x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */ + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x2fd */ + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x100 */ + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0xff */ + return x; +} + +/* Fill the buffer with non-zero bytes. 
*/ +static void __init +fill_buf(char *buf, size_t len, u32 seed) +{ + size_t i; + + for (i = 0; i < len; i++) { + seed = xorshift(seed); + buf[i] = mod255(seed); + } +} + +/* + * Test the various integer hash functions. h64 (or its low-order bits) + * is the integer to hash. hash_or accumulates the OR of the hash values, + * which are later checked to see that they cover all the requested bits. + * + * Because these functions (as opposed to the string hashes) are all + * inline, the code being tested is actually in the module, and you can + * recompile and re-test the module without rebooting. + */ +static bool __init +test_int_hash(unsigned long long h64, u32 hash_or[2][33]) +{ + int k; + u32 h0 = (u32)h64, h1, h2; + + /* Test __hash32 */ + hash_or[0][0] |= h1 = __hash_32(h0); +#ifdef HAVE_ARCH__HASH_32 + hash_or[1][0] |= h2 = __hash_32_generic(h0); +#if HAVE_ARCH__HASH_32 == 1 + if (h1 != h2) { + pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x", + h0, h1, h2); + return false; + } +#endif +#endif + + /* Test k = 1..32 bits */ + for (k = 1; k <= 32; k++) { + u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */ + + /* Test hash_32 */ + hash_or[0][k] |= h1 = hash_32(h0, k); + if (h1 > m) { + pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m); + return false; + } +#ifdef HAVE_ARCH_HASH_32 + h2 = hash_32_generic(h0, k); +#if HAVE_ARCH_HASH_32 == 1 + if (h1 != h2) { + pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() " + " = %#x", h0, k, h1, h2); + return false; + } +#else + if (h2 > m) { + pr_err("hash_32_generic(%#x, %d) = %#x > %#x", + h0, k, h1, m); + return false; + } +#endif +#endif + /* Test hash_64 */ + hash_or[1][k] |= h1 = hash_64(h64, k); + if (h1 > m) { + pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m); + return false; + } +#ifdef HAVE_ARCH_HASH_64 + h2 = hash_64_generic(h64, k); +#if HAVE_ARCH_HASH_64 == 1 + if (h1 != h2) { + pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() " + "= %#x", h64, k, h1, h2); + return false; + } +#else + if (h2 > m) { + pr_err("hash_64_generic(%#llx, %d) = %#x > %#x", + h64, k, h1, m); + return false; + } +#endif +#endif + } + + (void)h2; /* Suppress unused variable warning */ + return true; +} + +#define SIZE 256 /* Run time is cubic in SIZE */ + +static int __init +test_hash_init(void) +{ + char buf[SIZE+1]; + u32 string_or = 0, hash_or[2][33] = { 0 }; + unsigned tests = 0; + unsigned long long h64 = 0; + int i, j; + + fill_buf(buf, SIZE, 1); + + /* Test every possible non-empty substring in the buffer. 
*/ + for (j = SIZE; j > 0; --j) { + buf[j] = '\0'; + + for (i = 0; i <= j; i++) { + u64 hashlen = hashlen_string(buf+i); + u32 h0 = full_name_hash(buf+i, j-i); + + /* Check that hashlen_string gets the length right */ + if (hashlen_len(hashlen) != j-i) { + pr_err("hashlen_string(%d..%d) returned length" + " %u, expected %d", + i, j, hashlen_len(hashlen), j-i); + return -EINVAL; + } + /* Check that the hashes match */ + if (hashlen_hash(hashlen) != h0) { + pr_err("hashlen_string(%d..%d) = %08x != " + "full_name_hash() = %08x", + i, j, hashlen_hash(hashlen), h0); + return -EINVAL; + } + + string_or |= h0; + h64 = h64 << 32 | h0; /* For use with hash_64 */ + if (!test_int_hash(h64, hash_or)) + return -EINVAL; + tests++; + } /* i */ + } /* j */ + + /* The OR of all the hash values should cover all the bits */ + if (~string_or) { + pr_err("OR of all string hash results = %#x != %#x", + string_or, -1u); + return -EINVAL; + } + if (~hash_or[0][0]) { + pr_err("OR of all __hash_32 results = %#x != %#x", + hash_or[0][0], -1u); + return -EINVAL; + } +#ifdef HAVE_ARCH__HASH_32 +#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */ + if (~hash_or[1][0]) { + pr_err("OR of all __hash_32_generic results = %#x != %#x", + hash_or[1][0], -1u); + return -EINVAL; + } +#endif +#endif + + /* Likewise for all the i-bit hash values */ + for (i = 1; i <= 32; i++) { + u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */ + + if (hash_or[0][i] != m) { + pr_err("OR of all hash_32(%d) results = %#x " + "(%#x expected)", i, hash_or[0][i], m); + return -EINVAL; + } + if (hash_or[1][i] != m) { + pr_err("OR of all hash_64(%d) results = %#x " + "(%#x expected)", i, hash_or[1][i], m); + return -EINVAL; + } + } + + /* Issue notices about skipped tests. */ +#ifndef HAVE_ARCH__HASH_32 + pr_info("__hash_32() has no arch implementation to test."); +#elif HAVE_ARCH__HASH_32 != 1 + pr_info("__hash_32() is arch-specific; not compared to generic."); +#endif +#ifndef HAVE_ARCH_HASH_32 + pr_info("hash_32() has no arch implementation to test."); +#elif HAVE_ARCH_HASH_32 != 1 + pr_info("hash_32() is arch-specific; not compared to generic."); +#endif +#ifndef HAVE_ARCH_HASH_64 + pr_info("hash_64() has no arch implementation to test."); +#elif HAVE_ARCH_HASH_64 != 1 + pr_info("hash_64() is arch-specific; not compared to generic."); +#endif + + pr_notice("%u tests passed.", tests); + + return 0; +} + +static void __exit test_hash_exit(void) +{ +} + +module_init(test_hash_init); /* Does everything */ +module_exit(test_hash_exit); /* Does nothing */ + +MODULE_LICENSE("GPL");
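
As a quick illustration of the integer-hash changes in this series (a plain multiply by GOLDEN_RATIO_32/GOLDEN_RATIO_64 with the high bits of the product used as the hash, and a 32-bit fallback for hash_64() built from two 32x32-bit multiplies), here is a minimal stand-alone userspace sketch. It mirrors the formulas visible in the <linux/hash.h> diff above, but it is not the kernel code; the sketch_* names are made up for illustration.

/* Userspace sketch of the new <linux/hash.h> scheme; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_32 0x61C88647u
#define GOLDEN_RATIO_64 0x61C8864680B583EBull

/* __hash_32(): full-width multiplicative mix of a 32-bit value. */
static uint32_t sketch__hash_32(uint32_t x)
{
	return x * GOLDEN_RATIO_32;
}

/* hash_32(): keep only the top 'bits' bits, which are the best mixed. */
static uint32_t sketch_hash_32(uint32_t x, unsigned int bits)
{
	return sketch__hash_32(x) >> (32 - bits);
}

/* hash_64() on a 64-bit machine: one 64x64-bit multiply, top bits. */
static uint32_t sketch_hash_64(uint64_t x, unsigned int bits)
{
	return (uint32_t)((x * GOLDEN_RATIO_64) >> (64 - bits));
}

/* hash_64() fallback for 32-bit machines: two 32x32-bit multiplies. */
static uint32_t sketch_hash_64_32bit(uint64_t x, unsigned int bits)
{
	uint32_t lo = (uint32_t)x;
	uint32_t hi = (uint32_t)(x >> 32);

	return sketch_hash_32(lo ^ sketch__hash_32(hi), bits);
}

int main(void)
{
	uint64_t v = 0x123456789abcdef0ull;

	printf("hash_32(0x12345678, 10)     = %#x\n",
	       sketch_hash_32(0x12345678u, 10));
	printf("hash_64(v, 10), 64-bit path = %#x\n", sketch_hash_64(v, 10));
	printf("hash_64(v, 10), 32-bit path = %#x\n",
	       sketch_hash_64_32bit(v, 10));
	return 0;
}

The two hash_64() paths intentionally produce different values; the point of the series is that both push entropy into the high bits cheaply, not that they agree across word sizes.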
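
The HASH_MIX()/fold_hash() design used by full_name_hash(), hashlen_string() and hash_name() can also be modelled outside the kernel. The sketch below keeps the 64-bit mixing round and the fold from the fs/namei.c diff, but replaces the word-at-a-time machinery (load_unaligned_zeropad(), has_zero(), bytemask_from_count()) with plain memcpy() loads, so it is a simplified little-endian model of the algorithm for illustration, not a bit-for-bit reimplementation; sketch_name_hash() is an invented name.

/* Userspace model of the new dcache string hash; not the fs/namei.c code. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GOLDEN_RATIO_64 0x61C8864680B583EBull

static uint64_t rol64(uint64_t v, unsigned int s)
{
	return (v << s) | (v >> (64 - s));
}

/* One ARX round per 8-byte word, as in the 64-bit HASH_MIX() above. */
#define HASH_MIX(x, y, a)			\
	(	x ^= (a),			\
		y ^= x, x = rol64(x, 12),	\
		x += y, y = rol64(y, 45),	\
		y *= 9 )

/* Fold the two state words down to a 32-bit hash value. */
static uint32_t fold_hash(uint64_t x, uint64_t y)
{
	y ^= x * GOLDEN_RATIO_64;
	y *= GOLDEN_RATIO_64;
	return (uint32_t)(y >> 32);
}

/* Hash 'len' bytes of 'name', eight bytes per HASH_MIX() round. */
static uint32_t sketch_name_hash(const char *name, size_t len)
{
	uint64_t a, x = 0, y = 0;

	while (len >= sizeof(a)) {
		memcpy(&a, name, sizeof(a));
		HASH_MIX(x, y, a);
		name += sizeof(a);
		len -= sizeof(a);
	}
	/* Final word: 0..7 payload bytes, zero padded (cf. bytemask). */
	a = 0;
	memcpy(&a, name, len);
	x ^= a;
	return fold_hash(x, y);
}

int main(void)
{
	const char *s = "libsesquipedalianism.so.3.141";

	printf("hash(\"%s\") = %#010x\n", s, sketch_name_hash(s, strlen(s)));
	return 0;
}

As the <linux/stringhash.h> comment stresses, these hashes are not guaranteed stable between kernel versions or configurations, so the value this sketch prints is only meant to be internally consistent, not to match any particular kernel build.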
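
Finally, the microblaze <asm/hash.h> comment claims that its 6-shift, 6-add chain computes x * 0x61C88647 using only the barrel shifter. Since all of the arithmetic is modulo 2^32, the claim is easy to sanity-check from userspace; the throwaway checker below (again not kernel code, with chain() copied verbatim from the quoted comment) samples a few million inputs.

/*
 * Userspace sanity check: verify that the 6-shift, 6-add chain from the
 * microblaze <asm/hash.h> comment computes x * 0x61C88647
 * (GOLDEN_RATIO_32) modulo 2^32.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t chain(uint32_t x)
{
	uint32_t c = (x << 19) + x;
	uint32_t a = (x << 9) + c;
	uint32_t b = (x << 23) + a;

	return (a << 11) + (b << 6) + (c << 3) - b;
}

int main(void)
{
	for (uint32_t i = 0; i < (1u << 24); i++) {
		uint32_t x = i * 2654435761u ^ (i >> 3);	/* varied samples */

		if (chain(x) != x * 0x61C88647u) {
			printf("mismatch at x = %#x\n", x);
			return 1;
		}
	}
	printf("chain(x) == x * 0x61C88647 for all sampled x\n");
	return 0;
}

Because the chain is just a fixed linear combination of shifted copies of x (0x40100800 + 0x22008040 + 0x400008 - 0x880201 = 0x61C88647), the identity holds for every 32-bit x, which is what the sampled run confirms.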