|
| 1 | +/* |
| 2 | + * hashfn_unstable.h |
| 3 | + * |
| 4 | + * Building blocks for creating fast inlineable hash functions. The |
| 5 | + * functions in this file are not guaranteed to be stable between versions, |
| 6 | + * and may differ by hardware platform. Hence they must not be used in |
| 7 | + * indexes or other on-disk structures. See hashfn.h if you need stability. |
| 8 | + * |
| 9 | + * |
| 10 | + * Portions Copyright (c) 2024, PostgreSQL Global Development Group |
| 11 | + * |
| 12 | + * src/include/common/hashfn_unstable.h |
| 13 | + */ |
| 14 | +#ifndef HASHFN_UNSTABLE_H |
| 15 | +#define HASHFN_UNSTABLE_H |
| 16 | + |
| 17 | +#include "port/pg_bitutils.h" |
| 18 | +#include "port/pg_bswap.h" |
| 19 | + |
| 20 | +/* |
| 21 | + * fasthash is a modification of code taken from |
| 22 | + * https://code.google.com/archive/p/fast-hash/source/default/source |
| 23 | + * under the terms of the MIT license. The original copyright |
| 24 | + * notice follows: |
| 25 | + */ |
| 26 | + |
| 27 | +/* The MIT License |
| 28 | +
|
| 29 | + Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) |
| 30 | +
|
| 31 | + Permission is hereby granted, free of charge, to any person |
| 32 | + obtaining a copy of this software and associated documentation |
| 33 | + files (the "Software"), to deal in the Software without |
| 34 | + restriction, including without limitation the rights to use, copy, |
| 35 | + modify, merge, publish, distribute, sublicense, and/or sell copies |
| 36 | + of the Software, and to permit persons to whom the Software is |
| 37 | + furnished to do so, subject to the following conditions: |
| 38 | +
|
| 39 | + The above copyright notice and this permission notice shall be |
| 40 | + included in all copies or substantial portions of the Software. |
| 41 | +
|
| 42 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 43 | + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 44 | + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 45 | + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 46 | + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 47 | + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 48 | + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 49 | + SOFTWARE. |
| 50 | +*/ |
| 51 | + |
| 52 | +/* |
| 53 | + * fasthash as implemented here has two interfaces: |
| 54 | + * |
| 55 | + * 1) Standalone functions, e.g. fasthash32() for a single value with a |
| 56 | + * known length. |
| 57 | + * |
| 58 | + * 2) Incremental interface. This can be used for incorporating multiple |
| 59 | + * inputs. The standalone functions use this internally, so see fasthash64() |
| 60 | + * for an example of how this works. |
| 61 | + */ |
| 62 | + |
| 63 | + |
| 64 | +typedef struct fasthash_state |
| 65 | +{ |
| 66 | + /* staging area for chunks of input */ |
| 67 | + uint64 accum; |
| 68 | + |
| 69 | + uint64 hash; |
| 70 | +} fasthash_state; |
| 71 | + |
/* size in bytes of the accumulator; the largest chunk fasthash_accum() accepts */
#define FH_SIZEOF_ACCUM (sizeof(uint64))

/* value to pass as 'len' to fasthash_init() when the input length is not known */
#define FH_UNKNOWN_LENGTH 1
| 75 | + |
| 76 | +/* |
| 77 | + * Initialize the hash state. |
| 78 | + * |
| 79 | + * 'len' is the length of the input, if known ahead of time. |
| 80 | + * If that is not known, pass FH_UNKNOWN_LENGTH. |
| 81 | + * 'seed' can be zero. |
| 82 | + */ |
| 83 | +static inline void |
| 84 | +fasthash_init(fasthash_state *hs, int len, uint64 seed) |
| 85 | +{ |
| 86 | + memset(hs, 0, sizeof(fasthash_state)); |
| 87 | + hs->hash = seed ^ (len * 0x880355f21e6d1965); |
| 88 | +} |
| 89 | + |
| 90 | +/* both the finalizer and part of the combining step */ |
| 91 | +static inline uint64 |
| 92 | +fasthash_mix(uint64 h, uint64 tweak) |
| 93 | +{ |
| 94 | + h ^= (h >> 23) + tweak; |
| 95 | + h *= 0x2127599bf4325c37; |
| 96 | + h ^= h >> 47; |
| 97 | + return h; |
| 98 | +} |
| 99 | + |
| 100 | +/* combine one chunk of input into the hash */ |
| 101 | +static inline void |
| 102 | +fasthash_combine(fasthash_state *hs) |
| 103 | +{ |
| 104 | + hs->hash ^= fasthash_mix(hs->accum, 0); |
| 105 | + hs->hash *= 0x880355f21e6d1965; |
| 106 | + |
| 107 | + /* reset hash state for next input */ |
| 108 | + hs->accum = 0; |
| 109 | +} |
| 110 | + |
| 111 | +/* accumulate up to 8 bytes of input and combine it into the hash */ |
| 112 | +static inline void |
| 113 | +fasthash_accum(fasthash_state *hs, const char *k, int len) |
| 114 | +{ |
| 115 | + uint32 lower_four; |
| 116 | + |
| 117 | + Assert(hs->accum == 0); |
| 118 | + Assert(len <= FH_SIZEOF_ACCUM); |
| 119 | + |
| 120 | + switch (len) |
| 121 | + { |
| 122 | + case 8: |
| 123 | + memcpy(&hs->accum, k, 8); |
| 124 | + break; |
| 125 | + case 7: |
| 126 | + hs->accum |= (uint64) k[6] << 48; |
| 127 | + /* FALLTHROUGH */ |
| 128 | + case 6: |
| 129 | + hs->accum |= (uint64) k[5] << 40; |
| 130 | + /* FALLTHROUGH */ |
| 131 | + case 5: |
| 132 | + hs->accum |= (uint64) k[4] << 32; |
| 133 | + /* FALLTHROUGH */ |
| 134 | + case 4: |
| 135 | + memcpy(&lower_four, k, sizeof(lower_four)); |
| 136 | + hs->accum |= lower_four; |
| 137 | + break; |
| 138 | + case 3: |
| 139 | + hs->accum |= (uint64) k[2] << 16; |
| 140 | + /* FALLTHROUGH */ |
| 141 | + case 2: |
| 142 | + hs->accum |= (uint64) k[1] << 8; |
| 143 | + /* FALLTHROUGH */ |
| 144 | + case 1: |
| 145 | + hs->accum |= (uint64) k[0]; |
| 146 | + break; |
| 147 | + case 0: |
| 148 | + return; |
| 149 | + } |
| 150 | + |
| 151 | + fasthash_combine(hs); |
| 152 | +} |
| 153 | + |
| 154 | +/* |
| 155 | + * The finalizer |
| 156 | + * |
| 157 | + * 'tweak' is intended to be the input length when the caller doesn't know |
| 158 | + * the length ahead of time, such as for NUL-terminated strings, otherwise |
| 159 | + * zero. |
| 160 | + */ |
| 161 | +static inline uint64 |
| 162 | +fasthash_final64(fasthash_state *hs, uint64 tweak) |
| 163 | +{ |
| 164 | + return fasthash_mix(hs->hash, tweak); |
| 165 | +} |
| 166 | + |
| 167 | +/* |
| 168 | + * Reduce a 64-bit hash to a 32-bit hash. |
| 169 | + * |
| 170 | + * This optional step provides a bit more additional mixing compared to |
| 171 | + * just taking the lower 32-bits. |
| 172 | + */ |
| 173 | +static inline uint32 |
| 174 | +fasthash_reduce32(uint64 h) |
| 175 | +{ |
| 176 | + /* |
| 177 | + * Convert the 64-bit hashcode to Fermat residue, which shall retain |
| 178 | + * information from both the higher and lower parts of hashcode. |
| 179 | + */ |
| 180 | + return h - (h >> 32); |
| 181 | +} |
| 182 | + |
| 183 | +/* finalize and reduce */ |
| 184 | +static inline uint32 |
| 185 | +fasthash_final32(fasthash_state *hs, uint64 tweak) |
| 186 | +{ |
| 187 | + return fasthash_reduce32(fasthash_final64(hs, tweak)); |
| 188 | +} |
| 189 | + |
| 190 | +/* |
| 191 | + * The original fasthash64 function, re-implemented using the incremental |
| 192 | + * interface. Returns a 64-bit hashcode. 'len' controls not only how |
| 193 | + * many bytes to hash, but also modifies the internal seed. |
| 194 | + * 'seed' can be zero. |
| 195 | + */ |
| 196 | +static inline uint64 |
| 197 | +fasthash64(const char *k, int len, uint64 seed) |
| 198 | +{ |
| 199 | + fasthash_state hs; |
| 200 | + |
| 201 | + fasthash_init(&hs, len, seed); |
| 202 | + |
| 203 | + while (len >= FH_SIZEOF_ACCUM) |
| 204 | + { |
| 205 | + fasthash_accum(&hs, k, FH_SIZEOF_ACCUM); |
| 206 | + k += FH_SIZEOF_ACCUM; |
| 207 | + len -= FH_SIZEOF_ACCUM; |
| 208 | + } |
| 209 | + |
| 210 | + fasthash_accum(&hs, k, len); |
| 211 | + return fasthash_final64(&hs, 0); |
| 212 | +} |
| 213 | + |
| 214 | +/* like fasthash64, but returns a 32-bit hashcode */ |
| 215 | +static inline uint64 |
| 216 | +fasthash32(const char *k, int len, uint64 seed) |
| 217 | +{ |
| 218 | + return fasthash_reduce32(fasthash64(k, len, seed)); |
| 219 | +} |
| 220 | + |
| 221 | +#endif /* HASHFN_UNSTABLE_H */ |
0 commit comments