Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e51a048

Browse files
committed
Add general purpose hashing functions to pgbench.
Hashing functions are useful for simulating real-world workloads in tests, such as web workloads; YCSB benchmarks are one example. Author: Ildar Musin, with minor editing by me. Reviewed by: Fabien Coelho, me. Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru
1 parent 8bb3c7d commit e51a048

File tree

5 files changed

+239
-34
lines changed

5 files changed

+239
-34
lines changed

doc/src/sgml/ref/pgbench.sgml

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -874,13 +874,18 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
874874

875875
<tbody>
876876
<row>
877-
<entry> <literal>scale</literal> </entry>
878-
<entry>current scale factor</entry>
877+
<entry> <literal>client_id</literal> </entry>
878+
<entry>unique number identifying the client session (starts from zero)</entry>
879879
</row>
880880

881881
<row>
882-
<entry> <literal>client_id</literal> </entry>
883-
<entry>unique number identifying the client session (starts from zero)</entry>
882+
<entry> <literal>default_seed</literal> </entry>
883+
<entry>seed used in hash functions by default</entry>
884+
</row>
885+
886+
<row>
887+
<entry> <literal>scale</literal> </entry>
888+
<entry>current scale factor</entry>
884889
</row>
885890
</tbody>
886891
</tgroup>
@@ -1245,6 +1250,27 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
12451250
<entry><literal>greatest(5, 4, 3, 2)</literal></entry>
12461251
<entry><literal>5</literal></entry>
12471252
</row>
1253+
<row>
1254+
<entry><literal><function>hash(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1255+
<entry>integer</entry>
1256+
<entry>alias for <literal>hash_murmur2()</literal></entry>
1257+
<entry><literal>hash(10, 5432)</literal></entry>
1258+
<entry><literal>-5817877081768721676</literal></entry>
1259+
</row>
1260+
<row>
1261+
<entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1262+
<entry>integer</entry>
1263+
<entry><ulink url="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a hash</ulink></entry>
1264+
<entry><literal>hash_fnv1a(10, 5432)</literal></entry>
1265+
<entry><literal>-7793829335365542153</literal></entry>
1266+
</row>
1267+
<row>
1268+
<entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1269+
<entry>integer</entry>
1270+
<entry><ulink url="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash2 hash</ulink></entry>
1271+
<entry><literal>hash_murmur2(10, 5432)</literal></entry>
1272+
<entry><literal>-5817877081768721676</literal></entry>
1273+
</row>
12481274
<row>
12491275
<entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
12501276
<entry>integer</entry>
@@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
14231449
</listitem>
14241450
</itemizedlist>
14251451

1452+
<para>
1453+
Hash functions <literal>hash</literal>, <literal>hash_murmur2</literal> and
1454+
<literal>hash_fnv1a</literal> accept an input value and an optional seed parameter.
1455+
In case the seed isn't provided the value of <literal>:default_seed</literal>
1456+
is used, which is initialized randomly unless set by the command-line
1457+
<literal>-D</literal> option. Hash functions can be used to scatter the
1458+
distribution of random functions such as <literal>random_zipfian</literal> or
1459+
<literal>random_exponential</literal>. For instance, the following pgbench
1460+
script simulates a possible real-world workload typical for social media and
1461+
blogging platforms where a few accounts generate excessive load:
1462+
1463+
<programlisting>
1464+
\set r random_zipfian(0, 100000000, 1.07)
1465+
\set k abs(hash(:r)) % 1000000
1466+
</programlisting>
1467+
1468+
In some cases several distinct distributions are needed which don't correlate
1469+
with each other, and this is when an explicit seed parameter comes in handy:
1470+
1471+
<programlisting>
1472+
\set k1 abs(hash(:r, :default_seed + 123)) % 1000000
1473+
\set k2 abs(hash(:r, :default_seed + 321)) % 1000000
1474+
</programlisting>
1475+
</para>
1476+
14261477
<para>
14271478
As an example, the full definition of the built-in TPC-B-like
14281479
transaction is:

src/bin/pgbench/exprparse.y

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
#include "pgbench.h"
1818

19+
#define PGBENCH_NARGS_VARIABLE (-1)
20+
#define PGBENCH_NARGS_CASE (-2)
21+
#define PGBENCH_NARGS_HASH (-3)
22+
1923
PgBenchExpr *expr_parse_result;
2024

2125
static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr)
226230
/*
227231
* List of available functions:
228232
* - fname: function name, "!..." for special internal functions
229-
* - nargs: number of arguments
230-
* -1 is a special value for least & greatest meaning #args >= 1
231-
* -2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd
233+
* - nargs: number of arguments. Special cases:
234+
* - PGBENCH_NARGS_VARIABLE is a special value for least & greatest
235+
* meaning #args >= 1;
236+
* - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which
237+
* has #args >= 3 and odd;
238+
* - PGBENCH_NARGS_HASH is for hash functions, which have one required
239+
* and one optional argument;
232240
* - tag: function identifier from PgBenchFunction enum
233241
*/
234242
static const struct
@@ -259,10 +267,10 @@ static const struct
259267
"abs", 1, PGBENCH_ABS
260268
},
261269
{
262-
"least", -1, PGBENCH_LEAST
270+
"least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
263271
},
264272
{
265-
"greatest", -1, PGBENCH_GREATEST
273+
"greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
266274
},
267275
{
268276
"debug", 1, PGBENCH_DEBUG
@@ -347,7 +355,25 @@ static const struct
347355
},
348356
/* "case when ... then ... else ... end" construction */
349357
{
350-
"!case_end", -2, PGBENCH_CASE
358+
"!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
359+
},
360+
{
361+
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
362+
},
363+
{
364+
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
365+
},
366+
{
367+
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
368+
},
369+
{
370+
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
371+
},
372+
{
373+
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
374+
},
375+
{
376+
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
351377
},
352378
/* keep as last array element */
353379
{
@@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list)
423449
static PgBenchExpr *
424450
make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
425451
{
452+
int len = elist_length(args);
453+
426454
PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
427455

428456
Assert(fnumber >= 0);
429457

430-
if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
431-
PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
432-
expr_yyerror_more(yyscanner, "unexpected number of arguments",
433-
PGBENCH_FUNCTIONS[fnumber].fname);
434-
435-
/* check at least one arg for least & greatest */
436-
if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
437-
elist_length(args) == 0)
438-
expr_yyerror_more(yyscanner, "at least one argument expected",
439-
PGBENCH_FUNCTIONS[fnumber].fname);
440-
/* special case: case (when ... then ...)+ (else ...)? end */
441-
if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
442-
{
443-
int len = elist_length(args);
444-
445-
/* 'else' branch is always present, but could be a NULL-constant */
446-
if (len < 3 || len % 2 != 1)
447-
expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected",
448-
"case control structure");
458+
/* validate arguments number including few special cases */
459+
switch (PGBENCH_FUNCTIONS[fnumber].nargs)
460+
{
461+
/* check at least one arg for least & greatest */
462+
case PGBENCH_NARGS_VARIABLE:
463+
if (len == 0)
464+
expr_yyerror_more(yyscanner, "at least one argument expected",
465+
PGBENCH_FUNCTIONS[fnumber].fname);
466+
break;
467+
468+
/* case (when ... then ...)+ (else ...)? end */
469+
case PGBENCH_NARGS_CASE:
470+
/* 'else' branch is always present, but could be a NULL-constant */
471+
if (len < 3 || len % 2 != 1)
472+
expr_yyerror_more(yyscanner,
473+
"odd and >= 3 number of arguments expected",
474+
"case control structure");
475+
break;
476+
477+
/* hash functions with optional seed argument */
478+
case PGBENCH_NARGS_HASH:
479+
if (len > 2)
480+
expr_yyerror_more(yyscanner, "unexpected number of arguments",
481+
PGBENCH_FUNCTIONS[fnumber].fname);
482+
483+
if (len == 1)
484+
{
485+
PgBenchExpr *var = make_variable("default_seed");
486+
args = make_elist(var, args);
487+
}
488+
break;
489+
490+
/* common case: positive arguments number */
491+
default:
492+
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
493+
494+
if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
495+
expr_yyerror_more(yyscanner, "unexpected number of arguments",
496+
PGBENCH_FUNCTIONS[fnumber].fname);
449497
}
450498

451499
expr->etype = ENODE_FUNCTION;

src/bin/pgbench/pgbench.c

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@
6060

6161
#define ERRCODE_UNDEFINED_TABLE "42P01"
6262

63+
/*
 * Hashing constants
 *
 * FNV_PRIME and FNV_OFFSET_BASIS are the parameters of the 64-bit FNV hash;
 * MM2_MUL and MM2_ROT are the multiplier and the shift distance used by the
 * Murmur2 mixing steps.
 *
 * The 64-bit constants are cast to uint64 so that every expression using
 * them is evaluated in unsigned (modular) arithmetic.  Without the cast,
 * FNV_PRIME fits in a signed 64-bit integer and therefore has a signed type,
 * which would make the FNV multiplication a signed one, where overflow is
 * undefined behavior.
 */
#define FNV_PRIME			((uint64) 0x100000001b3)
#define FNV_OFFSET_BASIS	((uint64) 0xcbf29ce484222325)
#define MM2_MUL				((uint64) 0xc6a4a7935bd1e995)
#define MM2_ROT				47
70+
6371
/*
6472
* Multi-platform pthread implementations
6573
*/
@@ -915,6 +923,54 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s)
915923
: computeHarmonicZipfian(thread, n, s));
916924
}
917925

926+
/*
927+
* FNV-1a hash function
928+
*/
929+
static int64
930+
getHashFnv1a(int64 val, uint64 seed)
931+
{
932+
int64 result;
933+
int i;
934+
935+
result = FNV_OFFSET_BASIS ^ seed;
936+
for (i = 0; i < 8; ++i)
937+
{
938+
int32 octet = val & 0xff;
939+
940+
val = val >> 8;
941+
result = result ^ octet;
942+
result = result * FNV_PRIME;
943+
}
944+
945+
return result;
946+
}
947+
948+
/*
949+
* Murmur2 hash function
950+
*
951+
* Based on original work of Austin Appleby
952+
* https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
953+
*/
954+
static int64
955+
getHashMurmur2(int64 val, uint64 seed)
956+
{
957+
uint64 result = seed ^ (sizeof(int64) * MM2_MUL);
958+
uint64 k = (uint64) val;
959+
960+
k *= MM2_MUL;
961+
k ^= k >> MM2_ROT;
962+
k *= MM2_MUL;
963+
964+
result ^= k;
965+
result *= MM2_MUL;
966+
967+
result ^= result >> MM2_ROT;
968+
result *= MM2_MUL;
969+
result ^= result >> MM2_ROT;
970+
971+
return (int64) result;
972+
}
973+
918974
/*
919975
* Initialize the given SimpleStats struct to all zeroes
920976
*/
@@ -2211,6 +2267,30 @@ evalStandardFunc(TState *thread, CState *st,
22112267
return true;
22122268
}
22132269

2270+
/* hashing */
2271+
case PGBENCH_HASH_FNV1A:
2272+
case PGBENCH_HASH_MURMUR2:
2273+
{
2274+
int64 val,
2275+
seed;
2276+
2277+
Assert(nargs == 2);
2278+
2279+
if (!coerceToInt(&vargs[0], &val) ||
2280+
!coerceToInt(&vargs[1], &seed))
2281+
return false;
2282+
2283+
if (func == PGBENCH_HASH_MURMUR2)
2284+
setIntValue(retval, getHashMurmur2(val, seed));
2285+
else if (func == PGBENCH_HASH_FNV1A)
2286+
setIntValue(retval, getHashFnv1a(val, seed));
2287+
else
2288+
/* cannot get here */
2289+
Assert(0);
2290+
2291+
return true;
2292+
}
2293+
22142294
default:
22152295
/* cannot get here */
22162296
Assert(0);
@@ -4963,6 +5043,10 @@ main(int argc, char **argv)
49635043
exit(1);
49645044
}
49655045

5046+
/* set random seed */
5047+
INSTR_TIME_SET_CURRENT(start_time);
5048+
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
5049+
49665050
if (internal_script_used)
49675051
{
49685052
/*
@@ -5024,6 +5108,19 @@ main(int argc, char **argv)
50245108
}
50255109
}
50265110

5111+
/* set default seed for hash functions */
5112+
if (lookupVariable(&state[0], "default_seed") == NULL)
5113+
{
5114+
uint64 seed = ((uint64) (random() & 0xFFFF) << 48) |
5115+
((uint64) (random() & 0xFFFF) << 32) |
5116+
((uint64) (random() & 0xFFFF) << 16) |
5117+
(uint64) (random() & 0xFFFF);
5118+
5119+
for (i = 0; i < nclients; i++)
5120+
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
5121+
exit(1);
5122+
}
5123+
50275124
if (!is_no_vacuum)
50285125
{
50295126
fprintf(stderr, "starting vacuum...");
@@ -5041,10 +5138,6 @@ main(int argc, char **argv)
50415138
}
50425139
PQfinish(con);
50435140

5044-
/* set random seed */
5045-
INSTR_TIME_SET_CURRENT(start_time);
5046-
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
5047-
50485141
/* set up thread data structures */
50495142
threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
50505143
nclients_dealt = 0;

src/bin/pgbench/pgbench.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
9797
PGBENCH_LE,
9898
PGBENCH_LT,
9999
PGBENCH_IS,
100-
PGBENCH_CASE
100+
PGBENCH_CASE,
101+
PGBENCH_HASH_FNV1A,
102+
PGBENCH_HASH_MURMUR2
101103
} PgBenchFunction;
102104

103105
typedef struct PgBenchExpr PgBenchExpr;

0 commit comments

Comments
 (0)