Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 27d6bc6

Browse files
committed
Fix portability problem in pgbench.
The pgbench regression test supposed that srandom() with a specific value would result in deterministic output from random(), as required by POSIX. It emerges however that OpenBSD is too smart to be constrained by mere standards, so their random() emits nondeterministic output anyway. While a workaround does exist, what seems like a better fix is to stop relying on the platform's srandom()/random() altogether, so that what you get from --random-seed=N is not merely deterministic but platform independent. Hence, use a separate pg_jrand48() random sequence in place of random(). Also adjust the regression test case that's supposed to detect nondeterminism so that it's more likely to detect it; the original choice of random_zipfian parameter tended to produce the same output all the time even if the underlying behavior wasn't deterministic. In passing, improve pgbench's docs about random_zipfian(). Back-patch to v11 where this code was introduced. Fabien Coelho and Tom Lane Discussion: https://postgr.es/m/4615.1547792324@sss.pgh.pa.us
1 parent 71eba83 commit 27d6bc6

File tree

3 files changed

+56
-29
lines changed

3 files changed

+56
-29
lines changed

doc/src/sgml/ref/pgbench.sgml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,15 +1493,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
14931493
in (1, 1000), a rejection method is used, based on
14941494
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
14951495
Springer 1986. The distribution is not defined when the parameter's
1496-
value is 1.0. The drawing performance is poor for parameter values
1496+
value is 1.0. The function's performance is poor for parameter values
14971497
close to and above 1.0 and on a small range.
14981498
</para>
14991499
<para>
1500-
<replaceable>parameter</replaceable>
1501-
defines how skewed the distribution is. The larger the <replaceable>parameter</replaceable>, the more
1502-
frequently values to the beginning of the interval are drawn.
1500+
<replaceable>parameter</replaceable> defines how skewed the distribution
1501+
is. The larger the <replaceable>parameter</replaceable>, the more
1502+
frequently values closer to the beginning of the interval are drawn.
15031503
The closer to 0 <replaceable>parameter</replaceable> is,
1504-
the flatter (more uniform) the access distribution.
1504+
the flatter (more uniform) the output distribution.
1505+
The distribution is such that, assuming the range starts from 1,
1506+
the ratio of the probability of drawing <replaceable>k</replaceable>
1507+
versus drawing <replaceable>k+1</replaceable> is
1508+
<literal>((<replaceable>k</replaceable>+1)/<replaceable>k</replaceable>)**<replaceable>parameter</replaceable></literal>.
1509+
For example, <literal>random_zipfian(1, ..., 2.5)</literal> produces
1510+
the value <literal>1</literal> about <literal>(2/1)**2.5 =
1511+
5.66</literal> times more frequently than <literal>2</literal>, which
1512+
itself is produced <literal>(3/2)**2.5 = 2.76</literal> times more
1513+
frequently than <literal>3</literal>, and so on.
15051514
</para>
15061515
</listitem>
15071516
</itemizedlist>

src/bin/pgbench/pgbench.c

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ int64 latency_limit = 0;
156156
char *tablespace = NULL;
157157
char *index_tablespace = NULL;
158158

159-
/* random seed used when calling srandom() */
159+
/* random seed used to initialize base_random_sequence */
160160
int64 random_seed = -1;
161161

162162
/*
@@ -250,6 +250,9 @@ typedef struct StatsData
250250
SimpleStats lag;
251251
} StatsData;
252252

253+
/* Various random sequences are initialized from this one. */
254+
static unsigned short base_random_sequence[3];
255+
253256
/*
254257
* Connection state machine states.
255258
*/
@@ -692,7 +695,14 @@ strtoint64(const char *str)
692695
return ((sign < 0) ? -result : result);
693696
}
694697

695-
/* random number generator: uniform distribution from min to max inclusive */
698+
/*
699+
* Random number generator: uniform distribution from min to max inclusive.
700+
*
701+
* Although the limits are expressed as int64, you can't generate the full
702+
* int64 range in one call, because the difference of the limits mustn't
703+
* overflow int64. In practice it's unwise to ask for more than an int32
704+
* range, because of the limited precision of pg_erand48().
705+
*/
696706
static int64
697707
getrand(TState *thread, int64 min, int64 max)
698708
{
@@ -4700,20 +4710,22 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
47004710
}
47014711
}
47024712

4703-
/* call srandom based on some seed. NULL triggers the default behavior. */
4713+
/*
4714+
* Set up a random seed according to seed parameter (NULL means default),
4715+
* and initialize base_random_sequence for use in initializing other sequences.
4716+
*/
47044717
static bool
47054718
set_random_seed(const char *seed)
47064719
{
4707-
/* srandom expects an unsigned int */
4708-
unsigned int iseed;
4720+
uint64 iseed;
47094721

47104722
if (seed == NULL || strcmp(seed, "time") == 0)
47114723
{
47124724
/* rely on current time */
47134725
instr_time now;
47144726

47154727
INSTR_TIME_SET_CURRENT(now);
4716-
iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
4728+
iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
47174729
}
47184730
else if (strcmp(seed, "rand") == 0)
47194731
{
@@ -4733,7 +4745,7 @@ set_random_seed(const char *seed)
47334745
/* parse seed unsigned int value */
47344746
char garbage;
47354747

4736-
if (sscanf(seed, "%u%c", &iseed, &garbage) != 1)
4748+
if (sscanf(seed, UINT64_FORMAT "%c", &iseed, &garbage) != 1)
47374749
{
47384750
fprintf(stderr,
47394751
"unrecognized random seed option \"%s\": expecting an unsigned integer, \"time\" or \"rand\"\n",
@@ -4743,10 +4755,14 @@ set_random_seed(const char *seed)
47434755
}
47444756

47454757
if (seed != NULL)
4746-
fprintf(stderr, "setting random seed to %u\n", iseed);
4747-
srandom(iseed);
4748-
/* no precision loss: 32 bit unsigned int cast to 64 bit int */
4758+
fprintf(stderr, "setting random seed to " UINT64_FORMAT "\n", iseed);
47494759
random_seed = iseed;
4760+
4761+
/* Fill base_random_sequence with low-order bits of seed */
4762+
base_random_sequence[0] = iseed & 0xFFFF;
4763+
base_random_sequence[1] = (iseed >> 16) & 0xFFFF;
4764+
base_random_sequence[2] = (iseed >> 32) & 0xFFFF;
4765+
47504766
return true;
47514767
}
47524768

@@ -5444,10 +5460,9 @@ main(int argc, char **argv)
54445460
/* set default seed for hash functions */
54455461
if (lookupVariable(&state[0], "default_seed") == NULL)
54465462
{
5447-
uint64 seed = ((uint64) (random() & 0xFFFF) << 48) |
5448-
((uint64) (random() & 0xFFFF) << 32) |
5449-
((uint64) (random() & 0xFFFF) << 16) |
5450-
(uint64) (random() & 0xFFFF);
5463+
uint64 seed =
5464+
((uint64) pg_jrand48(base_random_sequence) & 0xFFFFFFFF) |
5465+
(((uint64) pg_jrand48(base_random_sequence) & 0xFFFFFFFF) << 32);
54515466

54525467
for (i = 0; i < nclients; i++)
54535468
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
@@ -5491,9 +5506,12 @@ main(int argc, char **argv)
54915506
thread->state = &state[nclients_dealt];
54925507
thread->nstate =
54935508
(nclients - nclients_dealt + nthreads - i - 1) / (nthreads - i);
5494-
thread->random_state[0] = random();
5495-
thread->random_state[1] = random();
5496-
thread->random_state[2] = random();
5509+
thread->random_state[0] = (unsigned short)
5510+
(pg_jrand48(base_random_sequence) & 0xFFFF);
5511+
thread->random_state[1] = (unsigned short)
5512+
(pg_jrand48(base_random_sequence) & 0xFFFF);
5513+
thread->random_state[2] = (unsigned short)
5514+
(pg_jrand48(base_random_sequence) & 0xFFFF);
54975515
thread->logfile = NULL; /* filled in later */
54985516
thread->latency_late = 0;
54995517
thread->zipf_cache.nb_cells = 0;

src/bin/pgbench/t/001_pgbench_with_server.pl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -259,11 +259,11 @@ sub pgbench
259259
[
260260
qr{setting random seed to 5432\b},
261261

262-
# After explicit seeding, the four * random checks (1-3,20) should be
263-
# deterministic, but not necessarily portable.
264-
qr{command=1.: int 1\d\b}, # uniform random: 12 on linux
265-
qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux
266-
qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux
262+
# After explicit seeding, the four random checks (1-3,20) are
263+
# deterministic
264+
qr{command=1.: int 18\b}, # uniform random
265+
qr{command=2.: int 101\b}, # exponential random
266+
qr{command=3.: int 1415\b}, # gaussian random
267267
qr{command=4.: int 4\b},
268268
qr{command=5.: int 5\b},
269269
qr{command=6.: int 6\b},
@@ -277,7 +277,7 @@ sub pgbench
277277
qr{command=16.: double 16\b},
278278
qr{command=17.: double 17\b},
279279
qr{command=18.: int 9223372036854775807\b},
280-
qr{command=20.: int \d\b}, # zipfian random: 1 on linux
280+
qr{command=20.: int 2\b}, # zipfian random
281281
qr{command=21.: double -27\b},
282282
qr{command=22.: double 1024\b},
283283
qr{command=23.: double 1\b},
@@ -468,7 +468,7 @@ sub pgbench
468468
\set ur random(1000, 1999)
469469
\set er random_exponential(2000, 2999, 2.0)
470470
\set gr random_gaussian(3000, 3999, 3.0)
471-
\set zr random_zipfian(4000, 4999, 2.5)
471+
\set zr random_zipfian(4000, 4999, 1.5)
472472
INSERT INTO seeded_random(seed, rand, val) VALUES
473473
(:random_seed, 'uniform', :ur),
474474
(:random_seed, 'exponential', :er),

0 commit comments

Comments
 (0)