Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 26a76cb

Browse files
committed
Restrict pgbench's zipfian parameter to ensure good performance.
Remove the code that supported zipfian distribution parameters less than 1.0, as it had undocumented performance hazards, and it's not clear that the case is useful enough to justify either fixing or documenting those hazards. Also, since the code path for parameter > 1.0 could perform badly for values very close to 1.0, establish a minimum allowed value of 1.001. This solution seems superior to the previous vague documentation warning about small values not performing well. Fabien Coelho, per a gripe from Tomas Vondra Discussion: https://postgr.es/m/b5e172e9-ad22-48a3-86a3-589afa20e8f7@2ndquadrant.com
1 parent 4fd05bb commit 26a76cb

File tree

3 files changed

+32
-193
lines changed

3 files changed

+32
-193
lines changed

doc/src/sgml/ref/pgbench.sgml

+11-16
Original file line numberDiff line numberDiff line change
@@ -1543,29 +1543,17 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
15431543
middle quarter (1.0 / 4.0) of the interval (i.e. from
15441544
<literal>3.0 / 8.0</literal> to <literal>5.0 / 8.0</literal>) and 95% from
15451545
the middle half (<literal>2.0 / 4.0</literal>) of the interval (second and third
1546-
quartiles). The minimum <replaceable>parameter</replaceable> is 2.0 for performance
1547-
of the Box-Muller transform.
1546+
quartiles). The minimum allowed <replaceable>parameter</replaceable>
1547+
value is 2.0.
15481548
</para>
15491549
</listitem>
15501550
<listitem>
15511551
<para>
1552-
<literal>random_zipfian</literal> generates an approximated bounded Zipfian
1553-
distribution. For <replaceable>parameter</replaceable> in (0, 1), an
1554-
approximated algorithm is taken from
1555-
"Quickly Generating Billion-Record Synthetic Databases",
1556-
Jim Gray et al, SIGMOD 1994. For <replaceable>parameter</replaceable>
1557-
in (1, 1000), a rejection method is used, based on
1558-
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
1559-
Springer 1986. The distribution is not defined when the parameter's
1560-
value is 1.0. The function's performance is poor for parameter values
1561-
close and above 1.0 and on a small range.
1562-
</para>
1563-
<para>
1552+
<literal>random_zipfian</literal> generates a bounded Zipfian
1553+
distribution.
15641554
<replaceable>parameter</replaceable> defines how skewed the distribution
15651555
is. The larger the <replaceable>parameter</replaceable>, the more
15661556
frequently values closer to the beginning of the interval are drawn.
1567-
The closer to 0 <replaceable>parameter</replaceable> is,
1568-
the flatter (more uniform) the output distribution.
15691557
The distribution is such that, assuming the range starts from 1,
15701558
the ratio of the probability of drawing <replaceable>k</replaceable>
15711559
versus drawing <replaceable>k+1</replaceable> is
@@ -1576,6 +1564,13 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
15761564
itself is produced <literal>(3/2)*2.5 = 2.76</literal> times more
15771565
frequently than <literal>3</literal>, and so on.
15781566
</para>
1567+
<para>
1568+
<application>pgbench</application>'s implementation is based on
1569+
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
1570+
Springer 1986. Due to limitations of that algorithm,
1571+
the <replaceable>parameter</replaceable> value is restricted to
1572+
the range [1.001, 1000].
1573+
</para>
15791574
</listitem>
15801575
</itemizedlist>
15811576

src/bin/pgbench/pgbench.c

+19-148
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,10 @@ static int pthread_join(pthread_t th, void **thread_return);
135135
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
136136
#define DEFAULT_NXACTS 10 /* default nxacts */
137137

138-
#define ZIPF_CACHE_SIZE 15 /* cache cells number */
139-
140138
#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */
141-
#define MAX_ZIPFIAN_PARAM 1000 /* maximum parameter for zipfian */
139+
140+
#define MIN_ZIPFIAN_PARAM 1.001 /* minimum parameter for zipfian */
141+
#define MAX_ZIPFIAN_PARAM 1000.0 /* maximum parameter for zipfian */
142142

143143
int nxacts = 0; /* number of transactions per client */
144144
int duration = 0; /* duration in seconds */
@@ -410,35 +410,6 @@ typedef struct
410410
int ecnt; /* error count */
411411
} CState;
412412

413-
/*
414-
* Cache cell for random_zipfian call
415-
*/
416-
typedef struct
417-
{
418-
/* cell keys */
419-
double s; /* s - parameter of random_zipfian function */
420-
int64 n; /* number of elements in range (max - min + 1) */
421-
422-
double harmonicn; /* generalizedHarmonicNumber(n, s) */
423-
double alpha;
424-
double beta;
425-
double eta;
426-
427-
uint64 last_used; /* last used logical time */
428-
} ZipfCell;
429-
430-
/*
431-
* Zipf cache for zeta values
432-
*/
433-
typedef struct
434-
{
435-
uint64 current; /* counter for LRU cache replacement algorithm */
436-
437-
int nb_cells; /* number of filled cells */
438-
int overflowCount; /* number of cache overflows */
439-
ZipfCell cells[ZIPF_CACHE_SIZE];
440-
} ZipfCache;
441-
442413
/*
443414
* Thread state
444415
*/
@@ -460,8 +431,6 @@ typedef struct
460431

461432
int64 throttle_trigger; /* previous/next throttling (us) */
462433
FILE *logfile; /* where to log, or NULL */
463-
ZipfCache zipf_cache; /* for thread-safe zipfian random number
464-
* generation */
465434

466435
/* per thread collected stats */
467436
instr_time start_time; /* thread start time */
@@ -975,77 +944,12 @@ getPoissonRand(RandomState *random_state, double center)
975944
return (int64) (-log(uniform) * center + 0.5);
976945
}
977946

978-
/* helper function for getZipfianRand */
979-
static double
980-
generalizedHarmonicNumber(int64 n, double s)
981-
{
982-
int i;
983-
double ans = 0.0;
984-
985-
for (i = n; i > 1; i--)
986-
ans += pow(i, -s);
987-
return ans + 1.0;
988-
}
989-
990-
/* set harmonicn and other parameters to cache cell */
991-
static void
992-
zipfSetCacheCell(ZipfCell *cell, int64 n, double s)
993-
{
994-
double harmonic2;
995-
996-
cell->n = n;
997-
cell->s = s;
998-
999-
harmonic2 = generalizedHarmonicNumber(2, s);
1000-
cell->harmonicn = generalizedHarmonicNumber(n, s);
1001-
1002-
cell->alpha = 1.0 / (1.0 - s);
1003-
cell->beta = pow(0.5, s);
1004-
cell->eta = (1.0 - pow(2.0 / n, 1.0 - s)) / (1.0 - harmonic2 / cell->harmonicn);
1005-
}
1006-
1007-
/*
1008-
* search for cache cell with keys (n, s)
1009-
* and create new cell if it does not exist
1010-
*/
1011-
static ZipfCell *
1012-
zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double s)
1013-
{
1014-
int i,
1015-
least_recently_used = 0;
1016-
ZipfCell *cell;
1017-
1018-
/* search cached cell for given parameters */
1019-
for (i = 0; i < cache->nb_cells; i++)
1020-
{
1021-
cell = &cache->cells[i];
1022-
if (cell->n == n && cell->s == s)
1023-
return &cache->cells[i];
1024-
1025-
if (cell->last_used < cache->cells[least_recently_used].last_used)
1026-
least_recently_used = i;
1027-
}
1028-
1029-
/* create new one if it does not exist */
1030-
if (cache->nb_cells < ZIPF_CACHE_SIZE)
1031-
i = cache->nb_cells++;
1032-
else
1033-
{
1034-
/* replace LRU cell if cache is full */
1035-
i = least_recently_used;
1036-
cache->overflowCount++;
1037-
}
1038-
1039-
zipfSetCacheCell(&cache->cells[i], n, s);
1040-
1041-
cache->cells[i].last_used = cache->current++;
1042-
return &cache->cells[i];
1043-
}
1044-
1045947
/*
1046948
* Computing zipfian using rejection method, based on
1047949
* "Non-Uniform Random Variate Generation",
1048950
* Luc Devroye, p. 550-551, Springer 1986.
951+
*
952+
* This works for s > 1.0, but may perform badly for s very close to 1.0.
1049953
*/
1050954
static int64
1051955
computeIterativeZipfian(RandomState *random_state, int64 n, double s)
@@ -1056,6 +960,10 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s)
1056960
u,
1057961
v;
1058962

963+
/* Ensure n is sane */
964+
if (n <= 1)
965+
return 1;
966+
1059967
while (true)
1060968
{
1061969
/* random variates */
@@ -1072,39 +980,16 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s)
1072980
return (int64) x;
1073981
}
1074982

1075-
/*
1076-
* Computing zipfian using harmonic numbers, based on algorithm described in
1077-
* "Quickly Generating Billion-Record Synthetic Databases",
1078-
* Jim Gray et al, SIGMOD 1994
1079-
*/
1080-
static int64
1081-
computeHarmonicZipfian(ZipfCache *zipf_cache, RandomState *random_state,
1082-
int64 n, double s)
1083-
{
1084-
ZipfCell *cell = zipfFindOrCreateCacheCell(zipf_cache, n, s);
1085-
double uniform = pg_erand48(random_state->xseed);
1086-
double uz = uniform * cell->harmonicn;
1087-
1088-
if (uz < 1.0)
1089-
return 1;
1090-
if (uz < 1.0 + cell->beta)
1091-
return 2;
1092-
return 1 + (int64) (cell->n * pow(cell->eta * uniform - cell->eta + 1.0, cell->alpha));
1093-
}
1094-
1095983
/* random number generator: zipfian distribution from min to max inclusive */
1096984
static int64
1097-
getZipfianRand(ZipfCache *zipf_cache, RandomState *random_state, int64 min,
1098-
int64 max, double s)
985+
getZipfianRand(RandomState *random_state, int64 min, int64 max, double s)
1099986
{
1100987
int64 n = max - min + 1;
1101988

1102989
/* abort if parameter is invalid */
1103-
Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM);
990+
Assert(MIN_ZIPFIAN_PARAM <= s && s <= MAX_ZIPFIAN_PARAM);
1104991

1105-
return min - 1 + ((s > 1)
1106-
? computeIterativeZipfian(random_state, n, s)
1107-
: computeHarmonicZipfian(zipf_cache, random_state, n, s));
992+
return min - 1 + computeIterativeZipfian(random_state, n, s);
1108993
}
1109994

1110995
/*
@@ -2426,25 +2311,25 @@ evalStandardFunc(TState *thread, CState *st,
24262311
}
24272312
else if (func == PGBENCH_RANDOM_ZIPFIAN)
24282313
{
2429-
if (param <= 0.0 || param == 1.0 || param > MAX_ZIPFIAN_PARAM)
2314+
if (param < MIN_ZIPFIAN_PARAM || param > MAX_ZIPFIAN_PARAM)
24302315
{
24312316
fprintf(stderr,
2432-
"zipfian parameter must be in range (0, 1) U (1, %d]"
2433-
" (got %f)\n", MAX_ZIPFIAN_PARAM, param);
2317+
"zipfian parameter must be in range [%.3f, %.0f]"
2318+
" (not %f)\n",
2319+
MIN_ZIPFIAN_PARAM, MAX_ZIPFIAN_PARAM, param);
24342320
return false;
24352321
}
2322+
24362323
setIntValue(retval,
2437-
getZipfianRand(&thread->zipf_cache,
2438-
&st->cs_func_rs,
2439-
imin, imax, param));
2324+
getZipfianRand(&st->cs_func_rs, imin, imax, param));
24402325
}
24412326
else /* exponential */
24422327
{
24432328
if (param <= 0.0)
24442329
{
24452330
fprintf(stderr,
24462331
"exponential parameter must be greater than zero"
2447-
" (got %f)\n", param);
2332+
" (not %f)\n", param);
24482333
return false;
24492334
}
24502335

@@ -4996,8 +4881,6 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
49964881
tps_include,
49974882
tps_exclude;
49984883
int64 ntx = total->cnt - total->skipped;
4999-
int i,
5000-
totalCacheOverflows = 0;
50014884

50024885
time_include = INSTR_TIME_GET_DOUBLE(total_time);
50034886

@@ -5025,15 +4908,6 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
50254908
printf("number of transactions actually processed: " INT64_FORMAT "\n",
50264909
ntx);
50274910
}
5028-
/* Report zipfian cache overflow */
5029-
for (i = 0; i < nthreads; i++)
5030-
{
5031-
totalCacheOverflows += threads[i].zipf_cache.overflowCount;
5032-
}
5033-
if (totalCacheOverflows > 0)
5034-
{
5035-
printf("zipfian cache array overflowed %d time(s)\n", totalCacheOverflows);
5036-
}
50374911

50384912
/* Remaining stats are nonsensical if we failed to execute any xacts */
50394913
if (total->cnt <= 0)
@@ -5916,9 +5790,6 @@ main(int argc, char **argv)
59165790
initRandomState(&thread->ts_sample_rs);
59175791
thread->logfile = NULL; /* filled in later */
59185792
thread->latency_late = 0;
5919-
thread->zipf_cache.nb_cells = 0;
5920-
thread->zipf_cache.current = 0;
5921-
thread->zipf_cache.overflowCount = 0;
59225793
initStats(&thread->stats, 0);
59235794

59245795
nclients_dealt += thread->nstate;

src/bin/pgbench/t/001_pgbench_with_server.pl

+2-29
Original file line numberDiff line numberDiff line change
@@ -666,13 +666,13 @@ sub pgbench
666666
[
667667
'set zipfian param to 1',
668668
2,
669-
[qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
669+
[qr{zipfian parameter must be in range \[1\.001, 1000\]}],
670670
q{\set i random_zipfian(0, 10, 1)}
671671
],
672672
[
673673
'set zipfian param too large',
674674
2,
675-
[qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
675+
[qr{zipfian parameter must be in range \[1\.001, 1000\]}],
676676
q{\set i random_zipfian(0, 10, 1000000)}
677677
],
678678
[
@@ -802,33 +802,6 @@ sub pgbench
802802
{ $n => $script });
803803
}
804804

805-
# zipfian cache array overflow
806-
pgbench(
807-
'-t 1', 0,
808-
[ qr{processed: 1/1}, qr{zipfian cache array overflowed 1 time\(s\)} ],
809-
[qr{^}],
810-
'pgbench zipfian array overflow on random_zipfian',
811-
{
812-
'001_pgbench_random_zipfian' => q{
813-
\set i random_zipfian(1, 100, 0.5)
814-
\set i random_zipfian(2, 100, 0.5)
815-
\set i random_zipfian(3, 100, 0.5)
816-
\set i random_zipfian(4, 100, 0.5)
817-
\set i random_zipfian(5, 100, 0.5)
818-
\set i random_zipfian(6, 100, 0.5)
819-
\set i random_zipfian(7, 100, 0.5)
820-
\set i random_zipfian(8, 100, 0.5)
821-
\set i random_zipfian(9, 100, 0.5)
822-
\set i random_zipfian(10, 100, 0.5)
823-
\set i random_zipfian(11, 100, 0.5)
824-
\set i random_zipfian(12, 100, 0.5)
825-
\set i random_zipfian(13, 100, 0.5)
826-
\set i random_zipfian(14, 100, 0.5)
827-
\set i random_zipfian(15, 100, 0.5)
828-
\set i random_zipfian(16, 100, 0.5)
829-
}
830-
});
831-
832805
# throttling
833806
pgbench(
834807
'-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r',

0 commit comments

Comments
 (0)