Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 38d8176

Browse files
committed
Invent random_normal() to provide normally-distributed random numbers.
There is already a version of this in contrib/tablefunc, but it seems sufficiently widely useful to justify having it in core.

Paul Ramsey

Discussion: https://postgr.es/m/CACowWR0DqHAvOKUCNxTrASFkWsDLqKMd6WiXvVvaWg4pV1BMnQ@mail.gmail.com
1 parent 2673ebf commit 38d8176

File tree

10 files changed: +164 −29 lines changed (164 additions, 29 deletions)

doc/src/sgml/func.sgml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,28 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
18151815
</para></entry>
18161816
</row>
18171817

1818+
<row>
1819+
<entry role="func_table_entry"><para role="func_signature">
1820+
<indexterm>
1821+
<primary>random_normal</primary>
1822+
</indexterm>
1823+
1824+
<function>random_normal</function> (
1825+
<optional> <parameter>mean</parameter> <type>double precision</type>
1826+
<optional>, <parameter>stddev</parameter> <type>double precision</type> </optional></optional> )
1827+
<returnvalue>double precision</returnvalue>
1828+
</para>
1829+
<para>
1830+
Returns a random value from the normal distribution with the given
1831+
parameters; <parameter>mean</parameter> defaults to 0.0
1832+
and <parameter>stddev</parameter> defaults to 1.0
1833+
</para>
1834+
<para>
1835+
<literal>random_normal(0.0, 1.0)</literal>
1836+
<returnvalue>0.051285419</returnvalue>
1837+
</para></entry>
1838+
</row>
1839+
18181840
<row>
18191841
<entry role="func_table_entry"><para role="func_signature">
18201842
<indexterm>
@@ -1824,7 +1846,8 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
18241846
<returnvalue>void</returnvalue>
18251847
</para>
18261848
<para>
1827-
Sets the seed for subsequent <literal>random()</literal> calls;
1849+
Sets the seed for subsequent <literal>random()</literal> and
1850+
<literal>random_normal()</literal> calls;
18281851
argument must be between -1.0 and 1.0, inclusive
18291852
</para>
18301853
<para>
@@ -1848,6 +1871,7 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
18481871
Without any prior <function>setseed()</function> call in the same
18491872
session, the first <function>random()</function> call obtains a seed
18501873
from a platform-dependent source of random bits.
1874+
These remarks hold equally for <function>random_normal()</function>.
18511875
</para>
18521876

18531877
<para>

src/backend/catalog/system_functions.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ CREATE OR REPLACE FUNCTION bit_length(text)
6666
IMMUTABLE PARALLEL SAFE STRICT COST 1
6767
RETURN octet_length($1) * 8;
6868

69+
CREATE OR REPLACE FUNCTION
70+
random_normal(mean float8 DEFAULT 0, stddev float8 DEFAULT 1)
71+
RETURNS float8
72+
LANGUAGE internal
73+
VOLATILE PARALLEL RESTRICTED STRICT COST 1
74+
AS 'drandom_normal';
75+
6976
CREATE OR REPLACE FUNCTION log(numeric)
7077
RETURNS numeric
7178
LANGUAGE sql

src/backend/utils/adt/float.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,13 +2743,11 @@ datanh(PG_FUNCTION_ARGS)
27432743

27442744

27452745
/*
2746-
* drandom - returns a random number
2746+
* initialize_drandom_seed - initialize drandom_seed if not yet done
27472747
*/
2748-
Datum
2749-
drandom(PG_FUNCTION_ARGS)
2748+
static void
2749+
initialize_drandom_seed(void)
27502750
{
2751-
float8 result;
2752-
27532751
/* Initialize random seed, if not done yet in this process */
27542752
if (unlikely(!drandom_seed_set))
27552753
{
@@ -2769,13 +2767,45 @@ drandom(PG_FUNCTION_ARGS)
27692767
}
27702768
drandom_seed_set = true;
27712769
}
2770+
}
2771+
2772+
/*
2773+
* drandom - returns a random number
2774+
*/
2775+
Datum
2776+
drandom(PG_FUNCTION_ARGS)
2777+
{
2778+
float8 result;
2779+
2780+
initialize_drandom_seed();
27722781

27732782
/* pg_prng_double produces desired result range [0.0 - 1.0) */
27742783
result = pg_prng_double(&drandom_seed);
27752784

27762785
PG_RETURN_FLOAT8(result);
27772786
}
27782787

2788+
/*
2789+
* drandom_normal - returns a random number from a normal distribution
2790+
*/
2791+
Datum
2792+
drandom_normal(PG_FUNCTION_ARGS)
2793+
{
2794+
float8 mean = PG_GETARG_FLOAT8(0);
2795+
float8 stddev = PG_GETARG_FLOAT8(1);
2796+
float8 result,
2797+
z;
2798+
2799+
initialize_drandom_seed();
2800+
2801+
/* Get random value from standard normal(mean = 0.0, stddev = 1.0) */
2802+
z = pg_prng_double_normal(&drandom_seed);
2803+
/* Transform the normal standard variable (z) */
2804+
/* using the target normal distribution parameters */
2805+
result = (stddev * z) + mean;
2806+
2807+
PG_RETURN_FLOAT8(result);
2808+
}
27792809

27802810
/*
27812811
* setseed - set seed for the random number generator

src/bin/pgbench/pgbench.c

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,8 +1136,8 @@ getGaussianRand(pg_prng_state *state, int64 min, int64 max,
11361136
Assert(parameter >= MIN_GAUSSIAN_PARAM);
11371137

11381138
/*
1139-
* Get user specified random number from this loop, with -parameter <
1140-
* stdev <= parameter
1139+
* Get normally-distributed random number in the range -parameter <= stdev
1140+
* < parameter.
11411141
*
11421142
* This loop is executed until the number is in the expected range.
11431143
*
@@ -1149,25 +1149,7 @@ getGaussianRand(pg_prng_state *state, int64 min, int64 max,
11491149
*/
11501150
do
11511151
{
1152-
/*
1153-
* pg_prng_double generates [0, 1), but for the basic version of the
1154-
* Box-Muller transform the two uniformly distributed random numbers
1155-
* are expected to be in (0, 1] (see
1156-
* https://en.wikipedia.org/wiki/Box-Muller_transform)
1157-
*/
1158-
double rand1 = 1.0 - pg_prng_double(state);
1159-
double rand2 = 1.0 - pg_prng_double(state);
1160-
1161-
/* Box-Muller basic form transform */
1162-
double var_sqrt = sqrt(-2.0 * log(rand1));
1163-
1164-
stdev = var_sqrt * sin(2.0 * M_PI * rand2);
1165-
1166-
/*
1167-
* we may try with cos, but there may be a bias induced if the
1168-
* previous value fails the test. To be on the safe side, let us try
1169-
* over.
1170-
*/
1152+
stdev = pg_prng_double_normal(state);
11711153
}
11721154
while (stdev < -parameter || stdev >= parameter);
11731155

src/common/pg_prng.c

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,17 @@
1919

2020
#include "c.h"
2121

22-
#include <math.h> /* for ldexp() */
22+
#include <math.h>
2323

2424
#include "common/pg_prng.h"
2525
#include "port/pg_bitutils.h"
2626

27+
/* X/Open (XSI) requires <math.h> to provide M_PI, but core POSIX does not */
28+
#ifndef M_PI
29+
#define M_PI 3.14159265358979323846
30+
#endif
31+
32+
2733
/* process-wide state vector */
2834
pg_prng_state pg_global_prng_state;
2935

@@ -235,6 +241,35 @@ pg_prng_double(pg_prng_state *state)
235241
return ldexp((double) (v >> (64 - 52)), -52);
236242
}
237243

244+
/*
245+
* Select a random double from the normal distribution with
246+
* mean = 0.0 and stddev = 1.0.
247+
*
248+
* To get a result from a different normal distribution use
249+
* STDDEV * pg_prng_double_normal() + MEAN
250+
*
251+
* Uses https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
252+
*/
253+
double
254+
pg_prng_double_normal(pg_prng_state *state)
255+
{
256+
double u1,
257+
u2,
258+
z0;
259+
260+
/*
261+
* pg_prng_double generates [0, 1), but for the basic version of the
262+
* Box-Muller transform the two uniformly distributed random numbers are
263+
* expected to be in (0, 1]; in particular we'd better not compute log(0).
264+
*/
265+
u1 = 1.0 - pg_prng_double(state);
266+
u2 = 1.0 - pg_prng_double(state);
267+
268+
/* Apply Box-Muller transform to get one normal-valued output */
269+
z0 = sqrt(-2.0 * log(u1)) * sin(2.0 * M_PI * u2);
270+
return z0;
271+
}
272+
238273
/*
239274
* Select a random boolean value.
240275
*/

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202301091
60+
#define CATALOG_VERSION_NO 202301092
6161

6262
#endif

src/include/catalog/pg_proc.dat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,10 @@
33593359
{ oid => '1598', descr => 'random value',
33603360
proname => 'random', provolatile => 'v', proparallel => 'r',
33613361
prorettype => 'float8', proargtypes => '', prosrc => 'drandom' },
3362+
{ oid => '8074', descr => 'random value from normal distribution',
3363+
proname => 'random_normal', provolatile => 'v', proparallel => 'r',
3364+
prorettype => 'float8', proargtypes => 'float8 float8',
3365+
prosrc => 'drandom_normal' },
33623366
{ oid => '1599', descr => 'set random seed',
33633367
proname => 'setseed', provolatile => 'v', proparallel => 'r',
33643368
prorettype => 'void', proargtypes => 'float8', prosrc => 'setseed' },

src/include/common/pg_prng.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern uint32 pg_prng_uint32(pg_prng_state *state);
5555
extern int32 pg_prng_int32(pg_prng_state *state);
5656
extern int32 pg_prng_int32p(pg_prng_state *state);
5757
extern double pg_prng_double(pg_prng_state *state);
58+
extern double pg_prng_double_normal(pg_prng_state *state);
5859
extern bool pg_prng_bool(pg_prng_state *state);
5960

6061
#endif /* PG_PRNG_H */

src/test/regress/expected/random.out

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,31 @@ SELECT AVG(random) FROM RANDOM_TBL
5151
-----
5252
(0 rows)
5353

54+
-- now test random_normal()
55+
TRUNCATE random_tbl;
56+
INSERT INTO random_tbl (random)
57+
SELECT count(*)
58+
FROM onek WHERE random_normal(0, 1) < 0;
59+
INSERT INTO random_tbl (random)
60+
SELECT count(*)
61+
FROM onek WHERE random_normal(0) < 0;
62+
INSERT INTO random_tbl (random)
63+
SELECT count(*)
64+
FROM onek WHERE random_normal() < 0;
65+
INSERT INTO random_tbl (random)
66+
SELECT count(*)
67+
FROM onek WHERE random_normal(stddev => 1, mean => 0) < 0;
68+
-- expect similar, but not identical values
69+
SELECT random, count(random) FROM random_tbl
70+
GROUP BY random HAVING count(random) > 3;
71+
random | count
72+
--------+-------
73+
(0 rows)
74+
75+
-- approximately check expected distribution
76+
SELECT AVG(random) FROM random_tbl
77+
HAVING AVG(random) NOT BETWEEN 400 AND 600;
78+
avg
79+
-----
80+
(0 rows)
81+

src/test/regress/sql/random.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,27 @@ SELECT random, count(random) FROM RANDOM_TBL
4242

4343
SELECT AVG(random) FROM RANDOM_TBL
4444
HAVING AVG(random) NOT BETWEEN 80 AND 120;
45+
46+
-- now test random_normal()
47+
48+
TRUNCATE random_tbl;
49+
INSERT INTO random_tbl (random)
50+
SELECT count(*)
51+
FROM onek WHERE random_normal(0, 1) < 0;
52+
INSERT INTO random_tbl (random)
53+
SELECT count(*)
54+
FROM onek WHERE random_normal(0) < 0;
55+
INSERT INTO random_tbl (random)
56+
SELECT count(*)
57+
FROM onek WHERE random_normal() < 0;
58+
INSERT INTO random_tbl (random)
59+
SELECT count(*)
60+
FROM onek WHERE random_normal(stddev => 1, mean => 0) < 0;
61+
62+
-- expect similar, but not identical values
63+
SELECT random, count(random) FROM random_tbl
64+
GROUP BY random HAVING count(random) > 3;
65+
66+
-- approximately check expected distribution
67+
SELECT AVG(random) FROM random_tbl
68+
HAVING AVG(random) NOT BETWEEN 400 AND 600;

0 commit comments

Comments
 (0)