Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6b9bba2

Browse files
committed
Marginal performance hacking in erand48.c.
Get rid of the multiplier and addend variables in favor of hard-wired constants. Do the multiply-and-add using uint64 arithmetic, rather than manually combining several narrower multiplications and additions. Make _dorand48 return the full-width new random value, and have its callers use that directly (after suitable masking) rather than reconstructing what they need from the unsigned short[] representation.

On my machine, this is good for a nearly factor-of-2 speedup of pg_erand48(), probably mostly from needing just one call of ldexp() rather than three. The wins for the other functions are smaller but measurable. While none of the existing call sites are really performance-critical, a cycle saved is a cycle earned; and besides the machine code is smaller this way (at least on x86_64).

Patch by me, but the original idea to optimize this by switching to int64 arithmetic is from Fabien Coelho.

Discussion: https://postgr.es/m/1551.1546018192@sss.pgh.pa.us
1 parent e090466 commit 6b9bba2

File tree

1 file changed

+33
-38
lines changed

1 file changed

+33
-38
lines changed

src/port/erand48.c

+33-38
Original file line numberDiff line numberDiff line change
@@ -37,48 +37,46 @@
3737

3838
#include <math.h>
3939

40+
/* These values are specified by POSIX */
41+
#define RAND48_MULT UINT64CONST(0x0005deece66d)
42+
#define RAND48_ADD UINT64CONST(0x000b)
43+
44+
/* POSIX specifies 0x330e's use in srand48, but the other bits are arbitrary */
4045
#define RAND48_SEED_0 (0x330e)
4146
#define RAND48_SEED_1 (0xabcd)
4247
#define RAND48_SEED_2 (0x1234)
43-
#define RAND48_MULT_0 (0xe66d)
44-
#define RAND48_MULT_1 (0xdeec)
45-
#define RAND48_MULT_2 (0x0005)
46-
#define RAND48_ADD (0x000b)
4748

4849
static unsigned short _rand48_seed[3] = {
4950
RAND48_SEED_0,
5051
RAND48_SEED_1,
5152
RAND48_SEED_2
5253
};
53-
static unsigned short _rand48_mult[3] = {
54-
RAND48_MULT_0,
55-
RAND48_MULT_1,
56-
RAND48_MULT_2
57-
};
58-
static unsigned short _rand48_add = RAND48_ADD;
5954

6055

6156
/*
6257
* Advance the 48-bit value stored in xseed[] to the next "random" number.
58+
*
59+
* Also returns the value of that number --- without masking it to 48 bits.
60+
* If caller uses the result, it must mask off the bits it wants.
6361
*/
64-
static void
62+
static uint64
6563
_dorand48(unsigned short xseed[3])
6664
{
67-
unsigned long accu;
68-
unsigned short temp[2];
69-
70-
accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] +
71-
(unsigned long) _rand48_add;
72-
temp[0] = (unsigned short) accu; /* lower 16 bits */
73-
accu >>= sizeof(unsigned short) * 8;
74-
accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] +
75-
(unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
76-
temp[1] = (unsigned short) accu; /* middle 16 bits */
77-
accu >>= sizeof(unsigned short) * 8;
78-
accu += _rand48_mult[0] * xseed[2] + _rand48_mult[1] * xseed[1] + _rand48_mult[2] * xseed[0];
79-
xseed[0] = temp[0];
80-
xseed[1] = temp[1];
81-
xseed[2] = (unsigned short) accu;
65+
/*
66+
* We do the arithmetic in uint64; any type wider than 48 bits would work.
67+
*/
68+
uint64 in;
69+
uint64 out;
70+
71+
in = (uint64) xseed[2] << 32 | (uint64) xseed[1] << 16 | (uint64) xseed[0];
72+
73+
out = in * RAND48_MULT + RAND48_ADD;
74+
75+
xseed[0] = out & 0xFFFF;
76+
xseed[1] = (out >> 16) & 0xFFFF;
77+
xseed[2] = (out >> 32) & 0xFFFF;
78+
79+
return out;
8280
}
8381

8482

@@ -89,10 +87,9 @@ _dorand48(unsigned short xseed[3])
8987
double
9088
pg_erand48(unsigned short xseed[3])
9189
{
92-
_dorand48(xseed);
93-
return ldexp((double) xseed[0], -48) +
94-
ldexp((double) xseed[1], -32) +
95-
ldexp((double) xseed[2], -16);
90+
uint64 x = _dorand48(xseed);
91+
92+
return ldexp((double) (x & UINT64CONST(0xFFFFFFFFFFFF)), -48);
9693
}
9794

9895
/*
@@ -102,8 +99,9 @@ pg_erand48(unsigned short xseed[3])
10299
long
103100
pg_lrand48(void)
104101
{
105-
_dorand48(_rand48_seed);
106-
return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1);
102+
uint64 x = _dorand48(_rand48_seed);
103+
104+
return (x >> 17) & UINT64CONST(0x7FFFFFFF);
107105
}
108106

109107
/*
@@ -113,8 +111,9 @@ pg_lrand48(void)
113111
long
114112
pg_jrand48(unsigned short xseed[3])
115113
{
116-
_dorand48(xseed);
117-
return (int32) (((uint32) xseed[2] << 16) + (uint32) xseed[1]);
114+
uint64 x = _dorand48(xseed);
115+
116+
return (int32) ((x >> 16) & UINT64CONST(0xFFFFFFFF));
118117
}
119118

120119
/*
@@ -134,8 +133,4 @@ pg_srand48(long seed)
134133
_rand48_seed[0] = RAND48_SEED_0;
135134
_rand48_seed[1] = (unsigned short) seed;
136135
_rand48_seed[2] = (unsigned short) (seed >> 16);
137-
_rand48_mult[0] = RAND48_MULT_0;
138-
_rand48_mult[1] = RAND48_MULT_1;
139-
_rand48_mult[2] = RAND48_MULT_2;
140-
_rand48_add = RAND48_ADD;
141136
}

0 commit comments

Comments
 (0)