Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1453435

Browse files
committed
Allow numeric to use a more compact, 2-byte header in many cases.
Review by Brendan Jurd and Tom Lane.
1 parent db04f2b commit 1453435

File tree

1 file changed

+192
-66
lines changed

1 file changed

+192
-66
lines changed

src/backend/utils/adt/numeric.c

+192-66
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 1998-2010, PostgreSQL Global Development Group
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.124 2010/07/30 04:30:23 rhaas Exp $
17+
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.125 2010/08/03 23:09:29 rhaas Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -35,38 +35,6 @@
3535
#include "utils/int8.h"
3636
#include "utils/numeric.h"
3737

38-
/*
39-
* Sign values and macros to deal with packing/unpacking n_sign_dscale
40-
*/
41-
#define NUMERIC_SIGN_MASK 0xC000
42-
#define NUMERIC_POS 0x0000
43-
#define NUMERIC_NEG 0x4000
44-
#define NUMERIC_NAN 0xC000
45-
#define NUMERIC_DSCALE_MASK 0x3FFF
46-
#define NUMERIC_SIGN(n) ((n)->n_sign_dscale & NUMERIC_SIGN_MASK)
47-
#define NUMERIC_DSCALE(n) ((n)->n_sign_dscale & NUMERIC_DSCALE_MASK)
48-
#define NUMERIC_IS_NAN(n) (NUMERIC_SIGN(n) != NUMERIC_POS && \
49-
NUMERIC_SIGN(n) != NUMERIC_NEG)
50-
#define NUMERIC_HDRSZ (VARHDRSZ + sizeof(uint16) + sizeof(int16))
51-
52-
53-
/*
54-
* The Numeric data type stored in the database
55-
*
56-
* NOTE: by convention, values in the packed form have been stripped of
57-
* all leading and trailing zero digits (where a "digit" is of base NBASE).
58-
* In particular, if the value is zero, there will be no digits at all!
59-
* The weight is arbitrary in that case, but we normally set it to zero.
60-
*/
61-
struct NumericData
62-
{
63-
int32 vl_len_; /* varlena header (do not touch directly!) */
64-
uint16 n_sign_dscale; /* Sign + display scale */
65-
int16 n_weight; /* Weight of 1st digit */
66-
char n_data[1]; /* Digits (really array of NumericDigit) */
67-
};
68-
69-
7038
/* ----------
7139
* Uncomment the following to enable compilation of dump_numeric()
7240
* and dump_var() and to get a dump of any result produced by make_result().
@@ -120,6 +88,122 @@ typedef signed char NumericDigit;
12088
typedef int16 NumericDigit;
12189
#endif
12290

91+
/*
92+
* The Numeric type as stored on disk.
93+
*
94+
* If the high bits of the first word of a NumericChoice (n_header, or
95+
* n_short.n_header, or n_long.n_sign_dscale) are NUMERIC_SHORT, then the
96+
* numeric follows the NumericShort format; if they are NUMERIC_POS or
97+
* NUMERIC_NEG, it follows the NumericLong format. If they are NUMERIC_NAN,
98+
* it is a NaN. We currently always store a NaN using just two bytes (i.e.
99+
* only n_header), but previous releases used only the NumericLong format,
100+
* so we might find 4-byte NaNs on disk if a database has been migrated using
101+
* pg_upgrade. In either case, when the high bits indicate a NaN, the
102+
* remaining bits are never examined. Currently, we always initialize these
103+
* to zero, but it might be possible to use them for some other purpose in
104+
* the future.
105+
*
106+
* In the NumericShort format, the remaining 14 bits of the header word
107+
* (n_short.n_header) are allocated as follows: 1 for sign (positive or
108+
* negative), 6 for display scale, and 7 for weight. In practice, most
109+
* commonly-encountered values can be represented this way.
110+
*
111+
* In the NumericLong format, the remaining 14 bits of the header word
112+
* (n_long.n_sign_dscale) represent the display scale; and the weight is
113+
* stored separately in n_weight.
114+
*
115+
* NOTE: by convention, values in the packed form have been stripped of
116+
* all leading and trailing zero digits (where a "digit" is of base NBASE).
117+
* In particular, if the value is zero, there will be no digits at all!
118+
* The weight is arbitrary in that case, but we normally set it to zero.
119+
*/
120+
121+
struct NumericShort
122+
{
123+
uint16 n_header; /* Sign + display scale + weight */
124+
NumericDigit n_data[1]; /* Digits */
125+
};
126+
127+
struct NumericLong
128+
{
129+
uint16 n_sign_dscale; /* Sign + display scale */
130+
int16 n_weight; /* Weight of 1st digit */
131+
NumericDigit n_data[1]; /* Digits */
132+
};
133+
134+
union NumericChoice
135+
{
136+
uint16 n_header; /* Header word */
137+
struct NumericLong n_long; /* Long form (4-byte header) */
138+
struct NumericShort n_short; /* Short form (2-byte header) */
139+
};
140+
141+
struct NumericData
142+
{
143+
int32 vl_len_; /* varlena header (do not touch directly!) */
144+
union NumericChoice choice; /* choice of format */
145+
};
146+
147+
148+
/*
149+
* Interpretation of high bits.
150+
*/
151+
152+
#define NUMERIC_SIGN_MASK 0xC000
153+
#define NUMERIC_POS 0x0000
154+
#define NUMERIC_NEG 0x4000
155+
#define NUMERIC_SHORT 0x8000
156+
#define NUMERIC_NAN 0xC000
157+
158+
#define NUMERIC_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_SIGN_MASK)
159+
#define NUMERIC_IS_NAN(n) (NUMERIC_FLAGBITS(n) == NUMERIC_NAN)
160+
#define NUMERIC_IS_SHORT(n) (NUMERIC_FLAGBITS(n) == NUMERIC_SHORT)
161+
162+
#define NUMERIC_HDRSZ (VARHDRSZ + sizeof(uint16) + sizeof(int16))
163+
#define NUMERIC_HDRSZ_SHORT (VARHDRSZ + sizeof(uint16))
164+
165+
/*
166+
* If the flag bits are NUMERIC_SHORT or NUMERIC_NAN, we want the short header;
167+
* otherwise, we want the long one. Instead of testing against each value, we
168+
* can just look at the high bit, for a slight efficiency gain.
169+
*/
170+
#define NUMERIC_HEADER_SIZE(n) \
171+
(VARHDRSZ + sizeof(uint16) + \
172+
(((NUMERIC_FLAGBITS(n) & 0x8000) == 0) ? sizeof(int16) : 0))
173+
174+
/*
175+
* Short format definitions.
176+
*/
177+
178+
#define NUMERIC_SHORT_SIGN_MASK 0x2000
179+
#define NUMERIC_SHORT_DSCALE_MASK 0x1F80
180+
#define NUMERIC_SHORT_DSCALE_SHIFT 7
181+
#define NUMERIC_SHORT_DSCALE_MAX \
182+
(NUMERIC_SHORT_DSCALE_MASK >> NUMERIC_SHORT_DSCALE_SHIFT)
183+
#define NUMERIC_SHORT_WEIGHT_SIGN_MASK 0x0040
184+
#define NUMERIC_SHORT_WEIGHT_MASK 0x003F
185+
#define NUMERIC_SHORT_WEIGHT_MAX NUMERIC_SHORT_WEIGHT_MASK
186+
#define NUMERIC_SHORT_WEIGHT_MIN (-(NUMERIC_SHORT_WEIGHT_MASK+1))
187+
188+
/*
189+
* Extract sign, display scale, weight.
190+
*/
191+
192+
#define NUMERIC_DSCALE_MASK 0x3FFF
193+
194+
#define NUMERIC_SIGN(n) \
195+
(NUMERIC_IS_SHORT(n) ? \
196+
(((n)->choice.n_short.n_header & NUMERIC_SHORT_SIGN_MASK) ? \
197+
NUMERIC_NEG : NUMERIC_POS) : NUMERIC_FLAGBITS(n))
198+
#define NUMERIC_DSCALE(n) (NUMERIC_IS_SHORT((n)) ? \
199+
((n)->choice.n_short.n_header & NUMERIC_SHORT_DSCALE_MASK) \
200+
>> NUMERIC_SHORT_DSCALE_SHIFT \
201+
: ((n)->choice.n_long.n_sign_dscale & NUMERIC_DSCALE_MASK))
202+
#define NUMERIC_WEIGHT(n) (NUMERIC_IS_SHORT((n)) ? \
203+
(((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK ? \
204+
~NUMERIC_SHORT_WEIGHT_MASK : 0) \
205+
| ((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_MASK)) \
206+
: ((n)->choice.n_long.n_weight))
123207

124208
/* ----------
125209
* NumericVar is the format we use for arithmetic. The digit-array part
@@ -266,9 +350,14 @@ static void dump_var(const char *str, NumericVar *var);
266350

267351
#define init_var(v) MemSetAligned(v, 0, sizeof(NumericVar))
268352

269-
#define NUMERIC_DIGITS(num) ((NumericDigit *)(num)->n_data)
353+
#define NUMERIC_DIGITS(num) (NUMERIC_IS_SHORT(num) ? \
354+
(num)->choice.n_short.n_data : (num)->choice.n_long.n_data)
270355
#define NUMERIC_NDIGITS(num) \
271-
((VARSIZE(num) - NUMERIC_HDRSZ) / sizeof(NumericDigit))
356+
((VARSIZE(num) - NUMERIC_HEADER_SIZE(num)) / sizeof(NumericDigit))
357+
#define NUMERIC_CAN_BE_SHORT(scale,weight) \
358+
((scale) <= NUMERIC_SHORT_DSCALE_MAX && \
359+
(weight) <= NUMERIC_SHORT_WEIGHT_MAX && \
360+
(weight) >= NUMERIC_SHORT_WEIGHT_MIN)
272361

273362
static void alloc_var(NumericVar *var, int ndigits);
274363
static void free_var(NumericVar *var);
@@ -652,15 +741,23 @@ numeric (PG_FUNCTION_ARGS)
652741
/*
653742
* If the number is certainly in bounds and due to the target scale no
654743
* rounding could be necessary, just make a copy of the input and modify
655-
* its scale fields. (Note we assume the existing dscale is honest...)
744+
* its scale fields, unless the larger scale forces us to abandon the
745+
* short representation. (Note we assume the existing dscale is honest...)
656746
*/
657-
ddigits = (num->n_weight + 1) * DEC_DIGITS;
658-
if (ddigits <= maxdigits && scale >= NUMERIC_DSCALE(num))
747+
ddigits = (NUMERIC_WEIGHT(num) + 1) * DEC_DIGITS;
748+
if (ddigits <= maxdigits && scale >= NUMERIC_DSCALE(num)
749+
&& (NUMERIC_CAN_BE_SHORT(scale, NUMERIC_WEIGHT(num))
750+
|| !NUMERIC_IS_SHORT(num)))
659751
{
660752
new = (Numeric) palloc(VARSIZE(num));
661753
memcpy(new, num, VARSIZE(num));
662-
new->n_sign_dscale = NUMERIC_SIGN(new) |
663-
((uint16) scale & NUMERIC_DSCALE_MASK);
754+
if (NUMERIC_IS_SHORT(num))
755+
new->choice.n_short.n_header =
756+
(num->choice.n_short.n_header & ~NUMERIC_SHORT_DSCALE_MASK)
757+
| (scale << NUMERIC_SHORT_DSCALE_SHIFT);
758+
else
759+
new->choice.n_long.n_sign_dscale = NUMERIC_SIGN(new) |
760+
((uint16) scale & NUMERIC_DSCALE_MASK);
664761
PG_RETURN_NUMERIC(new);
665762
}
666763

@@ -766,7 +863,11 @@ numeric_abs(PG_FUNCTION_ARGS)
766863
res = (Numeric) palloc(VARSIZE(num));
767864
memcpy(res, num, VARSIZE(num));
768865

769-
res->n_sign_dscale = NUMERIC_POS | NUMERIC_DSCALE(num);
866+
if (NUMERIC_IS_SHORT(num))
867+
res->choice.n_short.n_header =
868+
num->choice.n_short.n_header & ~NUMERIC_SHORT_SIGN_MASK;
869+
else
870+
res->choice.n_long.n_sign_dscale = NUMERIC_POS | NUMERIC_DSCALE(num);
770871

771872
PG_RETURN_NUMERIC(res);
772873
}
@@ -795,13 +896,18 @@ numeric_uminus(PG_FUNCTION_ARGS)
795896
* we can identify a ZERO by the fact that there are no digits at all. Do
796897
* nothing to a zero.
797898
*/
798-
if (VARSIZE(num) != NUMERIC_HDRSZ)
899+
if (NUMERIC_NDIGITS(num) != 0)
799900
{
800901
/* Else, flip the sign */
801-
if (NUMERIC_SIGN(num) == NUMERIC_POS)
802-
res->n_sign_dscale = NUMERIC_NEG | NUMERIC_DSCALE(num);
902+
if (NUMERIC_IS_SHORT(num))
903+
res->choice.n_short.n_header =
904+
num->choice.n_short.n_header ^ NUMERIC_SHORT_SIGN_MASK;
905+
else if (NUMERIC_SIGN(num) == NUMERIC_POS)
906+
res->choice.n_long.n_sign_dscale =
907+
NUMERIC_NEG | NUMERIC_DSCALE(num);
803908
else
804-
res->n_sign_dscale = NUMERIC_POS | NUMERIC_DSCALE(num);
909+
res->choice.n_long.n_sign_dscale =
910+
NUMERIC_POS | NUMERIC_DSCALE(num);
805911
}
806912

807913
PG_RETURN_NUMERIC(res);
@@ -845,7 +951,7 @@ numeric_sign(PG_FUNCTION_ARGS)
845951
* The packed format is known to be totally zero digit trimmed always. So
846952
* we can identify a ZERO by the fact that there are no digits at all.
847953
*/
848-
if (VARSIZE(num) == NUMERIC_HDRSZ)
954+
if (NUMERIC_NDIGITS(num) == 0)
849955
set_var_from_var(&const_zero, &result);
850956
else
851957
{
@@ -1283,9 +1389,9 @@ cmp_numerics(Numeric num1, Numeric num2)
12831389
else
12841390
{
12851391
result = cmp_var_common(NUMERIC_DIGITS(num1), NUMERIC_NDIGITS(num1),
1286-
num1->n_weight, NUMERIC_SIGN(num1),
1392+
NUMERIC_WEIGHT(num1), NUMERIC_SIGN(num1),
12871393
NUMERIC_DIGITS(num2), NUMERIC_NDIGITS(num2),
1288-
num2->n_weight, NUMERIC_SIGN(num2));
1394+
NUMERIC_WEIGHT(num2), NUMERIC_SIGN(num2));
12891395
}
12901396

12911397
return result;
@@ -1302,12 +1408,13 @@ hash_numeric(PG_FUNCTION_ARGS)
13021408
int end_offset;
13031409
int i;
13041410
int hash_len;
1411+
NumericDigit *digits;
13051412

13061413
/* If it's NaN, don't try to hash the rest of the fields */
13071414
if (NUMERIC_IS_NAN(key))
13081415
PG_RETURN_UINT32(0);
13091416

1310-
weight = key->n_weight;
1417+
weight = NUMERIC_WEIGHT(key);
13111418
start_offset = 0;
13121419
end_offset = 0;
13131420

@@ -1317,9 +1424,10 @@ hash_numeric(PG_FUNCTION_ARGS)
13171424
* zeros are suppressed, but we're paranoid. Note that we measure the
13181425
* starting and ending offsets in units of NumericDigits, not bytes.
13191426
*/
1427+
digits = NUMERIC_DIGITS(key);
13201428
for (i = 0; i < NUMERIC_NDIGITS(key); i++)
13211429
{
1322-
if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
1430+
if (digits[i] != (NumericDigit) 0)
13231431
break;
13241432

13251433
start_offset++;
@@ -1340,7 +1448,7 @@ hash_numeric(PG_FUNCTION_ARGS)
13401448

13411449
for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
13421450
{
1343-
if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
1451+
if (digits[i] != (NumericDigit) 0)
13441452
break;
13451453

13461454
end_offset++;
@@ -2536,7 +2644,7 @@ numeric_avg(PG_FUNCTION_ARGS)
25362644

25372645
/* SQL92 defines AVG of no values to be NULL */
25382646
/* N is zero iff no digits (cf. numeric_uminus) */
2539-
if (VARSIZE(N) == NUMERIC_HDRSZ)
2647+
if (NUMERIC_NDIGITS(N) == 0)
25402648
PG_RETURN_NULL();
25412649

25422650
PG_RETURN_DATUM(DirectFunctionCall2(numeric_div,
@@ -2974,7 +3082,8 @@ dump_numeric(const char *str, Numeric num)
29743082

29753083
ndigits = NUMERIC_NDIGITS(num);
29763084

2977-
printf("%s: NUMERIC w=%d d=%d ", str, num->n_weight, NUMERIC_DSCALE(num));
3085+
printf("%s: NUMERIC w=%d d=%d ", str,
3086+
NUMERIC_WEIGHT(num), NUMERIC_DSCALE(num));
29783087
switch (NUMERIC_SIGN(num))
29793088
{
29803089
case NUMERIC_POS:
@@ -3265,11 +3374,11 @@ set_var_from_num(Numeric num, NumericVar *dest)
32653374

32663375
alloc_var(dest, ndigits);
32673376

3268-
dest->weight = num->n_weight;
3377+
dest->weight = NUMERIC_WEIGHT(num);
32693378
dest->sign = NUMERIC_SIGN(num);
32703379
dest->dscale = NUMERIC_DSCALE(num);
32713380

3272-
memcpy(dest->digits, num->n_data, ndigits * sizeof(NumericDigit));
3381+
memcpy(dest->digits, NUMERIC_DIGITS(num), ndigits * sizeof(NumericDigit));
32733382
}
32743383

32753384

@@ -3561,11 +3670,11 @@ make_result(NumericVar *var)
35613670

35623671
if (sign == NUMERIC_NAN)
35633672
{
3564-
result = (Numeric) palloc(NUMERIC_HDRSZ);
3673+
result = (Numeric) palloc(NUMERIC_HDRSZ_SHORT);
35653674

3566-
SET_VARSIZE(result, NUMERIC_HDRSZ);
3567-
result->n_weight = 0;
3568-
result->n_sign_dscale = NUMERIC_NAN;
3675+
SET_VARSIZE(result, NUMERIC_HDRSZ_SHORT);
3676+
result->choice.n_header = NUMERIC_NAN;
3677+
/* the header word is all we need */
35693678

35703679
dump_numeric("make_result()", result);
35713680
return result;
@@ -3592,16 +3701,33 @@ make_result(NumericVar *var)
35923701
}
35933702

35943703
/* Build the result */
3595-
len = NUMERIC_HDRSZ + n * sizeof(NumericDigit);
3596-
result = (Numeric) palloc(len);
3597-
SET_VARSIZE(result, len);
3598-
result->n_weight = weight;
3599-
result->n_sign_dscale = sign | (var->dscale & NUMERIC_DSCALE_MASK);
3704+
if (NUMERIC_CAN_BE_SHORT(var->dscale, weight))
3705+
{
3706+
len = NUMERIC_HDRSZ_SHORT + n * sizeof(NumericDigit);
3707+
result = (Numeric) palloc(len);
3708+
SET_VARSIZE(result, len);
3709+
result->choice.n_short.n_header =
3710+
(sign == NUMERIC_NEG ? (NUMERIC_SHORT | NUMERIC_SHORT_SIGN_MASK)
3711+
: NUMERIC_SHORT)
3712+
| (var->dscale << NUMERIC_SHORT_DSCALE_SHIFT)
3713+
| (weight < 0 ? NUMERIC_SHORT_WEIGHT_SIGN_MASK : 0)
3714+
| (weight & NUMERIC_SHORT_WEIGHT_MASK);
3715+
}
3716+
else
3717+
{
3718+
len = NUMERIC_HDRSZ + n * sizeof(NumericDigit);
3719+
result = (Numeric) palloc(len);
3720+
SET_VARSIZE(result, len);
3721+
result->choice.n_long.n_sign_dscale =
3722+
sign | (var->dscale & NUMERIC_DSCALE_MASK);
3723+
result->choice.n_long.n_weight = weight;
3724+
}
36003725

3601-
memcpy(result->n_data, digits, n * sizeof(NumericDigit));
3726+
memcpy(NUMERIC_DIGITS(result), digits, n * sizeof(NumericDigit));
3727+
Assert(NUMERIC_NDIGITS(result) == n);
36023728

36033729
/* Check for overflow of int16 fields */
3604-
if (result->n_weight != weight ||
3730+
if (NUMERIC_WEIGHT(result) != weight ||
36053731
NUMERIC_DSCALE(result) != var->dscale)
36063732
ereport(ERROR,
36073733
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),

0 commit comments

Comments
 (0)