Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit faff8f8

Browse files
committed
Allow underscores in integer and numeric constants.
This allows underscores to be used in integer and numeric literals, and their corresponding type input functions, for visual grouping. For example:

    1_500_000_000
    3.14159_26535_89793
    0xffff_ffff
    0b_1001_0001

A single underscore is allowed between any 2 digits, or immediately after the base prefix indicator of non-decimal integers, per SQL:202x draft.

Peter Eisentraut and Dean Rasheed

Discussion: https://postgr.es/m/84aae844-dc55-a4be-86d9-4f0fa405cc97%40enterprisedb.com
1 parent 1b6f632 commit faff8f8

File tree

22 files changed

+724
-184
lines changed

22 files changed

+724
-184
lines changed

doc/src/sgml/syntax.sgml

+23-13
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,8 @@ $function$
677677
decimal point, if one is used. At least one digit must follow the
678678
exponent marker (<literal>e</literal>), if one is present.
679679
There cannot be any spaces or other characters embedded in the
680-
constant. Note that any leading plus or minus sign is not actually
680+
constant, except for underscores, which can be used for visual grouping as
681+
described below. Note that any leading plus or minus sign is not actually
681682
considered part of the constant; it is an operator applied to the
682683
constant.
683684
</para>
@@ -695,23 +696,24 @@ $function$
695696
</para>
696697

697698
<para>
698-
Additionally, non-decimal integer constants can be used in these forms:
699+
Additionally, non-decimal integer constants are accepted in these forms:
699700
<synopsis>
700701
0x<replaceable>hexdigits</replaceable>
701702
0o<replaceable>octdigits</replaceable>
702703
0b<replaceable>bindigits</replaceable>
703704
</synopsis>
704-
<replaceable>hexdigits</replaceable> is one or more hexadecimal digits
705+
where <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
705706
(0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
706-
digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
707+
digits (0-7), and <replaceable>bindigits</replaceable> is one or more binary
707708
digits (0 or 1). Hexadecimal digits and the radix prefixes can be in
708709
upper or lower case. Note that only integers can have non-decimal forms,
709710
not numbers with fractional parts.
710711
</para>
711712

712713
<para>
713-
These are some examples of this:
714-
<literallayout>0b100101
714+
These are some examples of valid non-decimal integer constants:
715+
<literallayout>
716+
0b100101
715717
0B10011001
716718
0o273
717719
0O755
@@ -720,13 +722,21 @@ $function$
720722
</literallayout>
721723
</para>
722724

723-
<note>
724-
<para>
725-
Non-decimal integer constants are currently only supported in the range
726-
of the <type>bigint</type> type (see <xref
727-
linkend="datatype-numeric-table"/>).
728-
</para>
729-
</note>
725+
<para>
726+
For visual grouping, underscores can be inserted between digits. These
727+
have no further effect on the value of the constant. For example:
728+
<literallayout>
729+
1_500_000_000
730+
0b10001000_00000000
731+
0o_1_755
732+
0xFFFF_FFFF
733+
1.618_034
734+
</literallayout>
735+
Underscores are not allowed at the start or end of a numeric constant or
736+
a group of digits (that is, immediately before or after the decimal point
737+
or the exponent marker), and more than one underscore in a row is not
738+
allowed.
739+
</para>
730740

731741
<para>
732742
<indexterm><primary>integer</primary></indexterm>

src/backend/catalog/sql_features.txt

+1
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ T653 SQL-schema statements in external routines YES
528528
T654 SQL-dynamic statements in external routines NO
529529
T655 Cyclically dependent routines YES
530530
T661 Non-decimal integer literals YES SQL:202x draft
531+
T662 Underscores in integer literals YES SQL:202x draft
531532
T811 Basic SQL/JSON constructor functions NO
532533
T812 SQL/JSON: JSON_OBJECTAGG NO
533534
T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO

src/backend/parser/parse_node.c

+4-39
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "catalog/pg_type.h"
2020
#include "mb/pg_wchar.h"
2121
#include "nodes/makefuncs.h"
22+
#include "nodes/miscnodes.h"
2223
#include "nodes/nodeFuncs.h"
2324
#include "nodes/subscripting.h"
2425
#include "parser/parse_coerce.h"
@@ -385,47 +386,11 @@ make_const(ParseState *pstate, A_Const *aconst)
385386
{
386387
/* could be an oversize integer as well as a float ... */
387388

388-
int base = 10;
389-
char *startptr;
390-
int sign;
391-
char *testvalue;
389+
ErrorSaveContext escontext = {T_ErrorSaveContext};
392390
int64 val64;
393-
char *endptr;
394391

395-
startptr = aconst->val.fval.fval;
396-
if (startptr[0] == '-')
397-
{
398-
sign = -1;
399-
startptr++;
400-
}
401-
else
402-
sign = +1;
403-
if (startptr[0] == '0')
404-
{
405-
if (startptr[1] == 'b' || startptr[1] == 'B')
406-
{
407-
base = 2;
408-
startptr += 2;
409-
}
410-
else if (startptr[1] == 'o' || startptr[1] == 'O')
411-
{
412-
base = 8;
413-
startptr += 2;
414-
}
415-
else if (startptr[1] == 'x' || startptr[1] == 'X')
416-
{
417-
base = 16;
418-
startptr += 2;
419-
}
420-
}
421-
422-
if (sign == +1)
423-
testvalue = startptr;
424-
else
425-
testvalue = psprintf("-%s", startptr);
426-
errno = 0;
427-
val64 = strtoi64(testvalue, &endptr, base);
428-
if (errno == 0 && *endptr == '\0')
392+
val64 = pg_strtoint64_safe(aconst->val.fval.fval, (Node *) &escontext);
393+
if (!escontext.error_occurred)
429394
{
430395
/*
431396
* It might actually fit in int32. Probably only INT_MIN

src/backend/parser/scan.l

+14-13
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@
3737

3838
#include "common/string.h"
3939
#include "gramparse.h"
40+
#include "nodes/miscnodes.h"
4041
#include "parser/parser.h" /* only needed for GUC variables */
4142
#include "parser/scansup.h"
4243
#include "port/pg_bitutils.h"
4344
#include "mb/pg_wchar.h"
45+
#include "utils/builtins.h"
4446
}
4547

4648
%{
@@ -395,19 +397,19 @@ hexdigit [0-9A-Fa-f]
395397
octdigit [0-7]
396398
bindigit [0-1]
397399

398-
decinteger {decdigit}+
399-
hexinteger 0[xX]{hexdigit}+
400-
octinteger 0[oO]{octdigit}+
401-
bininteger 0[bB]{bindigit}+
400+
decinteger {decdigit}(_?{decdigit})*
401+
hexinteger 0[xX](_?{hexdigit})+
402+
octinteger 0[oO](_?{octdigit})+
403+
bininteger 0[bB](_?{bindigit})+
402404

403-
hexfail 0[xX]
404-
octfail 0[oO]
405-
binfail 0[bB]
405+
hexfail 0[xX]_?
406+
octfail 0[oO]_?
407+
binfail 0[bB]_?
406408

407409
numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
408410
numericfail {decdigit}+\.\.
409411

410-
real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
412+
real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
411413
realfail ({decinteger}|{numeric})[Ee][-+]
412414

413415
decinteger_junk {decinteger}{ident_start}
@@ -1364,12 +1366,11 @@ litbufdup(core_yyscan_t yyscanner)
13641366
static int
13651367
process_integer_literal(const char *token, YYSTYPE *lval, int base)
13661368
{
1367-
int val;
1368-
char *endptr;
1369+
ErrorSaveContext escontext = {T_ErrorSaveContext};
1370+
int32 val;
13691371

1370-
errno = 0;
1371-
val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1372-
if (*endptr != '\0' || errno == ERANGE)
1372+
val = pg_strtoint32_safe(token, (Node *) &escontext);
1373+
if (escontext.error_occurred)
13731374
{
13741375
/* integer too large (or contains decimal pt), treat it as a float */
13751376
lval->str = pstrdup(token);

src/backend/utils/adt/numeric.c

+83-23
Original file line numberDiff line numberDiff line change
@@ -6968,10 +6968,7 @@ set_var_from_str(const char *str, const char *cp,
69686968
}
69696969

69706970
if (!isdigit((unsigned char) *cp))
6971-
ereturn(escontext, false,
6972-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
6973-
errmsg("invalid input syntax for type %s: \"%s\"",
6974-
"numeric", str)));
6971+
goto invalid_syntax;
69756972

69766973
decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
69776974

@@ -6992,12 +6989,19 @@ set_var_from_str(const char *str, const char *cp,
69926989
else if (*cp == '.')
69936990
{
69946991
if (have_dp)
6995-
ereturn(escontext, false,
6996-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
6997-
errmsg("invalid input syntax for type %s: \"%s\"",
6998-
"numeric", str)));
6992+
goto invalid_syntax;
69996993
have_dp = true;
70006994
cp++;
6995+
/* decimal point must not be followed by underscore */
6996+
if (*cp == '_')
6997+
goto invalid_syntax;
6998+
}
6999+
else if (*cp == '_')
7000+
{
7001+
/* underscore must be followed by more digits */
7002+
cp++;
7003+
if (!isdigit((unsigned char) *cp))
7004+
goto invalid_syntax;
70017005
}
70027006
else
70037007
break;
@@ -7010,17 +7014,8 @@ set_var_from_str(const char *str, const char *cp,
70107014
/* Handle exponent, if any */
70117015
if (*cp == 'e' || *cp == 'E')
70127016
{
7013-
long exponent;
7014-
char *endptr;
7015-
7016-
cp++;
7017-
exponent = strtol(cp, &endptr, 10);
7018-
if (endptr == cp)
7019-
ereturn(escontext, false,
7020-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
7021-
errmsg("invalid input syntax for type %s: \"%s\"",
7022-
"numeric", str)));
7023-
cp = endptr;
7017+
int64 exponent = 0;
7018+
bool neg = false;
70247019

70257020
/*
70267021
* At this point, dweight and dscale can't be more than about
@@ -7030,10 +7025,43 @@ set_var_from_str(const char *str, const char *cp,
70307025
* fit in storage format, make_result() will complain about it later;
70317026
* for consistency use the same ereport errcode/text as make_result().
70327027
*/
7033-
if (exponent >= INT_MAX / 2 || exponent <= -(INT_MAX / 2))
7034-
ereturn(escontext, false,
7035-
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
7036-
errmsg("value overflows numeric format")));
7028+
7029+
/* exponent sign */
7030+
cp++;
7031+
if (*cp == '+')
7032+
cp++;
7033+
else if (*cp == '-')
7034+
{
7035+
neg = true;
7036+
cp++;
7037+
}
7038+
7039+
/* exponent digits */
7040+
if (!isdigit((unsigned char) *cp))
7041+
goto invalid_syntax;
7042+
7043+
while (*cp)
7044+
{
7045+
if (isdigit((unsigned char) *cp))
7046+
{
7047+
exponent = exponent * 10 + (*cp++ - '0');
7048+
if (exponent > PG_INT32_MAX / 2)
7049+
goto out_of_range;
7050+
}
7051+
else if (*cp == '_')
7052+
{
7053+
/* underscore must be followed by more digits */
7054+
cp++;
7055+
if (!isdigit((unsigned char) *cp))
7056+
goto invalid_syntax;
7057+
}
7058+
else
7059+
break;
7060+
}
7061+
7062+
if (neg)
7063+
exponent = -exponent;
7064+
70377065
dweight += (int) exponent;
70387066
dscale -= (int) exponent;
70397067
if (dscale < 0)
@@ -7085,6 +7113,17 @@ set_var_from_str(const char *str, const char *cp,
70857113
*endptr = cp;
70867114

70877115
return true;
7116+
7117+
out_of_range:
7118+
ereturn(escontext, false,
7119+
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
7120+
errmsg("value overflows numeric format")));
7121+
7122+
invalid_syntax:
7123+
ereturn(escontext, false,
7124+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
7125+
errmsg("invalid input syntax for type %s: \"%s\"",
7126+
"numeric", str)));
70887127
}
70897128

70907129

@@ -7167,6 +7206,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
71677206
tmp = tmp * 16 + xdigit_value(*cp++);
71687207
mul = mul * 16;
71697208
}
7209+
else if (*cp == '_')
7210+
{
7211+
/* Underscore must be followed by more digits */
7212+
cp++;
7213+
if (!isxdigit((unsigned char) *cp))
7214+
goto invalid_syntax;
7215+
}
71707216
else
71717217
break;
71727218
}
@@ -7197,6 +7243,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
71977243
tmp = tmp * 8 + (*cp++ - '0');
71987244
mul = mul * 8;
71997245
}
7246+
else if (*cp == '_')
7247+
{
7248+
/* Underscore must be followed by more digits */
7249+
cp++;
7250+
if (*cp < '0' || *cp > '7')
7251+
goto invalid_syntax;
7252+
}
72007253
else
72017254
break;
72027255
}
@@ -7227,6 +7280,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
72277280
tmp = tmp * 2 + (*cp++ - '0');
72287281
mul = mul * 2;
72297282
}
7283+
else if (*cp == '_')
7284+
{
7285+
/* Underscore must be followed by more digits */
7286+
cp++;
7287+
if (*cp < '0' || *cp > '1')
7288+
goto invalid_syntax;
7289+
}
72307290
else
72317291
break;
72327292
}

0 commit comments

Comments
 (0)