Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1a950f3

Browse files
committed
Implement standard datetime parsing mode
The SQL Standard 2016 defines rules for handling separators in datetime template strings that differ from the to_date()/to_timestamp() rules. The standard allows only a small set of separators and requires strict matching for them. The standard applies to the jsonpath .datetime() method and the CAST (... FORMAT ...) SQL clause. We're not going to change the handling of separators in the existing to_date()/to_timestamp() functions, because their current behavior is familiar to users. The standard behavior is now available via a special flag, which will be used in the upcoming .datetime() jsonpath method. Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com Author: Alexander Korotkov
1 parent bd29cc1 commit 1a950f3

File tree

1 file changed

+104
-40
lines changed

1 file changed

+104
-40
lines changed

src/backend/utils/adt/formatting.c

+104-40
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,12 @@
9999
#include "utils/pg_locale.h"
100100

101101
/* ----------
102-
* Routines type
102+
* Routines flags
103103
* ----------
104104
*/
105-
#define DCH_TYPE 1 /* DATE-TIME version */
106-
#define NUM_TYPE 2 /* NUMBER version */
105+
#define DCH_FLAG 0x1 /* DATE-TIME flag */
106+
#define NUM_FLAG 0x2 /* NUMBER flag */
107+
#define STD_FLAG 0x4 /* STANDARD flag */
107108

108109
/* ----------
109110
* KeyWord Index (ascii from position 32 (' ') to 126 (~))
@@ -384,6 +385,7 @@ typedef struct
384385
{
385386
FormatNode format[DCH_CACHE_SIZE + 1];
386387
char str[DCH_CACHE_SIZE + 1];
388+
bool std;
387389
bool valid;
388390
int age;
389391
} DCHCacheEntry;
@@ -1000,11 +1002,12 @@ static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int t
10001002
static bool is_separator_char(const char *str);
10011003
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
10021004
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1003-
const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
1005+
const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
10041006

10051007
static void DCH_to_char(FormatNode *node, bool is_interval,
10061008
TmToChar *in, char *out, Oid collid);
1007-
static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out);
1009+
static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out,
1010+
bool std);
10081011

10091012
#ifdef DEBUG_TO_FROM_CHAR
10101013
static void dump_index(const KeyWord *k, const int *index);
@@ -1021,7 +1024,7 @@ static int from_char_parse_int_len(int *dest, char **src, const int len, FormatN
10211024
static int from_char_parse_int(int *dest, char **src, FormatNode *node);
10221025
static int seq_search(char *name, const char *const *array, int type, int max, int *len);
10231026
static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
1024-
static void do_to_timestamp(text *date_txt, text *fmt,
1027+
static void do_to_timestamp(text *date_txt, text *fmt, bool std,
10251028
struct pg_tm *tm, fsec_t *fsec, int *fprec);
10261029
static char *fill_str(char *str, int c, int max);
10271030
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
@@ -1033,9 +1036,9 @@ static void NUM_numpart_to_char(NUMProc *Np, int id);
10331036
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
10341037
char *number, int input_len, int to_char_out_pre_spaces,
10351038
int sign, bool is_to_char, Oid collid);
1036-
static DCHCacheEntry *DCH_cache_getnew(const char *str);
1037-
static DCHCacheEntry *DCH_cache_search(const char *str);
1038-
static DCHCacheEntry *DCH_cache_fetch(const char *str);
1039+
static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1040+
static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1041+
static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
10391042
static NUMCacheEntry *NUM_cache_getnew(const char *str);
10401043
static NUMCacheEntry *NUM_cache_search(const char *str);
10411044
static NUMCacheEntry *NUM_cache_fetch(const char *str);
@@ -1278,7 +1281,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
12781281
*/
12791282
static void
12801283
parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1281-
const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
1284+
const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
12821285
{
12831286
FormatNode *n;
12841287

@@ -1296,7 +1299,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
12961299
/*
12971300
* Prefix
12981301
*/
1299-
if (ver == DCH_TYPE &&
1302+
if ((flags & DCH_FLAG) &&
13001303
(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
13011304
{
13021305
suffix |= s->id;
@@ -1317,13 +1320,13 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13171320
/*
13181321
* NUM version: Prepare global NUMDesc struct
13191322
*/
1320-
if (ver == NUM_TYPE)
1323+
if (flags & NUM_FLAG)
13211324
NUMDesc_prepare(Num, n);
13221325

13231326
/*
13241327
* Postfix
13251328
*/
1326-
if (ver == DCH_TYPE && *str &&
1329+
if ((flags & DCH_FLAG) && *str &&
13271330
(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
13281331
{
13291332
n->suffix |= s->id;
@@ -1337,11 +1340,34 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13371340
{
13381341
int chlen;
13391342

1340-
/*
1341-
* Process double-quoted literal string, if any
1342-
*/
1343-
if (*str == '"')
1343+
if (flags & STD_FLAG)
1344+
{
1345+
/*
1346+
* Standard mode, allow only following separators: "-./,':; "
1347+
*/
1348+
if (strchr("-./,':; ", *str) == NULL)
1349+
ereport(ERROR,
1350+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1351+
errmsg("invalid datetime format separator: \"%s\"",
1352+
pnstrdup(str, pg_mblen(str)))));
1353+
1354+
if (*str == ' ')
1355+
n->type = NODE_TYPE_SPACE;
1356+
else
1357+
n->type = NODE_TYPE_SEPARATOR;
1358+
1359+
n->character[0] = *str;
1360+
n->character[1] = '\0';
1361+
n->key = NULL;
1362+
n->suffix = 0;
1363+
n++;
1364+
str++;
1365+
}
1366+
else if (*str == '"')
13441367
{
1368+
/*
1369+
* Process double-quoted literal string, if any
1370+
*/
13451371
str++;
13461372
while (*str)
13471373
{
@@ -1373,7 +1399,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13731399
str++;
13741400
chlen = pg_mblen(str);
13751401

1376-
if (ver == DCH_TYPE && is_separator_char(str))
1402+
if ((flags & DCH_FLAG) && is_separator_char(str))
13771403
n->type = NODE_TYPE_SEPARATOR;
13781404
else if (isspace((unsigned char) *str))
13791405
n->type = NODE_TYPE_SPACE;
@@ -3060,13 +3086,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
30603086
* ----------
30613087
*/
30623088
static void
3063-
DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
3089+
DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std)
30643090
{
30653091
FormatNode *n;
30663092
char *s;
30673093
int len,
30683094
value;
3069-
bool fx_mode = false;
3095+
bool fx_mode = std;
30703096

30713097
/* number of extra skipped characters (more than given in format string) */
30723098
int extra_skip = 0;
@@ -3089,7 +3115,23 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30893115

30903116
if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
30913117
{
3092-
if (!fx_mode)
3118+
if (std)
3119+
{
3120+
/*
3121+
* Standard mode requires strict matching between format
3122+
* string separators/spaces and input string.
3123+
*/
3124+
Assert(n->character[0] && !n->character[1]);
3125+
3126+
if (*s == n->character[0])
3127+
s++;
3128+
else
3129+
ereport(ERROR,
3130+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3131+
errmsg("unmatched format separator \"%c\"",
3132+
n->character[0])));
3133+
}
3134+
else if (!fx_mode)
30933135
{
30943136
/*
30953137
* In non FX (fixed format) mode one format string space or
@@ -3434,6 +3476,27 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
34343476
}
34353477
}
34363478
}
3479+
3480+
/*
3481+
* Standard parsing mode doesn't allow unmatched format patterns or
3482+
* trailing characters in the input string.
3483+
*/
3484+
if (std)
3485+
{
3486+
if (n->type != NODE_TYPE_END)
3487+
ereport(ERROR,
3488+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3489+
errmsg("input string is too short for datetime format")));
3490+
3491+
while (*s != '\0' && isspace((unsigned char) *s))
3492+
s++;
3493+
3494+
if (*s != '\0')
3495+
ereport(ERROR,
3496+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3497+
errmsg("trailing characters remain in input string after "
3498+
"datetime format")));
3499+
}
34373500
}
34383501

34393502
/*
@@ -3456,7 +3519,7 @@ DCH_prevent_counter_overflow(void)
34563519

34573520
/* select a DCHCacheEntry to hold the given format picture */
34583521
static DCHCacheEntry *
3459-
DCH_cache_getnew(const char *str)
3522+
DCH_cache_getnew(const char *str, bool std)
34603523
{
34613524
DCHCacheEntry *ent;
34623525

@@ -3506,6 +3569,7 @@ DCH_cache_getnew(const char *str)
35063569
MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
35073570
ent->valid = false;
35083571
StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3572+
ent->std = std;
35093573
ent->age = (++DCHCounter);
35103574
/* caller is expected to fill format, then set valid */
35113575
++n_DCHCache;
@@ -3515,7 +3579,7 @@ DCH_cache_getnew(const char *str)
35153579

35163580
/* look for an existing DCHCacheEntry matching the given format picture */
35173581
static DCHCacheEntry *
3518-
DCH_cache_search(const char *str)
3582+
DCH_cache_search(const char *str, bool std)
35193583
{
35203584
/* Ensure we can advance DCHCounter below */
35213585
DCH_prevent_counter_overflow();
@@ -3524,7 +3588,7 @@ DCH_cache_search(const char *str)
35243588
{
35253589
DCHCacheEntry *ent = DCHCache[i];
35263590

3527-
if (ent->valid && strcmp(ent->str, str) == 0)
3591+
if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
35283592
{
35293593
ent->age = (++DCHCounter);
35303594
return ent;
@@ -3536,21 +3600,21 @@ DCH_cache_search(const char *str)
35363600

35373601
/* Find or create a DCHCacheEntry for the given format picture */
35383602
static DCHCacheEntry *
3539-
DCH_cache_fetch(const char *str)
3603+
DCH_cache_fetch(const char *str, bool std)
35403604
{
35413605
DCHCacheEntry *ent;
35423606

3543-
if ((ent = DCH_cache_search(str)) == NULL)
3607+
if ((ent = DCH_cache_search(str, std)) == NULL)
35443608
{
35453609
/*
35463610
* Not in the cache, must run parser and save a new format-picture to
35473611
* the cache. Do not mark the cache entry valid until parsing
35483612
* succeeds.
35493613
*/
3550-
ent = DCH_cache_getnew(str);
3614+
ent = DCH_cache_getnew(str, std);
35513615

3552-
parse_format(ent->format, str, DCH_keywords,
3553-
DCH_suff, DCH_index, DCH_TYPE, NULL);
3616+
parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
3617+
DCH_FLAG | (std ? STD_FLAG : 0), NULL);
35543618

35553619
ent->valid = true;
35563620
}
@@ -3595,14 +3659,14 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
35953659
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
35963660

35973661
parse_format(format, fmt_str, DCH_keywords,
3598-
DCH_suff, DCH_index, DCH_TYPE, NULL);
3662+
DCH_suff, DCH_index, DCH_FLAG, NULL);
35993663
}
36003664
else
36013665
{
36023666
/*
36033667
* Use cache buffers
36043668
*/
3605-
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3669+
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
36063670

36073671
incache = true;
36083672
format = ent->format;
@@ -3744,7 +3808,7 @@ to_timestamp(PG_FUNCTION_ARGS)
37443808
fsec_t fsec;
37453809
int fprec;
37463810

3747-
do_to_timestamp(date_txt, fmt, &tm, &fsec, &fprec);
3811+
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec);
37483812

37493813
/* Use the specified time zone, if any. */
37503814
if (tm.tm_zone)
@@ -3783,7 +3847,7 @@ to_date(PG_FUNCTION_ARGS)
37833847
struct pg_tm tm;
37843848
fsec_t fsec;
37853849

3786-
do_to_timestamp(date_txt, fmt, &tm, &fsec, NULL);
3850+
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL);
37873851

37883852
/* Prevent overflow in Julian-day routines */
37893853
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
@@ -3818,7 +3882,7 @@ to_date(PG_FUNCTION_ARGS)
38183882
* struct 'tm' and 'fsec'.
38193883
*/
38203884
static void
3821-
do_to_timestamp(text *date_txt, text *fmt,
3885+
do_to_timestamp(text *date_txt, text *fmt, bool std,
38223886
struct pg_tm *tm, fsec_t *fsec, int *fprec)
38233887
{
38243888
FormatNode *format;
@@ -3853,15 +3917,15 @@ do_to_timestamp(text *date_txt, text *fmt,
38533917

38543918
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
38553919

3856-
parse_format(format, fmt_str, DCH_keywords,
3857-
DCH_suff, DCH_index, DCH_TYPE, NULL);
3920+
parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
3921+
DCH_FLAG | (std ? STD_FLAG : 0), NULL);
38583922
}
38593923
else
38603924
{
38613925
/*
38623926
* Use cache buffers
38633927
*/
3864-
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3928+
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
38653929

38663930
incache = true;
38673931
format = ent->format;
@@ -3872,7 +3936,7 @@ do_to_timestamp(text *date_txt, text *fmt,
38723936
/* dump_index(DCH_keywords, DCH_index); */
38733937
#endif
38743938

3875-
DCH_from_char(format, date_str, &tmfc);
3939+
DCH_from_char(format, date_str, &tmfc, std);
38763940

38773941
pfree(fmt_str);
38783942

@@ -4241,7 +4305,7 @@ NUM_cache_fetch(const char *str)
42414305
zeroize_NUM(&ent->Num);
42424306

42434307
parse_format(ent->format, str, NUM_keywords,
4244-
NULL, NUM_index, NUM_TYPE, &ent->Num);
4308+
NULL, NUM_index, NUM_FLAG, &ent->Num);
42454309

42464310
ent->valid = true;
42474311
}
@@ -4273,7 +4337,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
42734337
zeroize_NUM(Num);
42744338

42754339
parse_format(format, str, NUM_keywords,
4276-
NULL, NUM_index, NUM_TYPE, Num);
4340+
NULL, NUM_index, NUM_FLAG, Num);
42774341
}
42784342
else
42794343
{

0 commit comments

Comments
 (0)