Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ae8f731

Browse files
committed
Rewrite ReadArrayStr() to avoid O(N^2) behavior on large strings,
and hopefully improve code clarity while at it. One intentional semantic change: a backslash-escaped space is no longer treated as removable trailing whitespace, as the prior code did. It seems to me that if it would not be considered removable leading whitespace, it should not be stripped at the end either.
1 parent 5a2c8cf commit ae8f731

File tree

1 file changed: +115 / -105 lines changed

1 file changed: +115 / -105 lines changed

src/backend/utils/adt/arrayfuncs.c

Lines changed: 115 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.112 2004/09/16 03:15:52 neilc Exp $
11+
* $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.113 2004/09/27 01:39:02 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -74,7 +74,8 @@
7474
#define RETURN_NULL(type) do { *isNull = true; return (type) 0; } while (0)
7575

7676
static int ArrayCount(char *str, int *dim, char typdelim);
77-
static Datum *ReadArrayStr(char *arrayStr, int nitems, int ndim, int *dim,
77+
static Datum *ReadArrayStr(char *arrayStr, const char *origStr,
78+
int nitems, int ndim, int *dim,
7879
FmgrInfo *inputproc, Oid typioparam, int32 typmod,
7980
char typdelim,
8081
int typlen, bool typbyval, char typalign,
@@ -325,7 +326,8 @@ array_in(PG_FUNCTION_ARGS)
325326
ereport(ERROR,
326327
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
327328
errmsg("missing left brace")));
328-
dataPtr = ReadArrayStr(p, nitems, ndim, dim, &my_extra->proc, typioparam,
329+
dataPtr = ReadArrayStr(p, string,
330+
nitems, ndim, dim, &my_extra->proc, typioparam,
329331
typmod, typdelim, typlen, typbyval, typalign,
330332
&nbytes);
331333
nbytes += ARR_OVERHEAD(ndim);
@@ -371,7 +373,7 @@ ArrayCount(char *str, int *dim, char typdelim)
371373
temp[MAXDIM],
372374
nelems[MAXDIM],
373375
nelems_last[MAXDIM];
374-
bool scanning_string = false;
376+
bool in_quotes = false;
375377
bool eoArray = false;
376378
bool empty_array = true;
377379
char *ptr;
@@ -443,14 +445,14 @@ ArrayCount(char *str, int *dim, char typdelim)
443445
ereport(ERROR,
444446
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
445447
errmsg("malformed array literal: \"%s\"", str)));
446-
scanning_string = !scanning_string;
447-
if (scanning_string)
448+
in_quotes = !in_quotes;
449+
if (in_quotes)
448450
parse_state = ARRAY_QUOTED_ELEM_STARTED;
449451
else
450452
parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
451453
break;
452454
case '{':
453-
if (!scanning_string)
455+
if (!in_quotes)
454456
{
455457
/*
456458
* A left brace can occur if no nesting has
@@ -476,7 +478,7 @@ ArrayCount(char *str, int *dim, char typdelim)
476478
}
477479
break;
478480
case '}':
479-
if (!scanning_string)
481+
if (!in_quotes)
480482
{
481483
/*
482484
* A right brace can occur after an element start,
@@ -520,7 +522,7 @@ ArrayCount(char *str, int *dim, char typdelim)
520522
}
521523
break;
522524
default:
523-
if (!scanning_string)
525+
if (!in_quotes)
524526
{
525527
if (*ptr == typdelim)
526528
{
@@ -595,16 +597,19 @@ ArrayCount(char *str, int *dim, char typdelim)
595597
* declaration. Unspecified elements are initialized to zero for fixed length
596598
* base types and to empty varlena structures for variable length base
597599
* types. (This is pretty bogus; NULL would be much safer.)
600+
*
598601
* result :
599602
* returns a palloc'd array of Datum representations of the array elements.
600603
* If element type is pass-by-ref, the Datums point to palloc'd values.
601604
* *nbytes is set to the amount of data space needed for the array,
602605
* including alignment padding but not including array header overhead.
603-
* CAUTION: the contents of "arrayStr" may be modified!
606+
*
607+
* CAUTION: the contents of "arrayStr" will be modified!
604608
*---------------------------------------------------------------------------
605609
*/
606610
static Datum *
607611
ReadArrayStr(char *arrayStr,
612+
const char *origStr,
608613
int nitems,
609614
int ndim,
610615
int *dim,
@@ -620,154 +625,158 @@ ReadArrayStr(char *arrayStr,
620625
int i,
621626
nest_level = 0;
622627
Datum *values;
623-
char *ptr;
624-
bool scanning_string = false;
628+
char *srcptr;
629+
bool in_quotes = false;
625630
bool eoArray = false;
631+
int totbytes;
626632
int indx[MAXDIM],
627633
prod[MAXDIM];
628634

629635
mda_get_prod(ndim, dim, prod);
630636
values = (Datum *) palloc0(nitems * sizeof(Datum));
631637
MemSet(indx, 0, sizeof(indx));
632638

633-
/* read array enclosed within {} */
634-
ptr = arrayStr;
639+
/*
640+
* We have to remove " and \ characters to create a clean item value
641+
* to pass to the datatype input routine. We overwrite each item
642+
* value in-place within arrayStr to do this. srcptr is the current
643+
* scan point, and dstptr is where we are copying to.
644+
*
645+
* We also want to suppress leading and trailing unquoted whitespace.
646+
* We use the leadingspace flag to suppress leading space. Trailing
647+
* space is tracked by using dstendptr to point to the last significant
648+
* output character.
649+
*
650+
* The error checking in this routine is mostly pro-forma, since we
651+
* expect that ArrayCount() already validated the string.
652+
*/
653+
srcptr = arrayStr;
635654
while (!eoArray)
636655
{
637656
bool itemdone = false;
638-
bool itemquoted = false;
639-
int i = -1;
657+
bool leadingspace = true;
640658
char *itemstart;
641-
char *eptr;
659+
char *dstptr;
660+
char *dstendptr;
642661

643-
/* skip leading whitespace */
644-
while (isspace((unsigned char) *ptr))
645-
ptr++;
646-
647-
itemstart = ptr;
662+
i = -1;
663+
itemstart = dstptr = dstendptr = srcptr;
648664

649665
while (!itemdone)
650666
{
651-
switch (*ptr)
667+
switch (*srcptr)
652668
{
653669
case '\0':
654670
/* Signal a premature end of the string */
655671
ereport(ERROR,
656672
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
657-
errmsg("malformed array literal: \"%s\"", arrayStr)));
673+
errmsg("malformed array literal: \"%s\"",
674+
origStr)));
658675
break;
659676
case '\\':
660-
{
661-
char *cptr;
662-
663-
/* Crunch the string on top of the backslash. */
664-
for (cptr = ptr; *cptr != '\0'; cptr++)
665-
*cptr = *(cptr + 1);
666-
if (*ptr == '\0')
667-
ereport(ERROR,
668-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
669-
errmsg("malformed array literal: \"%s\"", arrayStr)));
670-
break;
671-
}
677+
/* Skip backslash, copy next character as-is. */
678+
srcptr++;
679+
if (*srcptr == '\0')
680+
ereport(ERROR,
681+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
682+
errmsg("malformed array literal: \"%s\"",
683+
origStr)));
684+
*dstptr++ = *srcptr++;
685+
/* Treat the escaped character as non-whitespace */
686+
leadingspace = false;
687+
dstendptr = dstptr;
688+
break;
672689
case '\"':
690+
in_quotes = !in_quotes;
691+
if (in_quotes)
692+
leadingspace = false;
693+
else
673694
{
674-
char *cptr;
675-
676-
scanning_string = !scanning_string;
677-
if (scanning_string)
678-
{
679-
itemquoted = true;
680-
681-
/*
682-
* Crunch the string on top of the first
683-
* quote.
684-
*/
685-
for (cptr = ptr; *cptr != '\0'; cptr++)
686-
*cptr = *(cptr + 1);
687-
/* Back up to not miss following character. */
688-
ptr--;
689-
}
690-
break;
695+
/*
696+
* Advance dstendptr when we exit in_quotes; this
697+
* saves having to do it in all the other in_quotes
698+
* cases.
699+
*/
700+
dstendptr = dstptr;
691701
}
702+
srcptr++;
703+
break;
692704
case '{':
693-
if (!scanning_string)
705+
if (!in_quotes)
694706
{
695707
if (nest_level >= ndim)
696708
ereport(ERROR,
697-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
698-
errmsg("malformed array literal: \"%s\"", arrayStr)));
709+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
710+
errmsg("malformed array literal: \"%s\"",
711+
origStr)));
699712
nest_level++;
700713
indx[nest_level - 1] = 0;
701-
/* skip leading whitespace */
702-
while (isspace((unsigned char) *(ptr + 1)))
703-
ptr++;
704-
itemstart = ptr + 1;
714+
srcptr++;
705715
}
716+
else
717+
*dstptr++ = *srcptr++;
706718
break;
707719
case '}':
708-
if (!scanning_string)
720+
if (!in_quotes)
709721
{
710722
if (nest_level == 0)
711723
ereport(ERROR,
712-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
713-
errmsg("malformed array literal: \"%s\"", arrayStr)));
724+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
725+
errmsg("malformed array literal: \"%s\"",
726+
origStr)));
714727
if (i == -1)
715728
i = ArrayGetOffset0(ndim, indx, prod);
716729
indx[nest_level - 1] = 0;
717730
nest_level--;
718731
if (nest_level == 0)
719732
eoArray = itemdone = true;
720733
else
721-
{
722-
/*
723-
* tricky coding: terminate item value string
724-
* at first '}', but don't process it till we
725-
* see a typdelim char or end of array. This
726-
* handles case where several '}'s appear
727-
* successively in a multidimensional array.
728-
*/
729-
*ptr = '\0';
730734
indx[nest_level - 1]++;
731-
}
735+
srcptr++;
732736
}
737+
else
738+
*dstptr++ = *srcptr++;
733739
break;
734740
default:
735-
if (*ptr == typdelim && !scanning_string)
741+
if (in_quotes)
742+
*dstptr++ = *srcptr++;
743+
else if (*srcptr == typdelim)
736744
{
737745
if (i == -1)
738746
i = ArrayGetOffset0(ndim, indx, prod);
739747
itemdone = true;
740748
indx[ndim - 1]++;
749+
srcptr++;
750+
}
751+
else if (isspace((unsigned char) *srcptr))
752+
{
753+
/*
754+
* If leading space, drop it immediately. Else,
755+
* copy but don't advance dstendptr.
756+
*/
757+
if (leadingspace)
758+
srcptr++;
759+
else
760+
*dstptr++ = *srcptr++;
761+
}
762+
else
763+
{
764+
*dstptr++ = *srcptr++;
765+
leadingspace = false;
766+
dstendptr = dstptr;
741767
}
742768
break;
743769
}
744-
if (!itemdone)
745-
ptr++;
746770
}
747-
*ptr++ = '\0';
771+
772+
Assert(dstptr < srcptr);
773+
*dstendptr = '\0';
774+
748775
if (i < 0 || i >= nitems)
749776
ereport(ERROR,
750777
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
751-
errmsg("malformed array literal: \"%s\"", arrayStr)));
752-
753-
/*
754-
* skip trailing whitespace
755-
*/
756-
eptr = ptr - 1;
757-
if (!itemquoted)
758-
{
759-
/* skip to last non-NULL, non-space, character */
760-
while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
761-
eptr--;
762-
*(++eptr) = '\0';
763-
}
764-
else
765-
{
766-
/* skip to last quote character */
767-
while (*eptr != '"')
768-
eptr--;
769-
*eptr = '\0';
770-
}
778+
errmsg("malformed array literal: \"%s\"",
779+
origStr)));
771780

772781
values[i] = FunctionCall3(inputproc,
773782
CStringGetDatum(itemstart),
@@ -780,7 +789,7 @@ ReadArrayStr(char *arrayStr,
780789
*/
781790
if (typlen > 0)
782791
{
783-
*nbytes = nitems * att_align(typlen, typalign);
792+
totbytes = nitems * att_align(typlen, typalign);
784793
if (!typbyval)
785794
for (i = 0; i < nitems; i++)
786795
if (values[i] == (Datum) 0)
@@ -789,36 +798,37 @@ ReadArrayStr(char *arrayStr,
789798
else
790799
{
791800
Assert(!typbyval);
792-
*nbytes = 0;
801+
totbytes = 0;
793802
for (i = 0; i < nitems; i++)
794803
{
795804
if (values[i] != (Datum) 0)
796805
{
797806
/* let's just make sure data is not toasted */
798807
if (typlen == -1)
799808
values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
800-
*nbytes = att_addlength(*nbytes, typlen, values[i]);
801-
*nbytes = att_align(*nbytes, typalign);
809+
totbytes = att_addlength(totbytes, typlen, values[i]);
810+
totbytes = att_align(totbytes, typalign);
802811
}
803812
else if (typlen == -1)
804813
{
805814
/* dummy varlena value (XXX bogus, see notes above) */
806815
values[i] = PointerGetDatum(palloc(sizeof(int32)));
807816
VARATT_SIZEP(DatumGetPointer(values[i])) = sizeof(int32);
808-
*nbytes += sizeof(int32);
809-
*nbytes = att_align(*nbytes, typalign);
817+
totbytes += sizeof(int32);
818+
totbytes = att_align(totbytes, typalign);
810819
}
811820
else
812821
{
813822
/* dummy cstring value */
814823
Assert(typlen == -2);
815824
values[i] = PointerGetDatum(palloc(1));
816825
*((char *) DatumGetPointer(values[i])) = '\0';
817-
*nbytes += 1;
818-
*nbytes = att_align(*nbytes, typalign);
826+
totbytes += 1;
827+
totbytes = att_align(totbytes, typalign);
819828
}
820829
}
821830
}
831+
*nbytes = totbytes;
822832
return values;
823833
}
824834

0 commit comments

Comments
 (0)