8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.112 2004/09/16 03:15:52 neilc Exp $
11
+ * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.113 2004/09/27 01:39:02 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
74
74
#define RETURN_NULL (type ) do { *isNull = true; return (type) 0; } while (0)
75
75
76
76
static int ArrayCount (char * str , int * dim , char typdelim );
77
- static Datum * ReadArrayStr (char * arrayStr , int nitems , int ndim , int * dim ,
77
+ static Datum * ReadArrayStr (char * arrayStr , const char * origStr ,
78
+ int nitems , int ndim , int * dim ,
78
79
FmgrInfo * inputproc , Oid typioparam , int32 typmod ,
79
80
char typdelim ,
80
81
int typlen , bool typbyval , char typalign ,
@@ -325,7 +326,8 @@ array_in(PG_FUNCTION_ARGS)
325
326
ereport (ERROR ,
326
327
(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
327
328
errmsg ("missing left brace" )));
328
- dataPtr = ReadArrayStr (p , nitems , ndim , dim , & my_extra -> proc , typioparam ,
329
+ dataPtr = ReadArrayStr (p , string ,
330
+ nitems , ndim , dim , & my_extra -> proc , typioparam ,
329
331
typmod , typdelim , typlen , typbyval , typalign ,
330
332
& nbytes );
331
333
nbytes += ARR_OVERHEAD (ndim );
@@ -371,7 +373,7 @@ ArrayCount(char *str, int *dim, char typdelim)
371
373
temp [MAXDIM ],
372
374
nelems [MAXDIM ],
373
375
nelems_last [MAXDIM ];
374
- bool scanning_string = false;
376
+ bool in_quotes = false;
375
377
bool eoArray = false;
376
378
bool empty_array = true;
377
379
char * ptr ;
@@ -443,14 +445,14 @@ ArrayCount(char *str, int *dim, char typdelim)
443
445
ereport (ERROR ,
444
446
(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
445
447
errmsg ("malformed array literal: \"%s\"" , str )));
446
- scanning_string = !scanning_string ;
447
- if (scanning_string )
448
+ in_quotes = !in_quotes ;
449
+ if (in_quotes )
448
450
parse_state = ARRAY_QUOTED_ELEM_STARTED ;
449
451
else
450
452
parse_state = ARRAY_QUOTED_ELEM_COMPLETED ;
451
453
break ;
452
454
case '{' :
453
- if (!scanning_string )
455
+ if (!in_quotes )
454
456
{
455
457
/*
456
458
* A left brace can occur if no nesting has
@@ -476,7 +478,7 @@ ArrayCount(char *str, int *dim, char typdelim)
476
478
}
477
479
break ;
478
480
case '}' :
479
- if (!scanning_string )
481
+ if (!in_quotes )
480
482
{
481
483
/*
482
484
* A right brace can occur after an element start,
@@ -520,7 +522,7 @@ ArrayCount(char *str, int *dim, char typdelim)
520
522
}
521
523
break ;
522
524
default :
523
- if (!scanning_string )
525
+ if (!in_quotes )
524
526
{
525
527
if (* ptr == typdelim )
526
528
{
@@ -595,16 +597,19 @@ ArrayCount(char *str, int *dim, char typdelim)
595
597
* declaration. Unspecified elements are initialized to zero for fixed length
596
598
* base types and to empty varlena structures for variable length base
597
599
* types. (This is pretty bogus; NULL would be much safer.)
600
+ *
598
601
* result :
599
602
* returns a palloc'd array of Datum representations of the array elements.
600
603
* If element type is pass-by-ref, the Datums point to palloc'd values.
601
604
* *nbytes is set to the amount of data space needed for the array,
602
605
* including alignment padding but not including array header overhead.
603
- * CAUTION: the contents of "arrayStr" may be modified!
606
+ *
607
+ * CAUTION: the contents of "arrayStr" will be modified!
604
608
*---------------------------------------------------------------------------
605
609
*/
606
610
static Datum *
607
611
ReadArrayStr (char * arrayStr ,
612
+ const char * origStr ,
608
613
int nitems ,
609
614
int ndim ,
610
615
int * dim ,
@@ -620,154 +625,158 @@ ReadArrayStr(char *arrayStr,
620
625
int i ,
621
626
nest_level = 0 ;
622
627
Datum * values ;
623
- char * ptr ;
624
- bool scanning_string = false;
628
+ char * srcptr ;
629
+ bool in_quotes = false;
625
630
bool eoArray = false;
631
+ int totbytes ;
626
632
int indx [MAXDIM ],
627
633
prod [MAXDIM ];
628
634
629
635
mda_get_prod (ndim , dim , prod );
630
636
values = (Datum * ) palloc0 (nitems * sizeof (Datum ));
631
637
MemSet (indx , 0 , sizeof (indx ));
632
638
633
- /* read array enclosed within {} */
634
- ptr = arrayStr ;
639
+ /*
640
+ * We have to remove " and \ characters to create a clean item value
641
+ * to pass to the datatype input routine. We overwrite each item
642
+ * value in-place within arrayStr to do this. srcptr is the current
643
+ * scan point, and dstptr is where we are copying to.
644
+ *
645
+ * We also want to suppress leading and trailing unquoted whitespace.
646
+ * We use the leadingspace flag to suppress leading space. Trailing
647
+ * space is tracked by using dstendptr to point to the last significant
648
+ * output character.
649
+ *
650
+ * The error checking in this routine is mostly pro-forma, since we
651
+ * expect that ArrayCount() already validated the string.
652
+ */
653
+ srcptr = arrayStr ;
635
654
while (!eoArray )
636
655
{
637
656
bool itemdone = false;
638
- bool itemquoted = false;
639
- int i = -1 ;
657
+ bool leadingspace = true;
640
658
char * itemstart ;
641
- char * eptr ;
659
+ char * dstptr ;
660
+ char * dstendptr ;
642
661
643
- /* skip leading whitespace */
644
- while (isspace ((unsigned char ) * ptr ))
645
- ptr ++ ;
646
-
647
- itemstart = ptr ;
662
+ i = -1 ;
663
+ itemstart = dstptr = dstendptr = srcptr ;
648
664
649
665
while (!itemdone )
650
666
{
651
- switch (* ptr )
667
+ switch (* srcptr )
652
668
{
653
669
case '\0' :
654
670
/* Signal a premature end of the string */
655
671
ereport (ERROR ,
656
672
(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
657
- errmsg ("malformed array literal: \"%s\"" , arrayStr )));
673
+ errmsg ("malformed array literal: \"%s\"" ,
674
+ origStr )));
658
675
break ;
659
676
case '\\' :
660
- {
661
- char * cptr ;
662
-
663
- /* Crunch the string on top of the backslash. */
664
- for ( cptr = ptr ; * cptr != '\0' ; cptr ++ )
665
- * cptr = * ( cptr + 1 );
666
- if ( * ptr == '\0' )
667
- ereport ( ERROR ,
668
- ( errcode ( ERRCODE_INVALID_TEXT_REPRESENTATION ),
669
- errmsg ( "malformed array literal: \"%s\"" , arrayStr ))) ;
670
- break ;
671
- }
677
+ /* Skip backslash, copy next character as-is. */
678
+ srcptr ++ ;
679
+ if ( * srcptr == '\0' )
680
+ ereport ( ERROR ,
681
+ ( errcode ( ERRCODE_INVALID_TEXT_REPRESENTATION ),
682
+ errmsg ( "malformed array literal: \"%s\"" ,
683
+ origStr )));
684
+ * dstptr ++ = * srcptr ++ ;
685
+ /* Treat the escaped character as non-whitespace */
686
+ leadingspace = false ;
687
+ dstendptr = dstptr ;
688
+ break ;
672
689
case '\"' :
690
+ in_quotes = !in_quotes ;
691
+ if (in_quotes )
692
+ leadingspace = false;
693
+ else
673
694
{
674
- char * cptr ;
675
-
676
- scanning_string = !scanning_string ;
677
- if (scanning_string )
678
- {
679
- itemquoted = true;
680
-
681
- /*
682
- * Crunch the string on top of the first
683
- * quote.
684
- */
685
- for (cptr = ptr ; * cptr != '\0' ; cptr ++ )
686
- * cptr = * (cptr + 1 );
687
- /* Back up to not miss following character. */
688
- ptr -- ;
689
- }
690
- break ;
695
+ /*
696
+ * Advance dstendptr when we exit in_quotes; this
697
+ * saves having to do it in all the other in_quotes
698
+ * cases.
699
+ */
700
+ dstendptr = dstptr ;
691
701
}
702
+ srcptr ++ ;
703
+ break ;
692
704
case '{' :
693
- if (!scanning_string )
705
+ if (!in_quotes )
694
706
{
695
707
if (nest_level >= ndim )
696
708
ereport (ERROR ,
697
- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
698
- errmsg ("malformed array literal: \"%s\"" , arrayStr )));
709
+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
710
+ errmsg ("malformed array literal: \"%s\"" ,
711
+ origStr )));
699
712
nest_level ++ ;
700
713
indx [nest_level - 1 ] = 0 ;
701
- /* skip leading whitespace */
702
- while (isspace ((unsigned char ) * (ptr + 1 )))
703
- ptr ++ ;
704
- itemstart = ptr + 1 ;
714
+ srcptr ++ ;
705
715
}
716
+ else
717
+ * dstptr ++ = * srcptr ++ ;
706
718
break ;
707
719
case '}' :
708
- if (!scanning_string )
720
+ if (!in_quotes )
709
721
{
710
722
if (nest_level == 0 )
711
723
ereport (ERROR ,
712
- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
713
- errmsg ("malformed array literal: \"%s\"" , arrayStr )));
724
+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
725
+ errmsg ("malformed array literal: \"%s\"" ,
726
+ origStr )));
714
727
if (i == -1 )
715
728
i = ArrayGetOffset0 (ndim , indx , prod );
716
729
indx [nest_level - 1 ] = 0 ;
717
730
nest_level -- ;
718
731
if (nest_level == 0 )
719
732
eoArray = itemdone = true;
720
733
else
721
- {
722
- /*
723
- * tricky coding: terminate item value string
724
- * at first '}', but don't process it till we
725
- * see a typdelim char or end of array. This
726
- * handles case where several '}'s appear
727
- * successively in a multidimensional array.
728
- */
729
- * ptr = '\0' ;
730
734
indx [nest_level - 1 ]++ ;
731
- }
735
+ srcptr ++ ;
732
736
}
737
+ else
738
+ * dstptr ++ = * srcptr ++ ;
733
739
break ;
734
740
default :
735
- if (* ptr == typdelim && !scanning_string )
741
+ if (in_quotes )
742
+ * dstptr ++ = * srcptr ++ ;
743
+ else if (* srcptr == typdelim )
736
744
{
737
745
if (i == -1 )
738
746
i = ArrayGetOffset0 (ndim , indx , prod );
739
747
itemdone = true;
740
748
indx [ndim - 1 ]++ ;
749
+ srcptr ++ ;
750
+ }
751
+ else if (isspace ((unsigned char ) * srcptr ))
752
+ {
753
+ /*
754
+ * If leading space, drop it immediately. Else,
755
+ * copy but don't advance dstendptr.
756
+ */
757
+ if (leadingspace )
758
+ srcptr ++ ;
759
+ else
760
+ * dstptr ++ = * srcptr ++ ;
761
+ }
762
+ else
763
+ {
764
+ * dstptr ++ = * srcptr ++ ;
765
+ leadingspace = false;
766
+ dstendptr = dstptr ;
741
767
}
742
768
break ;
743
769
}
744
- if (!itemdone )
745
- ptr ++ ;
746
770
}
747
- * ptr ++ = '\0' ;
771
+
772
+ Assert (dstptr < srcptr );
773
+ * dstendptr = '\0' ;
774
+
748
775
if (i < 0 || i >= nitems )
749
776
ereport (ERROR ,
750
777
(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
751
- errmsg ("malformed array literal: \"%s\"" , arrayStr )));
752
-
753
- /*
754
- * skip trailing whitespace
755
- */
756
- eptr = ptr - 1 ;
757
- if (!itemquoted )
758
- {
759
- /* skip to last non-NULL, non-space, character */
760
- while ((* eptr == '\0' ) || (isspace ((unsigned char ) * eptr )))
761
- eptr -- ;
762
- * (++ eptr ) = '\0' ;
763
- }
764
- else
765
- {
766
- /* skip to last quote character */
767
- while (* eptr != '"' )
768
- eptr -- ;
769
- * eptr = '\0' ;
770
- }
778
+ errmsg ("malformed array literal: \"%s\"" ,
779
+ origStr )));
771
780
772
781
values [i ] = FunctionCall3 (inputproc ,
773
782
CStringGetDatum (itemstart ),
@@ -780,7 +789,7 @@ ReadArrayStr(char *arrayStr,
780
789
*/
781
790
if (typlen > 0 )
782
791
{
783
- * nbytes = nitems * att_align (typlen , typalign );
792
+ totbytes = nitems * att_align (typlen , typalign );
784
793
if (!typbyval )
785
794
for (i = 0 ; i < nitems ; i ++ )
786
795
if (values [i ] == (Datum ) 0 )
@@ -789,36 +798,37 @@ ReadArrayStr(char *arrayStr,
789
798
else
790
799
{
791
800
Assert (!typbyval );
792
- * nbytes = 0 ;
801
+ totbytes = 0 ;
793
802
for (i = 0 ; i < nitems ; i ++ )
794
803
{
795
804
if (values [i ] != (Datum ) 0 )
796
805
{
797
806
/* let's just make sure data is not toasted */
798
807
if (typlen == -1 )
799
808
values [i ] = PointerGetDatum (PG_DETOAST_DATUM (values [i ]));
800
- * nbytes = att_addlength (* nbytes , typlen , values [i ]);
801
- * nbytes = att_align (* nbytes , typalign );
809
+ totbytes = att_addlength (totbytes , typlen , values [i ]);
810
+ totbytes = att_align (totbytes , typalign );
802
811
}
803
812
else if (typlen == -1 )
804
813
{
805
814
/* dummy varlena value (XXX bogus, see notes above) */
806
815
values [i ] = PointerGetDatum (palloc (sizeof (int32 )));
807
816
VARATT_SIZEP (DatumGetPointer (values [i ])) = sizeof (int32 );
808
- * nbytes += sizeof (int32 );
809
- * nbytes = att_align (* nbytes , typalign );
817
+ totbytes += sizeof (int32 );
818
+ totbytes = att_align (totbytes , typalign );
810
819
}
811
820
else
812
821
{
813
822
/* dummy cstring value */
814
823
Assert (typlen == -2 );
815
824
values [i ] = PointerGetDatum (palloc (1 ));
816
825
* ((char * ) DatumGetPointer (values [i ])) = '\0' ;
817
- * nbytes += 1 ;
818
- * nbytes = att_align (* nbytes , typalign );
826
+ totbytes += 1 ;
827
+ totbytes = att_align (totbytes , typalign );
819
828
}
820
829
}
821
830
}
831
+ * nbytes = totbytes ;
822
832
return values ;
823
833
}
824
834
0 commit comments