@@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
416
416
return -1 ;
417
417
}
418
418
419
+ /*
420
+ * qsort comparator functions
421
+ */
422
+
419
423
/*
 * compare_int
 *		qsort comparator that orders plain int values ascending.
 *
 * The elements are read as int (not int32): the caller sorts an int array
 * and passes sizeof(int) as the element width, so the comparator has to
 * dereference exactly that type.
 *
 * va, vb: pointers to the two int elements being compared.
 *
 * Returns 0 if the values are equal, 1 if *va > *vb, and -1 otherwise.
 */
static int
compare_int(const void *va, const void *vb)
{
	int			a = *((const int *) va);
	int			b = *((const int *) vb);

	if (a == b)
		return 0;

	/* Compare explicitly; "a - b" could overflow for extreme values. */
	return (a > b) ? 1 : -1;
}
429
433
434
+ static int
435
+ compare_text_lexemes (const void * va , const void * vb )
436
+ {
437
+ Datum a = * ((const Datum * ) va );
438
+ Datum b = * ((const Datum * ) vb );
439
+ char * alex = VARDATA_ANY (a );
440
+ int alex_len = VARSIZE_ANY_EXHDR (a );
441
+ char * blex = VARDATA_ANY (b );
442
+ int blex_len = VARSIZE_ANY_EXHDR (b );
443
+
444
+ return tsCompareString (alex , alex_len , blex , blex_len , false);
445
+ }
446
+
430
447
/*
431
448
* Internal routine to delete lexemes from TSVector by array of offsets.
432
449
*
@@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
459
476
{
460
477
int kp ;
461
478
462
- qsort (indices_to_delete , indices_count , sizeof (int ), compareint );
479
+ qsort (indices_to_delete , indices_count , sizeof (int ), compare_int );
463
480
kp = 0 ;
464
481
for (k = 1 ; k < indices_count ; k ++ )
465
482
{
@@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
743
760
bool * nulls ;
744
761
int nitems ,
745
762
i ,
763
+ j ,
746
764
tslen ,
747
765
datalen = 0 ;
748
766
char * cur ;
749
767
750
768
deconstruct_array (v , TEXTOID , -1 , false, 'i' , & dlexemes , & nulls , & nitems );
751
769
770
+ /* Reject nulls (maybe we should just ignore them, instead?) */
752
771
for (i = 0 ; i < nitems ; i ++ )
753
772
{
754
773
if (nulls [i ])
755
774
ereport (ERROR ,
756
775
(errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
757
776
errmsg ("lexeme array may not contain nulls" )));
777
+ }
758
778
759
- datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
779
+ /* Sort and de-dup, because this is required for a valid tsvector. */
780
+ if (nitems > 1 )
781
+ {
782
+ qsort (dlexemes , nitems , sizeof (Datum ), compare_text_lexemes );
783
+ j = 0 ;
784
+ for (i = 1 ; i < nitems ; i ++ )
785
+ {
786
+ if (compare_text_lexemes (& dlexemes [j ], & dlexemes [i ]) < 0 )
787
+ dlexemes [++ j ] = dlexemes [i ];
788
+ }
789
+ nitems = ++ j ;
760
790
}
761
791
792
+ /* Calculate space needed for surviving lexemes. */
793
+ for (i = 0 ; i < nitems ; i ++ )
794
+ datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
762
795
tslen = CALCDATASIZE (nitems , datalen );
796
+
797
+ /* Allocate and fill tsvector. */
763
798
tsout = (TSVector ) palloc0 (tslen );
764
799
SET_VARSIZE (tsout , tslen );
765
800
tsout -> size = nitems ;
801
+
766
802
arrout = ARRPTR (tsout );
767
803
cur = STRPTR (tsout );
768
-
769
804
for (i = 0 ; i < nitems ; i ++ )
770
805
{
771
- char * lex = VARDATA (dlexemes [i ]);
806
+ char * lex = VARDATA_ANY (dlexemes [i ]);
772
807
int lex_len = VARSIZE_ANY_EXHDR (dlexemes [i ]);
773
808
774
809
memcpy (cur , lex , lex_len );
0 commit comments