Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f10eab7

Browse files
committed
Make array_to_tsvector() sort and de-duplicate the given strings.
This is required for the result to be a legal tsvector value.
Noted while fooling with Andreas Seltenreich's ts_delete() crash.

Discussion: <87invhoj6e.fsf@credativ.de>
1 parent c50d192 commit f10eab7

File tree

4 files changed

+52
-8
lines changed

4 files changed

+52
-8
lines changed

doc/src/sgml/func.sgml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9294,7 +9294,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92949294
<entry><type>tsvector</type></entry>
92959295
<entry>convert array of lexemes to <type>tsvector</type></entry>
92969296
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
9297-
<entry><literal>'fat' 'cat' 'rat'</literal></entry>
9297+
<entry><literal>'cat' 'fat' 'rat'</literal></entry>
92989298
</row>
92999299
<row>
93009300
<entry>

src/backend/utils/adt/tsvector_op.c

+42-7
Original file line numberDiff line numberDiff line change
@@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
416416
return -1;
417417
}
418418

419+
/*
420+
* qsort comparator functions
421+
*/
422+
419423
static int
420-
compareint(const void *va, const void *vb)
424+
compare_int(const void *va, const void *vb)
421425
{
422-
int32 a = *((const int32 *) va);
423-
int32 b = *((const int32 *) vb);
426+
int a = *((const int *) va);
427+
int b = *((const int *) vb);
424428

425429
if (a == b)
426430
return 0;
427431
return (a > b) ? 1 : -1;
428432
}
429433

434+
static int
435+
compare_text_lexemes(const void *va, const void *vb)
436+
{
437+
Datum a = *((const Datum *) va);
438+
Datum b = *((const Datum *) vb);
439+
char *alex = VARDATA_ANY(a);
440+
int alex_len = VARSIZE_ANY_EXHDR(a);
441+
char *blex = VARDATA_ANY(b);
442+
int blex_len = VARSIZE_ANY_EXHDR(b);
443+
444+
return tsCompareString(alex, alex_len, blex, blex_len, false);
445+
}
446+
430447
/*
431448
* Internal routine to delete lexemes from TSVector by array of offsets.
432449
*
@@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
459476
{
460477
int kp;
461478

462-
qsort(indices_to_delete, indices_count, sizeof(int), compareint);
479+
qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
463480
kp = 0;
464481
for (k = 1; k < indices_count; k++)
465482
{
@@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
743760
bool *nulls;
744761
int nitems,
745762
i,
763+
j,
746764
tslen,
747765
datalen = 0;
748766
char *cur;
749767

750768
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
751769

770+
/* Reject nulls (maybe we should just ignore them, instead?) */
752771
for (i = 0; i < nitems; i++)
753772
{
754773
if (nulls[i])
755774
ereport(ERROR,
756775
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
757776
errmsg("lexeme array may not contain nulls")));
777+
}
758778

759-
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
779+
/* Sort and de-dup, because this is required for a valid tsvector. */
780+
if (nitems > 1)
781+
{
782+
qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
783+
j = 0;
784+
for (i = 1; i < nitems; i++)
785+
{
786+
if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
787+
dlexemes[++j] = dlexemes[i];
788+
}
789+
nitems = ++j;
760790
}
761791

792+
/* Calculate space needed for surviving lexemes. */
793+
for (i = 0; i < nitems; i++)
794+
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
762795
tslen = CALCDATASIZE(nitems, datalen);
796+
797+
/* Allocate and fill tsvector. */
763798
tsout = (TSVector) palloc0(tslen);
764799
SET_VARSIZE(tsout, tslen);
765800
tsout->size = nitems;
801+
766802
arrout = ARRPTR(tsout);
767803
cur = STRPTR(tsout);
768-
769804
for (i = 0; i < nitems; i++)
770805
{
771-
char *lex = VARDATA(dlexemes[i]);
806+
char *lex = VARDATA_ANY(dlexemes[i]);
772807
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
773808

774809
memcpy(cur, lex, lex_len);

src/test/regress/expected/tstypes.out

+7
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,13 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
11651165

11661166
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
11671167
ERROR: lexeme array may not contain nulls
1168+
-- array_to_tsvector must sort and de-dup
1169+
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
1170+
array_to_tsvector
1171+
-------------------
1172+
'bar' 'baz' 'foo'
1173+
(1 row)
1174+
11681175
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
11691176
setweight
11701177
----------------------------------------------------------

src/test/regress/sql/tstypes.sql

+2
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
226226

227227
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
228228
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
229+
-- array_to_tsvector must sort and de-dup
230+
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
229231

230232
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
231233
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');

0 commit comments

Comments
 (0)