@@ -317,7 +317,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
317
317
318
318
if (nulls [i ])
319
319
ereport (ERROR ,
320
- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
320
+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
321
321
errmsg ("lexeme array may not contain nulls" )));
322
322
323
323
lex = VARDATA (dlexemes [i ]);
@@ -430,7 +430,7 @@ compareint(const void *va, const void *vb)
430
430
/*
431
431
* Internal routine to delete lexemes from a TSVector by an array of offsets.
432
432
*
433
- * int *indices_to_delete -- array of lexeme offsets to delete
433
+ * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
434
434
* int indices_count -- size of that array
435
435
*
436
436
* Returns new TSVector without given lexemes along with their positions
@@ -445,52 +445,68 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
445
445
* arrout ;
446
446
char * data = STRPTR (tsv ),
447
447
* dataout ;
448
- int i ,
449
- j ,
450
- k ,
451
- curoff ;
448
+ int i , /* index in arrin */
449
+ j , /* index in arrout */
450
+ k , /* index in indices_to_delete */
451
+ curoff ; /* index in dataout area */
452
452
453
453
/*
454
- * Here we overestimates tsout size, since we don't know exact size
455
- * occupied by positions and weights. We will set exact size later after a
456
- * pass through TSVector .
454
+ * Sort the filter array to simplify membership checks below. Also, get
455
+ * rid of any duplicate entries, so that we can assume that indices_count
456
+ * is exactly equal to the number of lexemes that will be removed .
457
457
*/
458
- tsout = (TSVector ) palloc0 (VARSIZE (tsv ));
459
- arrout = ARRPTR (tsout );
460
- tsout -> size = tsv -> size - indices_count ;
461
-
462
- /* Sort our filter array to simplify membership check later. */
463
458
if (indices_count > 1 )
459
+ {
460
+ int kp ;
461
+
464
462
qsort (indices_to_delete , indices_count , sizeof (int ), compareint );
463
+ kp = 0 ;
464
+ for (k = 1 ; k < indices_count ; k ++ )
465
+ {
466
+ if (indices_to_delete [k ] != indices_to_delete [kp ])
467
+ indices_to_delete [++ kp ] = indices_to_delete [k ];
468
+ }
469
+ indices_count = ++ kp ;
470
+ }
465
471
466
472
/*
467
- * Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete.
473
+ * Here we overestimate tsout size, since we don't know how much space is
474
+ * used by the deleted lexeme(s). We will set exact size below.
468
475
*/
469
- curoff = 0 ;
476
+ tsout = (TSVector ) palloc0 (VARSIZE (tsv ));
477
+
478
+ /* This count must be correct because STRPTR(tsout) relies on it. */
479
+ tsout -> size = tsv -> size - indices_count ;
480
+
481
+ /*
482
+ * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
483
+ */
484
+ arrout = ARRPTR (tsout );
470
485
dataout = STRPTR (tsout );
486
+ curoff = 0 ;
471
487
for (i = j = k = 0 ; i < tsv -> size ; i ++ )
472
488
{
473
489
/*
474
- * Here we should check whether current i is present in
475
- * indices_to_delete or not. Since indices_to_delete is already sorted
476
- * we can advance it index only when we have match .
490
+ * If current i is present in indices_to_delete, skip this lexeme.
491
+ * Since indices_to_delete is already sorted, we only need to check
492
+ * the current (k'th) entry .
477
493
*/
478
494
if (k < indices_count && i == indices_to_delete [k ])
479
495
{
480
496
k ++ ;
481
497
continue ;
482
498
}
483
499
484
- /* Copy lexeme, it's positions and weights */
500
+ /* Copy lexeme and its positions and weights */
485
501
memcpy (dataout + curoff , data + arrin [i ].pos , arrin [i ].len );
486
502
arrout [j ].haspos = arrin [i ].haspos ;
487
503
arrout [j ].len = arrin [i ].len ;
488
504
arrout [j ].pos = curoff ;
489
505
curoff += arrin [i ].len ;
490
506
if (arrin [i ].haspos )
491
507
{
492
- int len = POSDATALEN (tsv , arrin + i ) * sizeof (WordEntryPos ) +
493
- sizeof (uint16 );
508
+ int len = POSDATALEN (tsv , arrin + i ) * sizeof (WordEntryPos )
509
+ + sizeof (uint16 );
494
510
495
511
curoff = SHORTALIGN (curoff );
496
512
memcpy (dataout + curoff ,
@@ -503,10 +519,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
503
519
}
504
520
505
521
/*
506
- * After the pass through TSVector k should equals exactly to
507
- * indices_count. If it isn't then the caller provided us with indices
508
- * outside of [0, tsv->size) range and estimation of tsout's size is
509
- * wrong.
522
+ * k should now be exactly equal to indices_count. If it isn't then the
523
+ * caller provided us with indices outside of [0, tsv->size) range and
524
+ * estimation of tsout's size is wrong.
510
525
*/
511
526
Assert (k == indices_count );
512
527
@@ -560,7 +575,7 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
560
575
561
576
/*
562
577
* In the typical use case, the array of lexemes to delete is relatively small. So
563
- * here we optimizing things for that scenario: iterate through lexarr
578
+ * here we optimize things for that scenario: iterate through lexarr
564
579
* performing binary search of each lexeme from lexarr in tsvector.
565
580
*/
566
581
skip_indices = palloc0 (nlex * sizeof (int ));
@@ -572,10 +587,10 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
572
587
573
588
if (nulls [i ])
574
589
ereport (ERROR ,
575
- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
590
+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
576
591
errmsg ("lexeme array may not contain nulls" )));
577
592
578
- lex = VARDATA (dlexemes [i ]);
593
+ lex = VARDATA_ANY (dlexemes [i ]);
579
594
lex_len = VARSIZE_ANY_EXHDR (dlexemes [i ]);
580
595
lex_pos = tsvector_bsearch (tsin , lex , lex_len );
581
596
@@ -738,7 +753,7 @@ array_to_tsvector(PG_FUNCTION_ARGS)
738
753
{
739
754
if (nulls [i ])
740
755
ereport (ERROR ,
741
- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
756
+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
742
757
errmsg ("lexeme array may not contain nulls" )));
743
758
744
759
datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
@@ -797,7 +812,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
797
812
798
813
if (nulls [i ])
799
814
ereport (ERROR ,
800
- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
815
+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
801
816
errmsg ("weight array may not contain nulls" )));
802
817
803
818
char_weight = DatumGetChar (dweights [i ]);
0 commit comments