Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 52fd2d6

Browse files
committed
Fix up core tsquery GIN support for new extractQuery API.
No need for the empty-prefix-match kluge to force a full scan anymore.
1 parent 3048450 commit 52fd2d6

File tree

3 files changed

+96
-53
lines changed

3 files changed

+96
-53
lines changed

src/backend/utils/adt/tsginidx.c

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
*/
1414
#include "postgres.h"
1515

16+
#include "access/gin.h"
1617
#include "access/skey.h"
1718
#include "tsearch/ts_type.h"
1819
#include "tsearch/ts_utils.h"
@@ -26,8 +27,7 @@ gin_cmp_tslexeme(PG_FUNCTION_ARGS)
2627
text *b = PG_GETARG_TEXT_PP(1);
2728
int cmp;
2829

29-
cmp = tsCompareString(
30-
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
30+
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
3131
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
3232
false);
3333

@@ -48,8 +48,7 @@ gin_cmp_prefix(PG_FUNCTION_ARGS)
4848
#endif
4949
int cmp;
5050

51-
cmp = tsCompareString(
52-
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
51+
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
5352
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
5453
true);
5554

@@ -96,71 +95,72 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
9695
{
9796
TSQuery query = PG_GETARG_TSQUERY(0);
9897
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99-
10098
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
10199
bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102100
Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
101+
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
102+
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
103103
Datum *entries = NULL;
104-
bool *partialmatch;
105104

106105
*nentries = 0;
107106

108107
if (query->size > 0)
109108
{
109+
QueryItem *item = GETQUERY(query);
110110
int4 i,
111-
j = 0,
112-
len;
113-
QueryItem *item;
114-
bool use_fullscan = false;
111+
j;
112+
bool *partialmatch;
115113
int *map_item_operand;
116114

117-
item = clean_NOT(GETQUERY(query), &len);
118-
if (!item)
119-
{
120-
use_fullscan = true;
121-
*nentries = 1;
122-
}
123-
124-
item = GETQUERY(query);
115+
/*
116+
* If the query doesn't have any required positive matches (for
117+
* instance, it's something like '! foo'), we have to do a full
118+
* index scan.
119+
*/
120+
if (tsquery_requires_match(item))
121+
*searchMode = GIN_SEARCH_MODE_DEFAULT;
122+
else
123+
*searchMode = GIN_SEARCH_MODE_ALL;
125124

125+
/* count number of VAL items */
126+
j = 0;
126127
for (i = 0; i < query->size; i++)
128+
{
127129
if (item[i].type == QI_VAL)
128-
(*nentries)++;
130+
j++;
131+
}
132+
*nentries = j;
129133

130-
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
131-
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * (*nentries));
134+
entries = (Datum *) palloc(sizeof(Datum) * j);
135+
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
132136

133137
/*
134138
* Make map to convert item's number to corresponding operand's (the
135139
* same, entry's) number. Entry's number is used in check array in
136140
* consistent method. We use the same map for each entry.
137141
*/
138-
*extra_data = (Pointer *) palloc0(sizeof(Pointer) * (*nentries));
139-
map_item_operand = palloc0(sizeof(int) * (query->size + 1));
142+
*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
143+
map_item_operand = (int *) palloc0(sizeof(int) * query->size);
140144

145+
/* Now rescan the VAL items and fill in the arrays */
146+
j = 0;
141147
for (i = 0; i < query->size; i++)
148+
{
142149
if (item[i].type == QI_VAL)
143150
{
144-
text *txt;
145151
QueryOperand *val = &item[i].qoperand;
152+
text *txt;
146153

147154
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
148155
val->length);
156+
entries[j] = PointerGetDatum(txt);
157+
partialmatch[j] = val->prefix;
149158
(*extra_data)[j] = (Pointer) map_item_operand;
150159
map_item_operand[i] = j;
151-
partialmatch[j] = val->prefix;
152-
entries[j++] = PointerGetDatum(txt);
160+
j++;
153161
}
154-
155-
if (use_fullscan)
156-
{
157-
(*extra_data)[j] = (Pointer) map_item_operand;
158-
map_item_operand[i] = j;
159-
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
160162
}
161163
}
162-
else
163-
*nentries = -1; /* nothing can be found */
164164

165165
PG_FREE_IF_COPY(query, 0);
166166

@@ -222,12 +222,10 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
222222
gcv.map_item_operand = (int *) (extra_data[0]);
223223
gcv.need_recheck = recheck;
224224

225-
res = TS_execute(
226-
GETQUERY(query),
225+
res = TS_execute(GETQUERY(query),
227226
&gcv,
228227
true,
229-
checkcondition_gin
230-
);
228+
checkcondition_gin);
231229
}
232230

233231
PG_RETURN_BOOL(res);

src/backend/utils/adt/tsvector_op.c

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,8 @@ tsvector_concat(PG_FUNCTION_ARGS)
525525

526526
/*
527527
* Compare two strings by tsvector rules.
528-
* if isPrefix = true then it returns not-zero value if b has prefix a
528+
*
529+
* if prefix = true then it returns zero value iff b has prefix a
529530
*/
530531
int4
531532
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
@@ -535,8 +536,7 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
535536
if (lena == 0)
536537
{
537538
if (prefix)
538-
cmp = 0; /* emtry string is equal to any if a prefix
539-
* match */
539+
cmp = 0; /* empty string is prefix of anything */
540540
else
541541
cmp = (lenb > 0) ? -1 : 0;
542542
}
@@ -551,14 +551,9 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
551551
if (prefix)
552552
{
553553
if (cmp == 0 && lena > lenb)
554-
{
555-
/*
556-
* b argument is not beginning with argument a
557-
*/
558-
cmp = 1;
559-
}
554+
cmp = 1; /* a is longer, so not a prefix of b */
560555
}
561-
else if ((cmp == 0) && (lena != lenb))
556+
else if (cmp == 0 && lena != lenb)
562557
{
563558
cmp = (lena < lenb) ? -1 : 1;
564559
}
@@ -650,13 +645,13 @@ checkcondition_str(void *checkval, QueryOperand *val)
650645
}
651646

652647
/*
653-
* check for boolean condition.
648+
* Evaluate tsquery boolean expression.
654649
*
655-
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
650+
* chkcond is a callback function used to evaluate each VAL node in the query.
656651
* checkval can be used to pass information to the callback. TS_execute doesn't
657652
* do anything with it.
658-
* chkcond is a callback function used to evaluate each VAL node in the query.
659-
*
653+
* if calcnot is false, NOT expressions are always evaluated to be true. This
654+
* is used in ranking.
660655
*/
661656
bool
662657
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
@@ -675,6 +670,7 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
675670
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
676671
else
677672
return true;
673+
678674
case OP_AND:
679675
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
680676
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
@@ -695,6 +691,55 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
695691
return false;
696692
}
697693

694+
/*
695+
* Detect whether a tsquery boolean expression requires any positive matches
696+
* to values shown in the tsquery.
697+
*
698+
* This is needed to know whether a GIN index search requires full index scan.
699+
* For example, 'x & !y' requires a match of x, so it's sufficient to scan
700+
* entries for x; but 'x | !y' could match rows containing neither x nor y.
701+
*/
702+
bool
703+
tsquery_requires_match(QueryItem *curitem)
704+
{
705+
/* since this function recurses, it could be driven to stack overflow */
706+
check_stack_depth();
707+
708+
if (curitem->type == QI_VAL)
709+
return true;
710+
711+
switch (curitem->qoperator.oper)
712+
{
713+
case OP_NOT:
714+
/*
715+
* Assume there are no required matches underneath a NOT. For
716+
* some cases with nested NOTs, we could prove there's a required
717+
* match, but it seems unlikely to be worth the trouble.
718+
*/
719+
return false;
720+
721+
case OP_AND:
722+
/* If either side requires a match, we're good */
723+
if (tsquery_requires_match(curitem + curitem->qoperator.left))
724+
return true;
725+
else
726+
return tsquery_requires_match(curitem + 1);
727+
728+
case OP_OR:
729+
/* Both sides must require a match */
730+
if (tsquery_requires_match(curitem + curitem->qoperator.left))
731+
return tsquery_requires_match(curitem + 1);
732+
else
733+
return false;
734+
735+
default:
736+
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
737+
}
738+
739+
/* not reachable, but keep compiler quiet */
740+
return false;
741+
}
742+
698743
/*
699744
* boolean operations
700745
*/

src/include/tsearch/ts_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ extern text *generateHeadline(HeadlineParsedText *prs);
104104
/*
105105
* Common check function for tsvector @@ tsquery
106106
*/
107-
108107
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
109108
bool (*chkcond) (void *checkval, QueryOperand *val));
109+
extern bool tsquery_requires_match(QueryItem *curitem);
110110

111111
/*
112112
* to_ts* - text transformation to tsvector, tsquery

0 commit comments

Comments
 (0)