Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3dbbd0f

Browse files
committed
Do not fall back to AND for FTS phrase operator.
If there is no positional information for the lexemes, the phrase operator will no longer fall back to the AND operator. This change requires modifying the TS_execute() interface, because in some places (in indexes, for example) positional information is inaccessible, and in those cases we need to force the fallback to AND. Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
1 parent 028350f commit 3dbbd0f

File tree

7 files changed

+53
-27
lines changed

7 files changed

+53
-27
lines changed

src/backend/utils/adt/tsginidx.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
308308

309309
res = TS_execute(GETQUERY(query),
310310
&gcv,
311-
true,
311+
TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
312312
checkcondition_gin);
313313
}
314314

src/backend/utils/adt/tsgistidx.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
361361

362362
PG_RETURN_BOOL(TS_execute(
363363
GETQUERY(query),
364-
(void *) GETSIGN(key), false,
364+
(void *) GETSIGN(key),
365+
TS_EXEC_PHRASE_AS_AND,
365366
checkcondition_bit
366367
));
367368
}
@@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
373374
chkval.arre = chkval.arrb + ARRNELEM(key);
374375
PG_RETURN_BOOL(TS_execute(
375376
GETQUERY(query),
376-
(void *) &chkval, true,
377+
(void *) &chkval,
378+
TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
377379
checkcondition_arr
378380
));
379381
}

src/backend/utils/adt/tsrank.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
662662
{
663663
fillQueryRepresentationData(qr, ptr);
664664

665-
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
665+
if (TS_execute(GETQUERY(qr->query), (void *) qr,
666+
TS_EXEC_EMPTY, checkcondition_QueryOperand))
666667
{
667668
if (WEP_GETPOS(ptr->pos) > ext->q)
668669
{
@@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
691692
*/
692693
fillQueryRepresentationData(qr, ptr);
693694

694-
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
695+
if (TS_execute(GETQUERY(qr->query), (void *) qr,
696+
TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
695697
{
696698
if (WEP_GETPOS(ptr->pos) < ext->p)
697699
{

src/backend/utils/adt/tsvector_op.c

+19-16
Original file line numberDiff line numberDiff line change
@@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
13601360
*/
13611361
static bool
13621362
TS_phrase_execute(QueryItem *curitem,
1363-
void *checkval, bool calcnot, ExecPhraseData *data,
1363+
void *checkval, uint32 flags, ExecPhraseData *data,
13641364
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
13651365
{
13661366
/* since this function recurses, it could be driven to stack overflow */
@@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem,
13821382
Assert(curitem->qoperator.oper == OP_PHRASE);
13831383

13841384
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
1385-
checkval, calcnot, &Ldata, chkcond))
1385+
checkval, flags, &Ldata, chkcond))
13861386
return false;
13871387

1388-
if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
1388+
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
13891389
return false;
13901390

13911391
/*
13921392
* if at least one of the operands has no position information,
1393-
* fallback to AND operation.
1393+
* then return false. But if TS_EXEC_PHRASE_AS_AND flag is set then
1394+
* we return true, as it is then an AND operation
13941395
*/
13951396
if (Ldata.npos == 0 || Rdata.npos == 0)
1396-
return true;
1397+
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
13971398

13981399
/*
13991400
* Result of the operation is a list of the corresponding positions of
@@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem,
14981499
* chkcond is a callback function used to evaluate each VAL node in the query.
14991500
* checkval can be used to pass information to the callback. TS_execute doesn't
15001501
* do anything with it.
1501-
* if calcnot is false, NOT expressions are always evaluated to be true. This
1502-
* is used in ranking.
15031502
* It assumes that ordinary operators are always closer to root than phrase
15041503
* operator, so, TS_execute() may not take care of lexeme's position at all.
15051504
*/
15061505
bool
1507-
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
1506+
TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
15081507
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
15091508
{
15101509
/* since this function recurses, it could be driven to stack overflow */
@@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
15171516
switch (curitem->qoperator.oper)
15181517
{
15191518
case OP_NOT:
1520-
if (calcnot)
1521-
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
1519+
if (flags & TS_EXEC_CALC_NOT)
1520+
return !TS_execute(curitem + 1, checkval, flags, chkcond);
15221521
else
15231522
return true;
15241523

15251524
case OP_AND:
1526-
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
1527-
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
1525+
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
1526+
return TS_execute(curitem + 1, checkval, flags, chkcond);
15281527
else
15291528
return false;
15301529

15311530
case OP_OR:
1532-
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
1531+
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
15331532
return true;
15341533
else
1535-
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
1534+
return TS_execute(curitem + 1, checkval, flags, chkcond);
15361535

15371536
case OP_PHRASE:
1538-
return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
1537+
/*
1538+
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond()
1539+
* could do something more if it's called from TS_phrase_execute()
1540+
*/
1541+
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
15391542

15401543
default:
15411544
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
@@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS)
16331636
result = TS_execute(
16341637
GETQUERY(query),
16351638
&chkval,
1636-
true,
1639+
TS_EXEC_CALC_NOT,
16371640
checkcondition_str
16381641
);
16391642

src/include/tsearch/ts_utils.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,25 @@ typedef struct ExecPhraseData
111111
WordEntryPos *pos;
112112
} ExecPhraseData;
113113

114-
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
114+
/*
115+
* Evaluates a tsquery; the flags are described below
116+
*/
117+
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
115118
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
119+
120+
#define TS_EXEC_EMPTY (0x00)
121+
/*
122+
* if TS_EXEC_CALC_NOT is not set then NOT expressions are always evaluated to be true,
123+
* used in cases where NOT cannot be accurately computed (GiST) or
124+
* it isn't important (ranking)
125+
*/
126+
#define TS_EXEC_CALC_NOT (0x01)
127+
/*
128+
* Treat OP_PHRASE as OP_AND. Used when positional information is not
129+
* accessible, like in consistent methods of GIN/GiST indexes
130+
*/
131+
#define TS_EXEC_PHRASE_AS_AND (0x02)
132+
116133
extern bool tsquery_requires_match(QueryItem *curitem);
117134

118135
/*

src/test/regress/expected/tsearch.out

+5-4
Original file line numberDiff line numberDiff line change
@@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
14591459

14601460
--check OP_PHRASE on index
14611461
create temp table phrase_index_test(fts tsvector);
1462-
insert into phrase_index_test values('A fat cat has just eaten a rat.');
1462+
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
1463+
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
14631464
create index phrase_index_test_idx on phrase_index_test using gin(fts);
14641465
set enable_seqscan = off;
14651466
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
1466-
fts
1467-
-------------------------------------------------
1468-
'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
1467+
fts
1468+
-----------------------------------
1469+
'cat':3 'eaten':6 'fat':2 'rat':8
14691470
(1 row)
14701471

14711472
set enable_seqscan = on;

src/test/regress/sql/tsearch.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
482482

483483
--check OP_PHRASE on index
484484
create temp table phrase_index_test(fts tsvector);
485-
insert into phrase_index_test values('A fat cat has just eaten a rat.');
485+
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
486+
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
486487
create index phrase_index_test_idx on phrase_index_test using gin(fts);
487488
set enable_seqscan = off;
488489
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');

0 commit comments

Comments
 (0)