Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6f73455

Browse files
committed
Improve documentation around TS_execute().
I got frustrated by the lack of commentary in this area, so here is some reverse-engineered documentation, along with minor stylistic cleanup. No code changes more significant than removal of unused variables. Back-patch to 9.6, not because that's useful in itself, but because we have some bugs to fix in phrase search and this would cause merge failures if it's only in HEAD.
1 parent b344b87 commit 6f73455

File tree

4 files changed

+103
-64
lines changed

4 files changed

+103
-64
lines changed

src/backend/tsearch/wparser_def.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2123,7 +2123,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
21232123

21242124
ch.words = &(prs->words[*p]);
21252125
ch.len = *q - *p + 1;
2126-
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
2126+
if (TS_execute(GETQUERY(query), &ch, TS_EXEC_EMPTY, checkcondition_HL))
21272127
return true;
21282128
else
21292129
{

src/backend/utils/adt/tsginidx.c

+5-7
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d
188188
* information then set recheck flag
189189
*/
190190
if (val->weight != 0 || data != NULL)
191-
*gcv->need_recheck = true;
191+
*(gcv->need_recheck) = true;
192192

193193
/* convert item's number to corresponding entry's (operand's) number */
194194
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
@@ -289,19 +289,18 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
289289
bool *recheck = (bool *) PG_GETARG_POINTER(5);
290290
bool res = FALSE;
291291

292-
/* The query requires recheck only if it involves weights */
292+
/* Initially assume query doesn't require recheck */
293293
*recheck = false;
294294

295295
if (query->size > 0)
296296
{
297-
QueryItem *item;
298297
GinChkVal gcv;
299298

300299
/*
301300
* check-parameter array has one entry for each value (operand) in the
302301
* query.
303302
*/
304-
gcv.first_item = item = GETQUERY(query);
303+
gcv.first_item = GETQUERY(query);
305304
gcv.check = check;
306305
gcv.map_item_operand = (int *) (extra_data[0]);
307306
gcv.need_recheck = recheck;
@@ -328,19 +327,18 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
328327
GinTernaryValue res = GIN_FALSE;
329328
bool recheck;
330329

331-
/* The query requires recheck only if it involves weights */
330+
/* Initially assume query doesn't require recheck */
332331
recheck = false;
333332

334333
if (query->size > 0)
335334
{
336-
QueryItem *item;
337335
GinChkVal gcv;
338336

339337
/*
340338
* check-parameter array has one entry for each value (operand) in the
341339
* query.
342340
*/
343-
gcv.first_item = item = GETQUERY(query);
341+
gcv.first_item = GETQUERY(query);
344342
gcv.check = check;
345343
gcv.map_item_operand = (int *) (extra_data[0]);
346344
gcv.need_recheck = &recheck;

src/backend/utils/adt/tsvector_op.c

+50-42
Original file line numberDiff line numberDiff line change
@@ -1405,20 +1405,26 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
14051405
}
14061406

14071407
/*
1408-
* Check for phrase condition. Fallback to the AND operation
1409-
* if there is no positional information.
1408+
* Execute tsquery at or below an OP_PHRASE operator.
1409+
*
1410+
* This handles the recursion at levels where we need to care about
1411+
* match locations. In addition to the same arguments used for TS_execute,
1412+
* the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
1413+
* be filled with lexeme match positions on success. data == NULL if no
1414+
* match data need be returned. (In practice, outside callers pass NULL,
1415+
* and only the internal recursion cases pass a data pointer.)
14101416
*/
14111417
static bool
1412-
TS_phrase_execute(QueryItem *curitem,
1413-
void *checkval, uint32 flags, ExecPhraseData *data,
1414-
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
1418+
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1419+
ExecPhraseData *data,
1420+
TSExecuteCallback chkcond)
14151421
{
14161422
/* since this function recurses, it could be driven to stack overflow */
14171423
check_stack_depth();
14181424

14191425
if (curitem->type == QI_VAL)
14201426
{
1421-
return chkcond(checkval, (QueryOperand *) curitem, data);
1427+
return chkcond(arg, (QueryOperand *) curitem, data);
14221428
}
14231429
else
14241430
{
@@ -1432,33 +1438,31 @@ TS_phrase_execute(QueryItem *curitem,
14321438
Assert(curitem->qoperator.oper == OP_PHRASE);
14331439

14341440
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
1435-
checkval, flags, &Ldata, chkcond))
1441+
arg, flags, &Ldata, chkcond))
14361442
return false;
14371443

1438-
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
1444+
if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
14391445
return false;
14401446

14411447
/*
1442-
* if at least one of the operands has no position information, then
1443-
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
1444-
* return true as it is a AND operation
1448+
* If either operand has no position information, then we normally
1449+
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
1450+
* return true, treating OP_PHRASE as if it were OP_AND.
14451451
*/
14461452
if (Ldata.npos == 0 || Rdata.npos == 0)
14471453
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
14481454

14491455
/*
1450-
* Result of the operation is a list of the corresponding positions of
1451-
* RIGHT operand.
1456+
* Prepare output position array if needed.
14521457
*/
14531458
if (data)
14541459
{
1460+
/*
1461+
* We can recycle the righthand operand's result array if it was
1462+
* palloc'd, else must allocate our own. The number of matches
1463+
* couldn't be more than the smaller of the two operands' matches.
1464+
*/
14551465
if (!Rdata.allocated)
1456-
1457-
/*
1458-
* OP_PHRASE is based on the OP_AND, so the number of
1459-
* resulting positions could not be greater than the total
1460-
* amount of operands.
1461-
*/
14621466
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
14631467
else
14641468
data->pos = Rdata.pos;
@@ -1469,10 +1473,12 @@ TS_phrase_execute(QueryItem *curitem,
14691473
}
14701474

14711475
/*
1472-
* Find matches by distance, WEP_GETPOS() is needed because
1473-
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector
1476+
* Find matches by distance. WEP_GETPOS() is needed because
1477+
* ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
1478+
*
1479+
* Note that the output positions are those of the matching RIGHT
1480+
* operands.
14741481
*/
1475-
14761482
Rpos = Rdata.pos;
14771483
LposStart = Ldata.pos;
14781484
while (Rpos < Rdata.pos + Rdata.npos)
@@ -1505,8 +1511,9 @@ TS_phrase_execute(QueryItem *curitem,
15051511
else
15061512
{
15071513
/*
1508-
* We are in the root of the phrase tree and hence we
1509-
* don't have to store the resulting positions
1514+
* We are at the root of the phrase tree and hence we
1515+
* don't have to identify all the match positions.
1516+
* Just report success.
15101517
*/
15111518
return true;
15121519
}
@@ -1546,50 +1553,53 @@ TS_phrase_execute(QueryItem *curitem,
15461553
/*
15471554
* Evaluate tsquery boolean expression.
15481555
*
1549-
* chkcond is a callback function used to evaluate each VAL node in the query.
1550-
* checkval can be used to pass information to the callback. TS_execute doesn't
1551-
* do anything with it.
1552-
* It believes that ordinary operators are always closier to root than phrase
1553-
* operator, so, TS_execute() may not take care of lexeme's position at all.
1556+
* curitem: current tsquery item (initially, the first one)
1557+
* arg: opaque value to pass through to callback function
1558+
* flags: bitmask of flag bits shown in ts_utils.h
1559+
* chkcond: callback function to check whether a primitive value is present
1560+
*
1561+
* The logic here deals only with operators above any phrase operator, for
1562+
* which we do not need to worry about lexeme positions. As soon as we hit an
1563+
* OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
15541564
*/
15551565
bool
1556-
TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
1557-
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
1566+
TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1567+
TSExecuteCallback chkcond)
15581568
{
15591569
/* since this function recurses, it could be driven to stack overflow */
15601570
check_stack_depth();
15611571

15621572
if (curitem->type == QI_VAL)
1563-
return chkcond(checkval, (QueryOperand *) curitem,
1573+
return chkcond(arg, (QueryOperand *) curitem,
15641574
NULL /* we don't need position info */ );
15651575

15661576
switch (curitem->qoperator.oper)
15671577
{
15681578
case OP_NOT:
15691579
if (flags & TS_EXEC_CALC_NOT)
1570-
return !TS_execute(curitem + 1, checkval, flags, chkcond);
1580+
return !TS_execute(curitem + 1, arg, flags, chkcond);
15711581
else
15721582
return true;
15731583

15741584
case OP_AND:
1575-
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
1576-
return TS_execute(curitem + 1, checkval, flags, chkcond);
1585+
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1586+
return TS_execute(curitem + 1, arg, flags, chkcond);
15771587
else
15781588
return false;
15791589

15801590
case OP_OR:
1581-
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
1591+
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
15821592
return true;
15831593
else
1584-
return TS_execute(curitem + 1, checkval, flags, chkcond);
1594+
return TS_execute(curitem + 1, arg, flags, chkcond);
15851595

15861596
case OP_PHRASE:
15871597

15881598
/*
15891599
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
15901600
* do something more if it's called from TS_phrase_execute()
15911601
*/
1592-
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
1602+
return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
15931603

15941604
default:
15951605
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
@@ -1684,12 +1694,10 @@ ts_match_vq(PG_FUNCTION_ARGS)
16841694
chkval.arre = chkval.arrb + val->size;
16851695
chkval.values = STRPTR(val);
16861696
chkval.operand = GETOPERAND(query);
1687-
result = TS_execute(
1688-
GETQUERY(query),
1697+
result = TS_execute(GETQUERY(query),
16891698
&chkval,
16901699
TS_EXEC_CALC_NOT,
1691-
checkcondition_str
1692-
);
1700+
checkcondition_str);
16931701

16941702
PG_FREE_IF_COPY(val, 0);
16951703
PG_FREE_IF_COPY(query, 1);

src/include/tsearch/ts_utils.h

+47-14
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
#ifndef _PG_TS_UTILS_H_
1313
#define _PG_TS_UTILS_H_
1414

15-
#include "tsearch/ts_type.h"
16-
#include "tsearch/ts_public.h"
1715
#include "nodes/pg_list.h"
16+
#include "tsearch/ts_public.h"
17+
#include "tsearch/ts_type.h"
1818

1919
/*
2020
* Common parse definitions for tsvector and tsquery
@@ -102,34 +102,67 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
102102
extern text *generateHeadline(HeadlineParsedText *prs);
103103

104104
/*
105-
* Common check function for tsvector @@ tsquery
105+
* TSQuery execution support
106+
*
107+
* TS_execute() executes a tsquery against data that can be represented in
108+
* various forms. The TSExecuteCallback callback function is called to check
109+
* whether a given primitive tsquery value is matched in the data.
110+
*/
111+
112+
/*
113+
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
114+
* lexeme position data (because of a phrase-match operator in the tsquery).
115+
* The callback should fill in position data when it returns true (success).
116+
* If it cannot return position data, it may ignore its "data" argument, but
117+
* then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
118+
* and must arrange for a later recheck with position data available.
119+
*
120+
* The reported lexeme positions must be sorted and unique. Callers must only
121+
* consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
122+
* This allows the returned "pos" to point directly to the WordEntryPos
123+
* portion of a tsvector value. If "allocated" is true then the pos array
124+
* is palloc'd workspace and caller may free it when done.
125+
*
126+
* All fields of the ExecPhraseData struct are initially zeroed by caller.
106127
*/
107128
typedef struct ExecPhraseData
108129
{
109-
int npos;
110-
bool allocated;
111-
WordEntryPos *pos;
130+
int npos; /* number of positions reported */
131+
bool allocated; /* pos points to palloc'd data? */
132+
WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
112133
} ExecPhraseData;
113134

114135
/*
115-
* Evaluates tsquery, flags are followe below
136+
* Signature for TSQuery lexeme check functions
137+
*
138+
* arg: opaque value passed through from caller of TS_execute
139+
* val: lexeme to test for presence of
140+
* data: to be filled with lexeme positions; NULL if position data not needed
141+
*
142+
* Return TRUE if lexeme is present in data, else FALSE
116143
*/
117-
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
118-
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
144+
typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
145+
ExecPhraseData *data);
119146

147+
/*
148+
* Flag bits for TS_execute
149+
*/
120150
#define TS_EXEC_EMPTY (0x00)
121151
/*
122-
* if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true,
123-
* used in cases where NOT cannot be accurately computed (GiST) or
124-
* it isn't important (ranking)
152+
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
153+
* evaluated to be true. Useful in cases where NOT cannot be accurately
154+
* computed (GiST) or it isn't important (ranking).
125155
*/
126156
#define TS_EXEC_CALC_NOT (0x01)
127157
/*
128-
* Treat OP_PHRASE as OP_AND. Used when posiotional information is not
129-
* accessible, like in consistent methods of GIN/GiST indexes
158+
* Treat OP_PHRASE as OP_AND. Used when positional information is not
159+
* accessible, like in consistent methods of GIN/GiST indexes; rechecking
160+
* must occur later.
130161
*/
131162
#define TS_EXEC_PHRASE_AS_AND (0x02)
132163

164+
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
165+
TSExecuteCallback chkcond);
133166
extern bool tsquery_requires_match(QueryItem *curitem);
134167

135168
/*

0 commit comments

Comments
 (0)