Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5644d6f

Browse files
committed
Add comments and a missing CHECK_FOR_INTERRUPTS in ts_headline.
I just spent an annoying amount of time reverse-engineering the 100%-undocumented API between ts_headline and the text search parser's prsheadline function. Add some commentary about that while it's fresh in mind. Also remove some unused macros in wparser_def.c.

While at it, I noticed that when commit 78e73e8 added a CHECK_FOR_INTERRUPTS call in TS_execute_recurse, it missed doing so in the parallel function TS_phrase_execute, which surely needs one just as much.

Back-patch because of the missing CHECK_FOR_INTERRUPTS. Might as well back-patch the rest of this too.
1 parent f686ae8 commit 5644d6f

File tree

4 files changed

+60
-21
lines changed

4 files changed

+60
-21
lines changed

src/backend/tsearch/ts_parse.c

+13
Original file line number | Diff line number | Diff line change
@@ -433,6 +433,8 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
433433
/*
434434
* Headline framework
435435
*/
436+
437+
/* Add a word to prs->words[] */
436438
static void
437439
hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
438440
{
@@ -449,6 +451,14 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
449451
prs->curwords++;
450452
}
451453

454+
/*
455+
* Add pos and matching-query-item data to the just-added word.
456+
* Here, buf/buflen represent a processed lexeme, not raw token text.
457+
*
458+
* If the query contains more than one matching item, we replicate
459+
* the last-added word so that each item can be pointed to. The
460+
* duplicate entries are marked with repeated = 1.
461+
*/
452462
static void
453463
hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
454464
{
@@ -589,6 +599,9 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
589599
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
590600
}
591601

602+
/*
603+
* Generate the headline, as a text object, from HeadlineParsedText.
604+
*/
592605
text *
593606
generateHeadline(HeadlineParsedText *prs)
594607
{

src/backend/tsearch/wparser_def.c

-4
Original file line number | Diff line number | Diff line change
@@ -1914,10 +1914,6 @@ prsd_end(PG_FUNCTION_ARGS)
19141914
*/
19151915

19161916
/* token type classification macros */
1917-
#define LEAVETOKEN(x) ( (x)==SPACE )
1918-
#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1919-
#define ENDPUNCTOKEN(x) ( (x)==SPACE )
1920-
19211917
#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
19221918
#define HLIDREPLACE(x) ( (x)==TAG_T )
19231919
#define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )

src/backend/utils/adt/tsvector_op.c

+3
Original file line number | Diff line number | Diff line change
@@ -1617,6 +1617,9 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
16171617
/* since this function recurses, it could be driven to stack overflow */
16181618
check_stack_depth();
16191619

1620+
/* ... and let's check for query cancel while we're at it */
1621+
CHECK_FOR_INTERRUPTS();
1622+
16201623
if (curitem->type == QI_VAL)
16211624
return chkcond(arg, (QueryOperand *) curitem, data);
16221625

src/include/tsearch/ts_public.h

+44 -17
Original file line number | Diff line number | Diff line change
@@ -30,33 +30,60 @@ typedef struct
3030
} LexDescr;
3131

3232
/*
33-
* Interface to headline generator
33+
* Interface to headline generator (tsparser's prsheadline function)
34+
*
35+
* HeadlineParsedText describes the text that is to be highlighted.
36+
* Some fields are passed from the core code to the prsheadline function,
37+
* while others are output from the prsheadline function.
38+
*
39+
* The principal data is words[], an array of HeadlineWordEntry,
40+
* one entry per token, of length curwords.
41+
* The fields of HeadlineWordEntry are:
42+
*
43+
* in, selected, replace, skip: these flags are initially zero
44+
* and may be set by the prsheadline function. A consecutive group
45+
* of tokens marked "in" forms a "fragment" to be output.
46+
* Such tokens may additionally be marked selected, replace, or skip
47+
* to modify how they are shown. (If you set more than one of those
48+
* bits, you get an unspecified one of those behaviors.)
49+
*
50+
* type, len, pos, word: filled by core code to describe the token.
51+
*
52+
* item: if the token matches any operand of the tsquery of interest,
53+
* a pointer to such an operand. (If there are multiple matching
54+
* operands, we generate extra copies of the HeadlineWordEntry to hold
55+
* all the pointers. The extras are marked with repeated = 1 and should
56+
* be ignored except for checking the item pointer.)
3457
*/
3558
typedef struct
3659
{
37-
uint32 selected:1,
38-
in:1,
39-
replace:1,
40-
repeated:1,
41-
skip:1,
42-
unused:3,
43-
type:8,
44-
len:16;
45-
WordEntryPos pos;
46-
char *word;
47-
QueryOperand *item;
60+
uint32 selected:1, /* token is to be highlighted */
61+
in:1, /* token is part of headline */
62+
replace:1, /* token is to be replaced with a space */
63+
repeated:1, /* duplicate entry to hold item pointer */
64+
skip:1, /* token is to be skipped (not output) */
65+
unused:3, /* available bits */
66+
type:8, /* parser's token category */
67+
len:16; /* length of token */
68+
WordEntryPos pos; /* position of token */
69+
char *word; /* text of token (not null-terminated) */
70+
QueryOperand *item; /* a matching query operand, or NULL if none */
4871
} HeadlineWordEntry;
4972

5073
typedef struct
5174
{
75+
/* Fields filled by core code before calling prsheadline function: */
5276
HeadlineWordEntry *words;
53-
int32 lenwords;
54-
int32 curwords;
55-
int32 vectorpos; /* positions a-la tsvector */
56-
char *startsel;
77+
int32 lenwords; /* allocated length of words[] */
78+
int32 curwords; /* current number of valid entries */
79+
int32 vectorpos; /* used by ts_parse.c in filling pos fields */
80+
81+
/* The prsheadline function must fill these fields: */
82+
/* Strings for marking selected tokens and separating fragments: */
83+
char *startsel; /* palloc'd strings */
5784
char *stopsel;
5885
char *fragdelim;
59-
int16 startsellen;
86+
int16 startsellen; /* lengths of strings */
6087
int16 stopsellen;
6188
int16 fragdelimlen;
6289
} HeadlineParsedText;

0 commit comments

Comments
 (0)