Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit bb14050

Browse files
committed
Phrase full text search.
Patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery. The on-disk and binary in/out formats of tsquery are backward compatible. It has two side effects: - it changes the sort order of tsquery values, so users who have a btree index over tsquery should reindex it - tsquery output contains fewer parentheses, so it becomes more readable Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov Reviewers: Alexander Korotkov, Artur Zakirov
1 parent 015e889 commit bb14050

30 files changed

+2536
-444
lines changed

contrib/tsearch2/expected/tsearch2.out

+28-28
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
278278
(1 row)
279279

280280
SELECT '1|(2|(4|(5|6)))'::tsquery;
281-
tsquery
282-
-----------------------------------------
283-
'1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
281+
tsquery
282+
-----------------------------
283+
'1' | '2' | '4' | '5' | '6'
284284
(1 row)
285285

286286
SELECT '1|2|4|5|6'::tsquery;
287-
tsquery
288-
-----------------------------------------
289-
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
287+
tsquery
288+
-----------------------------
289+
'1' | '2' | '4' | '5' | '6'
290290
(1 row)
291291

292292
SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
340340
select 'a | f' < 'b & c'::tsquery;
341341
?column?
342342
----------
343-
t
343+
f
344344
(1 row)
345345

346346
select 'a | ff' < 'b & c'::tsquery;
@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york';
443443

444444
set enable_seqscan=on;
445445
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
446-
rewrite
447-
----------------------------------------------------------------------------------
448-
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
446+
rewrite
447+
------------------------------------------------------------------------------
448+
'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
449449
(1 row)
450450

451451
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
461461
(1 row)
462462

463463
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
464-
rewrite
465-
-------------------------------------------------------------------------------------
466-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
464+
rewrite
465+
---------------------------------------------------------------------------------
466+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
467467
(1 row)
468468

469469
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
479479
(1 row)
480480

481481
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
482-
rewrite
483-
-------------------------------------------------------------------------------------
484-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
482+
rewrite
483+
---------------------------------------------------------------------------------
484+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
485485
(1 row)
486486

487487
select keyword from test_tsquery where keyword @> 'new';
@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
520520
(1 row)
521521

522522
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
523-
rewrite
524-
-------------------------------------------------------------------------------------
525-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
523+
rewrite
524+
---------------------------------------------------------------------------------
525+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
526526
(1 row)
527527

528528
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
538538
(1 row)
539539

540540
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
541-
rewrite
542-
-------------------------------------------------------------------------------------
543-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
541+
rewrite
542+
---------------------------------------------------------------------------------
543+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
544544
(1 row)
545545

546546
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
581581
(1 row)
582582

583583
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
584-
rewrite
585-
-------------------------------------------------------------------------------------
586-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
584+
rewrite
585+
---------------------------------------------------------------------------------
586+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
587587
(1 row)
588588

589589
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
599599
(1 row)
600600

601601
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
602-
rewrite
603-
-------------------------------------------------------------------------------------
604-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
602+
rewrite
603+
---------------------------------------------------------------------------------
604+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
605605
(1 row)
606606

607607
set enable_seqscan='on';

doc/src/sgml/datatype.sgml

+5-4
Original file line numberDiff line numberDiff line change
@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
39243924
<para>
39253925
A <type>tsquery</type> value stores lexemes that are to be
39263926
searched for, and combines them honoring the Boolean operators
3927-
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3928-
<literal>!</> (NOT). Parentheses can be used to enforce grouping
3927+
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3928+
<literal>!</> (NOT), as well as the phrase search operator
3929+
<literal>&lt;-&gt;</> (FOLLOWED BY). Parentheses can be used to enforce grouping
39293930
of the operators:
39303931

39313932
<programlisting>
@@ -3946,8 +3947,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
39463947
</programlisting>
39473948

39483949
In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
3949-
and <literal>&amp;</literal> (AND) binds more tightly than
3950-
<literal>|</literal> (OR).
3950+
and <literal>&amp;</literal> (AND) and <literal>&lt;-&gt;</literal> (FOLLOWED BY)
3951+
both bind more tightly than <literal>|</literal> (OR).
39513952
</para>
39523953

39533954
<para>

doc/src/sgml/func.sgml

+39
Original file line numberDiff line numberDiff line change
@@ -9127,6 +9127,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
91279127
<entry><literal>!! 'cat'::tsquery</literal></entry>
91289128
<entry><literal>!'cat'</literal></entry>
91299129
</row>
9130+
<row>
9131+
<entry> <literal>&lt;-&gt;</literal> </entry>
9132+
<entry><type>tsquery</> followed by <type>tsquery</></entry>
9133+
<entry><literal>to_tsquery('fat') &lt;-&gt; to_tsquery('rat')</literal></entry>
9134+
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
9135+
</row>
91309136
<row>
91319137
<entry> <literal>@&gt;</literal> </entry>
91329138
<entry><type>tsquery</> contains another ?</entry>
@@ -9219,6 +9225,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92199225
<entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry>
92209226
<entry><literal>'fat' &amp; 'rat'</literal></entry>
92219227
</row>
9228+
<row>
9229+
<entry>
9230+
<indexterm>
9231+
<primary>phraseto_tsquery</primary>
9232+
</indexterm>
9233+
<literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
9234+
</entry>
9235+
<entry><type>tsquery</type></entry>
9236+
<entry>produce <type>tsquery</> that searches for a phrase, ignoring punctuation</entry>
9237+
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
9238+
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
9239+
</row>
92229240
<row>
92239241
<entry>
92249242
<indexterm>
@@ -9421,6 +9439,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
94219439
<entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry>
94229440
<entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry>
94239441
</row>
9442+
<row>
9443+
<entry>
9444+
<indexterm>
9445+
<primary>tsquery_phrase</primary>
9446+
</indexterm>
9447+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
9448+
</entry>
9449+
<entry><type>tsquery</type></entry>
9450+
<entry>implementation of the <literal>&lt;-&gt;</> (FOLLOWED BY) operator</entry>
9451+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
9452+
<entry><literal>'fat' &lt;-&gt; 'cat'</literal></entry>
9453+
</row>
9454+
<row>
9455+
<entry>
9456+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
9457+
</entry>
9458+
<entry><type>tsquery</type></entry>
9459+
<entry>phrase-concatenate two queries with the given <replaceable>distance</> (<literal>&lt;N&gt;</> operator)</entry>
9460+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
9461+
<entry><literal>'fat' &lt;10&gt; 'cat'</literal></entry>
9462+
</row>
94249463
<row>
94259464
<entry>
94269465
<indexterm>

0 commit comments

Comments
 (0)