Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2dfc126

Browse files
committed
Ensure that a tsquery like '!foo' matches empty tsvectors.
!foo means "the tsvector does not contain foo", and therefore it should match an empty tsvector. ts_match_vq() overenthusiastically supposed that an empty tsvector could never match any query, so it forcibly returned FALSE, the wrong answer. Remove the premature optimization.

Our behavior on this point was inconsistent, because while seqscans and GIST index searches both failed to match empty tsvectors, GIN index searches would find them, since GIN scans don't rely on ts_match_vq(). That makes this certainly a bug, not a debatable definition disagreement, so back-patch to all supported branches.

Report and diagnosis by Tom Dunstan (bug #14515); added test cases by me.

Discussion: https://postgr.es/m/20170126025524.1434.97828@wrigleys.postgresql.org
1 parent e4e5ea6 commit 2dfc126

File tree

5 files changed

+186
-1
lines changed

5 files changed

+186
-1
lines changed

src/backend/utils/adt/tsvector_op.c

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1926,7 +1926,8 @@ ts_match_vq(PG_FUNCTION_ARGS)
19261926
CHKVAL chkval;
19271927
bool result;
19281928

1929-
if (!val->size || !query->size)
1929+
/* empty query matches nothing */
1930+
if (!query->size)
19301931
{
19311932
PG_FREE_IF_COPY(val, 0);
19321933
PG_FREE_IF_COPY(query, 1);

src/test/regress/expected/tsearch.out

+137
Original file line number | Diff line number | Diff line change
@@ -98,8 +98,108 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
9898
494
9999
(1 row)
100100

101+
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
102+
count
103+
-------
104+
158
105+
(1 row)
106+
107+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
108+
count
109+
-------
110+
0
111+
(1 row)
112+
113+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
114+
count
115+
-------
116+
508
117+
(1 row)
118+
101119
create index wowidx on test_tsvector using gist (a);
102120
SET enable_seqscan=OFF;
121+
SET enable_indexscan=ON;
122+
SET enable_bitmapscan=OFF;
123+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
124+
QUERY PLAN
125+
-------------------------------------------------------
126+
Aggregate
127+
-> Index Scan using wowidx on test_tsvector
128+
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
129+
(3 rows)
130+
131+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
132+
count
133+
-------
134+
158
135+
(1 row)
136+
137+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
138+
count
139+
-------
140+
17
141+
(1 row)
142+
143+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
144+
count
145+
-------
146+
6
147+
(1 row)
148+
149+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
150+
count
151+
-------
152+
98
153+
(1 row)
154+
155+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
156+
count
157+
-------
158+
23
159+
(1 row)
160+
161+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
162+
count
163+
-------
164+
39
165+
(1 row)
166+
167+
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
168+
count
169+
-------
170+
494
171+
(1 row)
172+
173+
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
174+
count
175+
-------
176+
158
177+
(1 row)
178+
179+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
180+
count
181+
-------
182+
0
183+
(1 row)
184+
185+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
186+
count
187+
-------
188+
508
189+
(1 row)
190+
191+
SET enable_indexscan=OFF;
192+
SET enable_bitmapscan=ON;
193+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
194+
QUERY PLAN
195+
-------------------------------------------------------------
196+
Aggregate
197+
-> Bitmap Heap Scan on test_tsvector
198+
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
199+
-> Bitmap Index Scan on wowidx
200+
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
201+
(5 rows)
202+
103203
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
104204
count
105205
-------
@@ -148,10 +248,35 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
148248
158
149249
(1 row)
150250

251+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
252+
count
253+
-------
254+
0
255+
(1 row)
256+
257+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
258+
count
259+
-------
260+
508
261+
(1 row)
262+
151263
RESET enable_seqscan;
264+
RESET enable_indexscan;
265+
RESET enable_bitmapscan;
152266
DROP INDEX wowidx;
153267
CREATE INDEX wowidx ON test_tsvector USING gin (a);
154268
SET enable_seqscan=OFF;
269+
-- GIN only supports bitmapscan, so no need to test plain indexscan
270+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
271+
QUERY PLAN
272+
-------------------------------------------------------------
273+
Aggregate
274+
-> Bitmap Heap Scan on test_tsvector
275+
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
276+
-> Bitmap Index Scan on wowidx
277+
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
278+
(5 rows)
279+
155280
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
156281
count
157282
-------
@@ -200,6 +325,18 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
200325
158
201326
(1 row)
202327

328+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
329+
count
330+
-------
331+
0
332+
(1 row)
333+
334+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
335+
count
336+
-------
337+
508
338+
(1 row)
339+
203340
RESET enable_seqscan;
204341
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
205342
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;

src/test/regress/expected/tstypes.out

+12
Original file line number | Diff line number | Diff line change
@@ -773,6 +773,18 @@ select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
773773
t
774774
(1 row)
775775

776+
select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
777+
true
778+
------
779+
t
780+
(1 row)
781+
782+
select to_tsvector('simple', '') @@ '!foo' AS "true";
783+
true
784+
------
785+
t
786+
(1 row)
787+
776788
--ranking
777789
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
778790
ts_rank

src/test/regress/sql/tsearch.sql

+33
Original file line number | Diff line number | Diff line change
@@ -48,10 +48,17 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
4848
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
4949
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
5050
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
51+
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
52+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
53+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
5154

5255
create index wowidx on test_tsvector using gist (a);
5356

5457
SET enable_seqscan=OFF;
58+
SET enable_indexscan=ON;
59+
SET enable_bitmapscan=OFF;
60+
61+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
5562

5663
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
5764
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
@@ -61,14 +68,37 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
6168
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
6269
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
6370
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
71+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
72+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
73+
74+
SET enable_indexscan=OFF;
75+
SET enable_bitmapscan=ON;
76+
77+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
78+
79+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
80+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
81+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
82+
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
83+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
84+
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
85+
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
86+
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
87+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
88+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
6489

6590
RESET enable_seqscan;
91+
RESET enable_indexscan;
92+
RESET enable_bitmapscan;
6693

6794
DROP INDEX wowidx;
6895

6996
CREATE INDEX wowidx ON test_tsvector USING gin (a);
7097

7198
SET enable_seqscan=OFF;
99+
-- GIN only supports bitmapscan, so no need to test plain indexscan
100+
101+
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
72102

73103
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
74104
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
@@ -78,8 +108,11 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
78108
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
79109
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
80110
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
111+
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
112+
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
81113

82114
RESET enable_seqscan;
115+
83116
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
84117
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
85118
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;

src/test/regress/sql/tstypes.sql

+2
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,8 @@ select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
145145
select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
146146
select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
147147
select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
148+
select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
149+
select to_tsvector('simple', '') @@ '!foo' AS "true";
148150

149151
--ranking
150152
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');

0 commit comments

Comments
 (0)