Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1664ae1

Browse files
committed
Add websearch_to_tsquery
Error-tolerant conversion function with a web-like syntax for search queries; it simplifies building a search engine with an interface close to what users are accustomed to. Bump catalog version. Authors: Victor Drobny, Dmitry Ivanov, with editing by me. Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov. Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
1 parent fbc2733 commit 1664ae1

File tree

11 files changed

+1000
-119
lines changed

11 files changed

+1000
-119
lines changed

doc/src/sgml/func.sgml

+12
Original file line numberDiff line numberDiff line change
@@ -9630,6 +9630,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
96309630
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
96319631
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
96329632
</row>
9633+
<row>
9634+
<entry>
9635+
<indexterm>
9636+
<primary>websearch_to_tsquery</primary>
9637+
</indexterm>
9638+
<literal><function>websearch_to_tsquery(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type> , </optional> <replaceable class="parameter">query</replaceable> <type>text</type>)</function></literal>
9639+
</entry>
9640+
<entry><type>tsquery</type></entry>
9641+
<entry>produce <type>tsquery</type> from a web search style query</entry>
9642+
<entry><literal>websearch_to_tsquery('english', '"fat rat" or rat')</literal></entry>
9643+
<entry><literal>'fat' &lt;-&gt; 'rat' | 'rat'</literal></entry>
9644+
</row>
96339645
<row>
96349646
<entry>
96359647
<indexterm>

doc/src/sgml/textsearch.sgml

+88-4
Original file line numberDiff line numberDiff line change
@@ -797,13 +797,16 @@ UPDATE tt SET ti =
797797
<para>
798798
<productname>PostgreSQL</productname> provides the
799799
functions <function>to_tsquery</function>,
800-
<function>plainto_tsquery</function>, and
801-
<function>phraseto_tsquery</function>
800+
<function>plainto_tsquery</function>,
801+
<function>phraseto_tsquery</function> and
802+
<function>websearch_to_tsquery</function>
802803
for converting a query to the <type>tsquery</type> data type.
803804
<function>to_tsquery</function> offers access to more features
804805
than either <function>plainto_tsquery</function> or
805-
<function>phraseto_tsquery</function>, but it is less forgiving
806-
about its input.
806+
<function>phraseto_tsquery</function>, but it is less forgiving about its
807+
input. <function>websearch_to_tsquery</function> is a simplified version
808+
of <function>to_tsquery</function> with an alternative syntax, similar
809+
to the one used by web search engines.
807810
</para>
808811

809812
<indexterm>
@@ -962,6 +965,87 @@ SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
962965
</screen>
963966
</para>
964967

968+
<synopsis>
969+
websearch_to_tsquery(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>, </optional> <replaceable class="parameter">querytext</replaceable> <type>text</type>) returns <type>tsquery</type>
970+
</synopsis>
971+
972+
<para>
973+
<function>websearch_to_tsquery</function> creates a <type>tsquery</type>
974+
value from <replaceable>querytext</replaceable> using an alternative
975+
syntax in which simple unformatted text is a valid query.
976+
Unlike <function>plainto_tsquery</function>
977+
and <function>phraseto_tsquery</function>, it also recognizes certain
978+
operators. Moreover, this function should never raise syntax errors,
979+
which makes it possible to use raw user-supplied input for search.
980+
The following syntax is supported:
981+
<itemizedlist spacing="compact" mark="bullet">
982+
<listitem>
983+
<para>
984+
<literal>unquoted text</literal>: text not inside quote marks will be
985+
converted to terms separated by <literal>&amp;</literal> operators, as
986+
if processed by
987+
<function>plainto_tsquery</function>.
988+
</para>
989+
</listitem>
990+
<listitem>
991+
<para>
992+
<literal>"quoted text"</literal>: text inside quote marks will be
993+
converted to terms separated by <literal>&lt;-&gt;</literal>
994+
operators, as if processed by <function>phraseto_tsquery</function>.
995+
</para>
996+
</listitem>
997+
<listitem>
998+
<para>
999+
<literal>OR</literal>: logical or will be converted to
1000+
the <literal>|</literal> operator.
1001+
</para>
1002+
</listitem>
1003+
<listitem>
1004+
<para>
1005+
<literal>-</literal>: the logical not operator, converted to the
1006+
<literal>!</literal> operator.
1007+
</para>
1008+
</listitem>
1009+
</itemizedlist>
1010+
</para>
1011+
<para>
1012+
Examples:
1013+
<screen>
1014+
select websearch_to_tsquery('english', 'The fat rats');
1015+
websearch_to_tsquery
1016+
----------------------
1017+
'fat' &amp; 'rat'
1018+
(1 row)
1019+
</screen>
1020+
<screen>
1021+
select websearch_to_tsquery('english', '"supernovae stars" -crab');
1022+
websearch_to_tsquery
1023+
----------------------------------
1024+
'supernova' &lt;-&gt; 'star' &amp; !'crab'
1025+
(1 row)
1026+
</screen>
1027+
<screen>
1028+
select websearch_to_tsquery('english', '"sad cat" or "fat rat"');
1029+
websearch_to_tsquery
1030+
-----------------------------------
1031+
'sad' &lt;-&gt; 'cat' | 'fat' &lt;-&gt; 'rat'
1032+
(1 row)
1033+
</screen>
1034+
<screen>
1035+
select websearch_to_tsquery('english', 'signal -"segmentation fault"');
1036+
websearch_to_tsquery
1037+
---------------------------------------
1038+
'signal' &amp; !( 'segment' &lt;-&gt; 'fault' )
1039+
(1 row)
1040+
</screen>
1041+
<screen>
1042+
select websearch_to_tsquery('english', '""" )( dummy \\ query &lt;-&gt;');
1043+
websearch_to_tsquery
1044+
----------------------
1045+
'dummi' &amp; 'queri'
1046+
(1 row)
1047+
</screen>
1048+
</para>
9651049
</sect2>
9661050

9671051
<sect2 id="textsearch-ranking">

src/backend/tsearch/to_tsany.c

+35-3
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
490490
query = parse_tsquery(text_to_cstring(in),
491491
pushval_morph,
492492
PointerGetDatum(&data),
493-
false);
493+
0);
494494

495495
PG_RETURN_TSQUERY(query);
496496
}
@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
520520
query = parse_tsquery(text_to_cstring(in),
521521
pushval_morph,
522522
PointerGetDatum(&data),
523-
true);
523+
P_TSQ_PLAIN);
524524

525525
PG_RETURN_POINTER(query);
526526
}
@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
551551
query = parse_tsquery(text_to_cstring(in),
552552
pushval_morph,
553553
PointerGetDatum(&data),
554-
true);
554+
P_TSQ_PLAIN);
555555

556556
PG_RETURN_TSQUERY(query);
557557
}
@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
567567
ObjectIdGetDatum(cfgId),
568568
PointerGetDatum(in)));
569569
}
570+
571+
Datum
572+
websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
573+
{
574+
text *in = PG_GETARG_TEXT_PP(1);
575+
MorphOpaque data;
576+
TSQuery query = NULL;
577+
578+
data.cfg_id = PG_GETARG_OID(0);
579+
580+
data.qoperator = OP_AND;
581+
582+
query = parse_tsquery(text_to_cstring(in),
583+
pushval_morph,
584+
PointerGetDatum(&data),
585+
P_TSQ_WEB);
586+
587+
PG_RETURN_TSQUERY(query);
588+
}
589+
590+
Datum
591+
websearch_to_tsquery(PG_FUNCTION_ARGS)
592+
{
593+
text *in = PG_GETARG_TEXT_PP(0);
594+
Oid cfgId;
595+
596+
cfgId = getTSCurrentConfig(true);
597+
PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
598+
ObjectIdGetDatum(cfgId),
599+
PointerGetDatum(in)));
600+
601+
}

0 commit comments

Comments
 (0)