|
1 |
| -<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.207 2007/08/21 01:11:11 tgl Exp $ --> |
| 1 | +<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.208 2007/08/29 20:37:14 momjian Exp $ --> |
2 | 2 |
|
3 | 3 | <chapter id="datatype">
|
4 | 4 | <title id="datatype-title">Data Types</title>
|
|
234 | 234 | <entry>date and time, including time zone</entry>
|
235 | 235 | </row>
|
236 | 236 |
|
| 237 | + <row> |
| 238 | + <entry><type>tsquery</type></entry> |
| 239 | + <entry></entry> |
| 240 | + <entry>full text search query</entry> |
| 241 | + </row> |
| 242 | + |
| 243 | + <row> |
| 244 | + <entry><type>tsvector</type></entry> |
| 245 | + <entry></entry> |
| 246 | + <entry>full text search document</entry> |
| 247 | + </row> |
| 248 | + |
237 | 249 | <row>
|
238 | 250 | <entry><type>uuid</type></entry>
|
239 | 251 | <entry></entry>
|
@@ -3264,6 +3276,137 @@ a0eebc999c0b4ef8bb6d6bb9bd380a11
|
3264 | 3276 | </para>
|
3265 | 3277 | </sect1>
|
3266 | 3278 |
|
| 3279 | + <sect1 id="datatype-textsearch"> |
| 3280 | + <title>Full Text Search</title> |
| 3281 | + |
| 3282 | + <variablelist> |
| 3283 | + |
| 3284 | + <indexterm zone="datatype-textsearch"> |
| 3285 | + <primary>tsvector</primary> |
| 3286 | + </indexterm> |
| 3287 | + |
| 3288 | + <varlistentry> |
| 3289 | + <term><firstterm>tsvector</firstterm></term> |
| 3290 | + <listitem> |
| 3291 | + |
| 3292 | + <para> |
| 3293 | + <type>tsvector</type> is a data type that represents a document and is |
| 3294 | + optimized for full text searching. In the simplest case, |
| 3295 | + <type>tsvector</type> is a sorted list of lexemes, so even without indexes |
| 3296 | + full text searches perform better than standard <literal>~</literal> and |
| 3297 | + <literal>LIKE</literal> operations: |
| 3298 | + |
| 3299 | +<programlisting> |
| 3300 | +SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector; |
| 3301 | + tsvector |
| 3302 | +---------------------------------------------------- |
| 3303 | + 'a' 'on' 'and' 'ate' 'cat' 'fat' 'mat' 'rat' 'sat' |
| 3304 | +</programlisting> |
| 3305 | + |
| 3306 | + Note that a quoted space (<literal>' '</literal>) is also a lexeme: |
| 3307 | + |
| 3308 | +<programlisting> |
| 3309 | +SELECT 'space '' '' is a lexeme'::tsvector; |
| 3310 | + tsvector |
| 3311 | +---------------------------------- |
| 3312 | + 'a' 'is' ' ' 'space' 'lexeme' |
| 3313 | +</programlisting> |
| 3314 | + |
| 3315 | + Each lexeme can optionally have positional information, which is used for |
| 3316 | + <firstterm>proximity ranking</firstterm>: |
| 3317 | + |
| 3318 | +<programlisting> |
| 3319 | +SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector; |
| 3320 | + tsvector |
| 3321 | +------------------------------------------------------------------------------- |
| 3322 | + 'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4 |
| 3323 | +</programlisting> |
| 3324 | + |
| 3325 | + Each lexeme position can also be labeled as <literal>A</literal>, |
| 3326 | + <literal>B</literal>, <literal>C</literal>, <literal>D</literal>, |
| 3327 | + where <literal>D</literal> is the default. These labels can be used to group |
| 3328 | + lexemes into different <emphasis>importance</emphasis> or |
| 3329 | + <emphasis>rankings</emphasis>, for example to reflect document structure. |
| 3330 | + Actual values can be assigned at search time and used during the calculation |
| 3331 | + of the document rank. This is very useful for controlling search results. |
| 3332 | + </para> |
| 3333 | + |
| 3334 | + <para> |
| 3335 | + The concatenation operator, e.g. <literal>tsvector || tsvector</literal>, |
| 3336 | + can "construct" a document from several parts, which may even come from different tables. |
| 3337 | + The order of the operands is important if the <type>tsvector</type> values contain positional information, |
| 3338 | + because positions in the right-hand operand are offset by the largest position in the left-hand operand: |
| 3339 | + |
| 3340 | +<programlisting> |
| 3341 | +SELECT 'fat:1 cat:2'::tsvector || 'fat:1 rat:2'::tsvector; |
| 3342 | + ?column? |
| 3343 | +--------------------------- |
| 3344 | + 'cat':2 'fat':1,3 'rat':4 |
| 3345 | + |
| 3346 | +SELECT 'fat:1 rat:2'::tsvector || 'fat:1 cat:2'::tsvector; |
| 3347 | + ?column? |
| 3348 | +--------------------------- |
| 3349 | + 'cat':4 'fat':1,3 'rat':2 |
| 3350 | +</programlisting> |
| 3351 | + |
| 3352 | + </para> |
| 3353 | + |
| 3354 | + </listitem> |
| 3355 | + |
| 3356 | + </varlistentry> |
| 3357 | + |
| 3358 | + <indexterm zone="datatype-textsearch"> |
| 3359 | + <primary>tsquery</primary> |
| 3360 | + </indexterm> |
| 3361 | + |
| 3362 | + <varlistentry> |
| 3363 | + <term><firstterm>tsquery</firstterm></term> |
| 3364 | + <listitem> |
| 3365 | + |
| 3366 | + <para> |
| 3367 | + <type>tsquery</type> is a data type for textual queries which supports |
| 3368 | + the boolean operators <literal>&</literal> (AND), <literal>|</literal> (OR), and <literal>!</literal> (NOT), |
| 3369 | + as well as parentheses for grouping. A <type>tsquery</type> consists of lexemes |
| 3370 | + (optionally labeled by letters) with boolean operators in between: |
| 3371 | + |
| 3372 | +<programlisting> |
| 3373 | +SELECT 'fat & cat'::tsquery; |
| 3374 | + tsquery |
| 3375 | +--------------- |
| 3376 | + 'fat' & 'cat' |
| 3377 | +SELECT 'fat:ab & cat'::tsquery; |
| 3378 | + tsquery |
| 3379 | +------------------ |
| 3380 | + 'fat':AB & 'cat' |
| 3381 | +</programlisting> |
| 3382 | + |
| 3383 | + Labels can be used to restrict the search region, which allows the |
| 3384 | + development of different search engines using the same full text index. |
| 3385 | + </para> |
| 3386 | + |
| 3387 | + <para> |
| 3388 | + <type>tsquery</type> values can be combined using the <literal>&&</literal> (AND) |
| 3389 | + and <literal>||</literal> (OR) operators: |
| 3390 | + |
| 3391 | +<programlisting> |
| 3392 | +SELECT 'a & b'::tsquery && 'c | d'::tsquery; |
| 3393 | + ?column? |
| 3394 | +--------------------------- |
| 3395 | + 'a' & 'b' & ( 'c' | 'd' ) |
| 3396 | + |
| 3397 | +SELECT 'a & b'::tsquery || 'c|d'::tsquery; |
| 3398 | + ?column? |
| 3399 | +--------------------------- |
| 3400 | + 'a' & 'b' | ( 'c' | 'd' ) |
| 3401 | +</programlisting> |
| 3402 | + |
| 3403 | + </para> |
| 3404 | + </listitem> |
| 3405 | + </varlistentry> |
| 3406 | + </variablelist> |
| 3407 | + |
| 3408 | + </sect1> |
| 3409 | + |
3267 | 3410 | <sect1 id="datatype-xml">
|
3268 | 3411 | <title><acronym>XML</> Type</title>
|
3269 | 3412 |
|
|
0 commit comments