8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.82 2003/08/04 02:39:59 momjian Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
28
28
#include "access/heapam.h"
29
29
#include "executor/execdebug.h"
30
30
#include "executor/nodeIndexscan.h"
31
+ #include "miscadmin.h"
31
32
#include "nodes/nodeFuncs.h"
32
33
#include "optimizer/clauses.h"
33
34
#include "parser/parsetree.h"
34
35
35
- /* ----------------
36
- * Misc stuff to move to executor.h soon -cim 6/5/90
37
- * ----------------
38
- */
36
+
39
37
#define NO_OP 0
40
38
#define LEFT_OP 1
41
39
#define RIGHT_OP 2
42
40
41
+ /*
42
+ * In a multiple-index plan, we must take care to return any given tuple
43
+ * only once, even if it matches conditions of several index scans. Our
44
+ * preferred way to do this is to record already-returned tuples in a hash
45
+ * table (using the TID as unique identifier). However, in a very large
46
+ * scan this could conceivably run out of memory. We limit the hash table
47
+ * to no more than SortMem KB; if it grows past that, we fall back to the
48
+ * pre-7.4 technique: evaluate the prior-scan index quals again for each
49
+ * tuple (which is space-efficient, but slow).
50
+ *
51
+ * When scanning backwards, we use scannum to determine when to emit the
52
+ * tuple --- we have to re-emit a tuple in the same scan as it was first
53
+ * encountered.
54
+ *
55
+ * Note: this code would break if the planner were ever to create a multiple
56
+ * index plan with overall backwards direction, because the hashtable code
57
+ * will emit a tuple the first time it is encountered (which would be the
58
+ * highest scan in which it matches the index), but the evaluate-the-quals
59
+ * code will emit a tuple in the lowest-numbered scan in which it's valid.
60
+ * This could be fixed at need by making the evaluate-the-quals case more
61
+ * complex. Currently the planner will never create such a plan (since it
62
+ * considers multi-index plans unordered anyway), so there's no need for
63
+ * more complexity.
64
+ */
65
+ typedef struct
66
+ {
67
+ /* tid is the hash key and so must be first! */
68
+ ItemPointerData tid ; /* TID of a tuple we've returned */
69
+ int scannum ; /* number of scan we returned it in */
70
+ } DupHashTabEntry ;
71
+
72
+
43
73
static TupleTableSlot * IndexNext (IndexScanState * node );
74
+ static void create_duphash (IndexScanState * node );
75
+
44
76
45
77
/* ----------------------------------------------------------------
46
78
* IndexNext
@@ -163,7 +195,7 @@ IndexNext(IndexScanState *node)
163
195
while ((tuple = index_getnext (scandesc , direction )) != NULL )
164
196
{
165
197
/*
166
- * store the scanned tuple in the scan tuple slot of the scan
198
+ * Store the scanned tuple in the scan tuple slot of the scan
167
199
* state. Note: we pass 'false' because tuples returned by
168
200
* amgetnext are pointers onto disk pages and must not be
169
201
* pfree()'d.
@@ -174,36 +206,80 @@ IndexNext(IndexScanState *node)
174
206
false); /* don't pfree */
175
207
176
208
/*
177
- * We must check to see if the current tuple was already
178
- * matched by an earlier index, so we don't double-report it.
179
- * We do this by passing the tuple through ExecQual and
180
- * checking for failure with all previous qualifications.
209
+ * If it's a multiple-index scan, make sure not to double-report
210
+ * a tuple matched by more than one index. (See notes above.)
181
211
*/
182
- if (node -> iss_IndexPtr > 0 )
212
+ if (numIndices > 1 )
183
213
{
184
- bool prev_matches = false;
185
- int prev_index ;
186
- List * qual ;
187
-
188
- econtext -> ecxt_scantuple = slot ;
189
- ResetExprContext (econtext );
190
- qual = node -> indxqualorig ;
191
- for (prev_index = 0 ;
192
- prev_index < node -> iss_IndexPtr ;
193
- prev_index ++ )
214
+ /* First try the hash table */
215
+ if (node -> iss_DupHash )
194
216
{
195
- if (ExecQual ((List * ) lfirst (qual ), econtext , false))
217
+ DupHashTabEntry * entry ;
218
+ bool found ;
219
+
220
+ entry = (DupHashTabEntry * )
221
+ hash_search (node -> iss_DupHash ,
222
+ & tuple -> t_data -> t_ctid ,
223
+ HASH_ENTER ,
224
+ & found );
225
+ if (entry == NULL ||
226
+ node -> iss_DupHash -> hctl -> nentries > node -> iss_MaxHash )
227
+ {
228
+ /* out of memory (either hard or soft limit) */
229
+ /* release hash table and fall thru to old code */
230
+ hash_destroy (node -> iss_DupHash );
231
+ node -> iss_DupHash = NULL ;
232
+ }
233
+ else if (found )
196
234
{
197
- prev_matches = true;
198
- break ;
235
+ /* pre-existing entry */
236
+
237
+ /*
238
+ * It's duplicate if first emitted in a different
239
+ * scan. If same scan, we must be backing up, so
240
+ * okay to emit again.
241
+ */
242
+ if (entry -> scannum != node -> iss_IndexPtr )
243
+ {
244
+ /* Dup, so drop it and loop back for another */
245
+ ExecClearTuple (slot );
246
+ continue ;
247
+ }
248
+ }
249
+ else
250
+ {
251
+ /* new entry, finish filling it in */
252
+ entry -> scannum = node -> iss_IndexPtr ;
199
253
}
200
- qual = lnext (qual );
201
254
}
202
- if (prev_matches )
255
+ /* If hash table has overflowed, do it the hard way */
256
+ if (node -> iss_DupHash == NULL &&
257
+ node -> iss_IndexPtr > 0 )
203
258
{
204
- /* Duplicate, so drop it and loop back for another */
205
- ExecClearTuple (slot );
206
- continue ;
259
+ bool prev_matches = false;
260
+ int prev_index ;
261
+ List * qual ;
262
+
263
+ econtext -> ecxt_scantuple = slot ;
264
+ ResetExprContext (econtext );
265
+ qual = node -> indxqualorig ;
266
+ for (prev_index = 0 ;
267
+ prev_index < node -> iss_IndexPtr ;
268
+ prev_index ++ )
269
+ {
270
+ if (ExecQual ((List * ) lfirst (qual ), econtext , false))
271
+ {
272
+ prev_matches = true;
273
+ break ;
274
+ }
275
+ qual = lnext (qual );
276
+ }
277
+ if (prev_matches )
278
+ {
279
+ /* Dup, so drop it and loop back for another */
280
+ ExecClearTuple (slot );
281
+ continue ;
282
+ }
207
283
}
208
284
}
209
285
@@ -383,6 +459,14 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
383
459
return ;
384
460
}
385
461
462
+ /* reset hash table */
463
+ if (numIndices > 1 )
464
+ {
465
+ if (node -> iss_DupHash )
466
+ hash_destroy (node -> iss_DupHash );
467
+ create_duphash (node );
468
+ }
469
+
386
470
/* reset index scans */
387
471
if (ScanDirectionIsBackward (((IndexScan * ) node -> ss .ps .plan )-> indxorderdir ))
388
472
node -> iss_IndexPtr = numIndices ;
@@ -432,6 +516,10 @@ ExecEndIndexScan(IndexScanState *node)
432
516
ExecClearTuple (node -> ss .ps .ps_ResultTupleSlot );
433
517
ExecClearTuple (node -> ss .ss_ScanTupleSlot );
434
518
519
+ /* drop hash table */
520
+ if (node -> iss_DupHash )
521
+ hash_destroy (node -> iss_DupHash );
522
+
435
523
/*
436
524
* close the index relations
437
525
*/
@@ -507,7 +595,7 @@ ExecIndexRestrPos(IndexScanState *node)
507
595
508
596
/* ----------------------------------------------------------------
509
597
* ExecInitIndexScan
510
- *
598
+ *
511
599
* Initializes the index scan's state information, creates
512
600
* scan keys, and opens the base and index relations.
513
601
*
@@ -919,12 +1007,42 @@ ExecInitIndexScan(IndexScan *node, EState *estate)
919
1007
ExecAssignResultTypeFromTL (& indexstate -> ss .ps );
920
1008
ExecAssignScanProjectionInfo (& indexstate -> ss );
921
1009
1010
+ /*
1011
+ * Initialize hash table if needed.
1012
+ */
1013
+ if (numIndices > 1 )
1014
+ create_duphash (indexstate );
1015
+ else
1016
+ indexstate -> iss_DupHash = NULL ;
1017
+
922
1018
/*
923
1019
* all done.
924
1020
*/
925
1021
return indexstate ;
926
1022
}
927
1023
1024
+ static void
1025
+ create_duphash (IndexScanState * node )
1026
+ {
1027
+ HASHCTL hash_ctl ;
1028
+
1029
+ MemSet (& hash_ctl , 0 , sizeof (hash_ctl ));
1030
+ hash_ctl .keysize = SizeOfIptrData ;
1031
+ hash_ctl .entrysize = sizeof (DupHashTabEntry );
1032
+ hash_ctl .hash = tag_hash ;
1033
+ hash_ctl .hcxt = CurrentMemoryContext ;
1034
+ node -> iss_DupHash = hash_create ("DupHashTable" ,
1035
+ (long ) ceil (node -> ss .ps .plan -> plan_rows ),
1036
+ & hash_ctl ,
1037
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT );
1038
+ if (node -> iss_DupHash == NULL )
1039
+ ereport (ERROR ,
1040
+ (errcode (ERRCODE_OUT_OF_MEMORY ),
1041
+ errmsg ("out of memory" )));
1042
+ node -> iss_MaxHash = (SortMem * 1024L ) /
1043
+ (MAXALIGN (sizeof (HASHELEMENT )) + MAXALIGN (sizeof (DupHashTabEntry )));
1044
+ }
1045
+
928
1046
int
929
1047
ExecCountSlotsIndexScan (IndexScan * node )
930
1048
{
0 commit comments