Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 485375a

Browse files
committed
Fix hash aggregation to suppress unneeded columns from being stored in tuple hash table entries.
This addresses the problem previously noted that use of a 'physical tlist' in the input scan node could bloat the hash table entries far beyond what the planner expects. It's a better answer than my previous thought of undoing the physical tlist optimization, because we can also remove columns that are needed to compute the aggregate functions but aren't part of the grouping column set.
1 parent cfc7103 commit 485375a

File tree

2 files changed

+113
-9
lines changed

2 files changed

+113
-9
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 110 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
* Portions Copyright (c) 1994, Regents of the University of California
6262
*
6363
* IDENTIFICATION
64-
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.141 2006/06/28 17:05:49 tgl Exp $
64+
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.142 2006/06/28 19:40:52 tgl Exp $
6565
*
6666
*-------------------------------------------------------------------------
6767
*/
@@ -223,9 +223,11 @@ static void finalize_aggregate(AggState *aggstate,
223223
AggStatePerAgg peraggstate,
224224
AggStatePerGroup pergroupstate,
225225
Datum *resultVal, bool *resultIsNull);
226+
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
227+
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
226228
static void build_hash_table(AggState *aggstate);
227229
static AggHashEntry lookup_hash_entry(AggState *aggstate,
228-
TupleTableSlot *slot);
230+
TupleTableSlot *inputslot);
229231
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
230232
static void agg_fill_hash_table(AggState *aggstate);
231233
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -579,6 +581,46 @@ finalize_aggregate(AggState *aggstate,
579581
MemoryContextSwitchTo(oldContext);
580582
}
581583

584+
/*
585+
* find_unaggregated_cols
586+
* Construct a bitmapset of the column numbers of un-aggregated Vars
587+
* appearing in our targetlist and qual (HAVING clause)
588+
*/
589+
static Bitmapset *
590+
find_unaggregated_cols(AggState *aggstate)
591+
{
592+
Agg *node = (Agg *) aggstate->ss.ps.plan;
593+
Bitmapset *colnos;
594+
595+
colnos = NULL;
596+
(void) find_unaggregated_cols_walker((Node *) node->plan.targetlist,
597+
&colnos);
598+
(void) find_unaggregated_cols_walker((Node *) node->plan.qual,
599+
&colnos);
600+
return colnos;
601+
}
602+
603+
static bool
604+
find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
605+
{
606+
if (node == NULL)
607+
return false;
608+
if (IsA(node, Var))
609+
{
610+
Var *var = (Var *) node;
611+
612+
/* setrefs.c should have set the varno to 0 */
613+
Assert(var->varno == 0);
614+
Assert(var->varlevelsup == 0);
615+
*colnos = bms_add_member(*colnos, var->varattno);
616+
return false;
617+
}
618+
if (IsA(node, Aggref)) /* do not descend into aggregate exprs */
619+
return false;
620+
return expression_tree_walker(node, find_unaggregated_cols_walker,
621+
(void *) colnos);
622+
}
623+
582624
/*
583625
* Initialize the hash table to empty.
584626
*
@@ -590,6 +632,9 @@ build_hash_table(AggState *aggstate)
590632
Agg *node = (Agg *) aggstate->ss.ps.plan;
591633
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
592634
Size entrysize;
635+
Bitmapset *colnos;
636+
List *collist;
637+
int i;
593638

594639
Assert(node->aggstrategy == AGG_HASHED);
595640
Assert(node->numGroups > 0);
@@ -605,13 +650,48 @@ build_hash_table(AggState *aggstate)
605650
entrysize,
606651
aggstate->aggcontext,
607652
tmpmem);
653+
654+
/*
655+
* Create a list of the tuple columns that actually need to be stored
656+
* in hashtable entries. The incoming tuples from the child plan node
657+
* will contain grouping columns, other columns referenced in our
658+
* targetlist and qual, columns used to compute the aggregate functions,
659+
* and perhaps just junk columns we don't use at all. Only columns of the
660+
* first two types need to be stored in the hashtable, and getting rid of
661+
* the others can make the table entries significantly smaller. To avoid
662+
* messing up Var numbering, we keep the same tuple descriptor for
663+
* hashtable entries as the incoming tuples have, but set unwanted columns
664+
* to NULL in the tuples that go into the table.
665+
*
666+
* To eliminate duplicates, we build a bitmapset of the needed columns,
667+
* then convert it to an integer list (cheaper to scan at runtime).
668+
* The list is in decreasing order so that the first entry is the largest;
669+
* lookup_hash_entry depends on this to use slot_getsomeattrs correctly.
670+
*
671+
* Note: at present, searching the tlist/qual is not really necessary
672+
* since the parser should disallow any unaggregated references to
673+
* ungrouped columns. However, the search will be needed when we add
674+
* support for SQL99 semantics that allow use of "functionally dependent"
675+
* columns that haven't been explicitly grouped by.
676+
*/
677+
678+
/* Find Vars that will be needed in tlist and qual */
679+
colnos = find_unaggregated_cols(aggstate);
680+
/* Add in all the grouping columns */
681+
for (i = 0; i < node->numCols; i++)
682+
colnos = bms_add_member(colnos, node->grpColIdx[i]);
683+
/* Convert to list, using lcons so largest element ends up first */
684+
collist = NIL;
685+
while ((i = bms_first_member(colnos)) >= 0)
686+
collist = lcons_int(i, collist);
687+
aggstate->hash_needed = collist;
608688
}
609689

610690
/*
611691
* Estimate per-hash-table-entry overhead for the planner.
612692
*
613693
* Note that the estimate does not include space for pass-by-reference
614-
* transition data values.
694+
* transition data values, nor for the representative tuple of each group.
615695
*/
616696
Size
617697
hash_agg_entry_size(int numAggs)
@@ -621,9 +701,9 @@ hash_agg_entry_size(int numAggs)
621701
/* This must match build_hash_table */
622702
entrysize = sizeof(AggHashEntryData) +
623703
(numAggs - 1) *sizeof(AggStatePerGroupData);
624-
/* Account for hashtable overhead */
625-
entrysize += 2 * sizeof(void *);
626704
entrysize = MAXALIGN(entrysize);
705+
/* Account for hashtable overhead (assuming fill factor = 1) */
706+
entrysize += 3 * sizeof(void *);
627707
return entrysize;
628708
}
629709

@@ -634,13 +714,34 @@ hash_agg_entry_size(int numAggs)
634714
* When called, CurrentMemoryContext should be the per-query context.
635715
*/
636716
static AggHashEntry
637-
lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
717+
lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
638718
{
719+
TupleTableSlot *hashslot = aggstate->hashslot;
720+
ListCell *l;
639721
AggHashEntry entry;
640722
bool isnew;
641723

724+
/* if first time through, initialize hashslot by cloning input slot */
725+
if (hashslot->tts_tupleDescriptor == NULL)
726+
{
727+
ExecSetSlotDescriptor(hashslot, inputslot->tts_tupleDescriptor);
728+
/* Make sure all unused columns are NULLs */
729+
ExecStoreAllNullTuple(hashslot);
730+
}
731+
732+
/* transfer just the needed columns into hashslot */
733+
slot_getsomeattrs(inputslot, linitial_int(aggstate->hash_needed));
734+
foreach(l, aggstate->hash_needed)
735+
{
736+
int varNumber = lfirst_int(l) - 1;
737+
738+
hashslot->tts_values[varNumber] = inputslot->tts_values[varNumber];
739+
hashslot->tts_isnull[varNumber] = inputslot->tts_isnull[varNumber];
740+
}
741+
742+
/* find or create the hashtable entry using the filtered tuple */
642743
entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
643-
slot,
744+
hashslot,
644745
&isnew);
645746

646747
if (isnew)
@@ -1063,13 +1164,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
10631164
ALLOCSET_DEFAULT_INITSIZE,
10641165
ALLOCSET_DEFAULT_MAXSIZE);
10651166

1066-
#define AGG_NSLOTS 2
1167+
#define AGG_NSLOTS 3
10671168

10681169
/*
10691170
* tuple table initialization
10701171
*/
10711172
ExecInitScanTupleSlot(estate, &aggstate->ss);
10721173
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
1174+
aggstate->hashslot = ExecInitExtraTupleSlot(estate);
10731175

10741176
/*
10751177
* initialize child expressions

src/include/nodes/execnodes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.151 2006/06/28 17:05:49 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.152 2006/06/28 19:40:52 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -1245,6 +1245,8 @@ typedef struct AggState
12451245
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
12461246
/* these fields are used in AGG_HASHED mode: */
12471247
TupleHashTable hashtable; /* hash table with one entry per group */
1248+
TupleTableSlot *hashslot; /* slot for loading hash table */
1249+
List *hash_needed; /* list of columns needed in hash table */
12481250
bool table_filled; /* hash table filled yet? */
12491251
TupleHashIterator hashiter; /* for iterating through hash table */
12501252
} AggState;

0 commit comments

Comments
 (0)