Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 87bd07d

Browse files
committed
Make EXPLAIN sampling smarter, to avoid excessive sampling delay.
Martijn van Oosterhout
1 parent 53d669e commit 87bd07d

File tree

8 files changed

+150
-55
lines changed

8 files changed

+150
-55
lines changed

src/backend/commands/trigger.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.201 2006/04/27 00:33:41 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.202 2006/05/30 14:01:57 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -1306,7 +1306,7 @@ ExecCallTriggerFunc(TriggerData *trigdata,
13061306
* one "tuple returned" (really the number of firings).
13071307
*/
13081308
if (instr)
1309-
InstrStopNode(instr + tgindx, true);
1309+
InstrStopNode(instr + tgindx, 1);
13101310

13111311
return (HeapTuple) DatumGetPointer(result);
13121312
}
@@ -2154,7 +2154,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
21542154
* one "tuple returned" (really the number of firings).
21552155
*/
21562156
if (instr)
2157-
InstrStopNode(instr + tgindx, true);
2157+
InstrStopNode(instr + tgindx, 1);
21582158
}
21592159

21602160

src/backend/executor/execProcnode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.54 2006/03/05 15:58:26 momjian Exp $
15+
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.55 2006/05/30 14:01:57 momjian Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -423,7 +423,7 @@ ExecProcNode(PlanState *node)
423423
}
424424

425425
if (node->instrument)
426-
InstrStopNode(node->instrument, !TupIsNull(result));
426+
InstrStopNode(node->instrument, TupIsNull(result) ? 0 : 1 );
427427

428428
return result;
429429
}

src/backend/executor/instrument.c

Lines changed: 130 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,78 @@
77
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.14 2006/03/05 15:58:26 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.15 2006/05/30 14:01:58 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
1414
#include "postgres.h"
1515

1616
#include <unistd.h>
17+
#include <math.h>
1718

1819
#include "executor/instrument.h"
1920

21+
/* This is the function that is used to determine the sampling intervals. In
22+
* general, if the function is f(x), then for N tuples we will take on the
23+
* order of integral(1/f(x), x=0..N) samples. Some examples follow, with the
24+
* number of samples that would be collected over 1,000,000 tuples.
25+
26+
f(x) = x => log2(N) 20
27+
f(x) = x^(1/2) => 2 * N^(1/2) 2000
28+
f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
29+
30+
* I've chosen the last one as it seems to provide a good compromise between
31+
* low overhead but still getting a meaningful number of samples. However,
32+
* not all machines have the cbrt() function so on those we substitute
33+
* sqrt(). The difference is not very significant in the tests I made.
34+
*/
35+
#ifdef HAVE_CBRT
36+
#define SampleFunc cbrt
37+
#else
38+
#define SampleFunc sqrt
39+
#endif
40+
41+
#define SAMPLE_THRESHOLD 50
42+
43+
static double SampleOverhead;
44+
static bool SampleOverheadCalculated;
45+
46+
static void
47+
CalculateSampleOverhead()
48+
{
49+
Instrumentation instr;
50+
int i;
51+
52+
/* We want to determine the sampling overhead, to correct
53+
* calculations later. This only needs to be done once per backend.
54+
* Is this the place? A wrong value here (due to a mistimed
55+
* task-switch) will cause bad calculations later.
56+
*
57+
* To minimize the risk we do it a few times and take the lowest.
58+
*/
59+
60+
SampleOverhead = 1.0e6;
61+
62+
for( i = 0; i<5; i++ )
63+
{
64+
int j;
65+
double overhead;
66+
67+
memset( &instr, 0, sizeof(instr) );
68+
69+
/* Loop SAMPLE_THRESHOLD times or 100 microseconds, whichever is faster */
70+
for( j=0; j<SAMPLE_THRESHOLD && INSTR_TIME_GET_DOUBLE(instr.counter) < 100e-6; i++ )
71+
{
72+
InstrStartNode( &instr );
73+
InstrStopNode( &instr, 1 );
74+
}
75+
overhead = INSTR_TIME_GET_DOUBLE(instr.counter) / instr.samplecount;
76+
if( overhead < SampleOverhead )
77+
SampleOverhead = overhead;
78+
}
79+
80+
SampleOverheadCalculated = true;
81+
}
2082

2183
/* Allocate new instrumentation structure(s) */
2284
Instrumentation *
@@ -25,7 +87,10 @@ InstrAlloc(int n)
2587
Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
2688

2789
/* we don't need to do any initialization except zero 'em */
28-
90+
91+
/* Calculate overhead, if not done yet */
92+
if( !SampleOverheadCalculated )
93+
CalculateSampleOverhead();
2994
return instr;
3095
}
3196

@@ -34,49 +99,69 @@ void
3499
InstrStartNode(Instrumentation *instr)
35100
{
36101
if (INSTR_TIME_IS_ZERO(instr->starttime))
37-
INSTR_TIME_SET_CURRENT(instr->starttime);
102+
{
103+
/* We always sample the first SAMPLE_THRESHOLD tuples, so small nodes are always accurate */
104+
if (instr->tuplecount < SAMPLE_THRESHOLD)
105+
instr->sampling = true;
106+
else
107+
{
108+
/* Otherwise we go to sampling, see the comments on SampleFunc at the top of the file */
109+
if( instr->tuplecount > instr->nextsample )
110+
{
111+
instr->sampling = true;
112+
/* The doubling is so the random will average 1 over time */
113+
instr->nextsample += 2.0 * SampleFunc(instr->tuplecount) * (double)rand() / (double)RAND_MAX;
114+
}
115+
}
116+
if (instr->sampling)
117+
INSTR_TIME_SET_CURRENT(instr->starttime);
118+
}
38119
else
39120
elog(DEBUG2, "InstrStartNode called twice in a row");
40121
}
41122

42123
/* Exit from a plan node */
43124
void
44-
InstrStopNode(Instrumentation *instr, bool returnedTuple)
125+
InstrStopNode(Instrumentation *instr, double nTuples)
45126
{
46127
instr_time endtime;
47128

48129
/* count the returned tuples */
49-
if (returnedTuple)
50-
instr->tuplecount += 1;
130+
instr->tuplecount += nTuples;
51131

52-
if (INSTR_TIME_IS_ZERO(instr->starttime))
132+
if (instr->sampling)
53133
{
54-
elog(DEBUG2, "InstrStopNode called without start");
55-
return;
56-
}
134+
if (INSTR_TIME_IS_ZERO(instr->starttime))
135+
{
136+
elog(DEBUG2, "InstrStopNode called without start");
137+
return;
138+
}
57139

58-
INSTR_TIME_SET_CURRENT(endtime);
140+
INSTR_TIME_SET_CURRENT(endtime);
59141

60142
#ifndef WIN32
61-
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
62-
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
63-
64-
/* Normalize after each add to avoid overflow/underflow of tv_usec */
65-
while (instr->counter.tv_usec < 0)
66-
{
67-
instr->counter.tv_usec += 1000000;
68-
instr->counter.tv_sec--;
69-
}
70-
while (instr->counter.tv_usec >= 1000000)
71-
{
72-
instr->counter.tv_usec -= 1000000;
73-
instr->counter.tv_sec++;
74-
}
143+
instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
144+
instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
145+
146+
/* Normalize after each add to avoid overflow/underflow of tv_usec */
147+
while (instr->counter.tv_usec < 0)
148+
{
149+
instr->counter.tv_usec += 1000000;
150+
instr->counter.tv_sec--;
151+
}
152+
while (instr->counter.tv_usec >= 1000000)
153+
{
154+
instr->counter.tv_usec -= 1000000;
155+
instr->counter.tv_sec++;
156+
}
75157
#else /* WIN32 */
76-
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
158+
instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
77159
#endif
78160

79-
INSTR_TIME_SET_ZERO(instr->starttime);
161+
INSTR_TIME_SET_ZERO(instr->starttime);
162+
instr->samplecount += nTuples;
163+
instr->sampling = false;
164+
}
80165

81166
/* Is this the first tuple of this cycle? */
82167
if (!instr->running)
@@ -86,17 +171,6 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple)
86171
}
87172
}
88173

89-
/* As above, but count multiple tuples returned at once */
90-
void
91-
InstrStopNodeMulti(Instrumentation *instr, double nTuples)
92-
{
93-
/* count the returned tuples */
94-
instr->tuplecount += nTuples;
95-
96-
/* delegate the rest */
97-
InstrStopNode(instr, false);
98-
}
99-
100174
/* Finish a run cycle for a plan node */
101175
void
102176
InstrEndLoop(Instrumentation *instr)
@@ -114,14 +188,31 @@ InstrEndLoop(Instrumentation *instr)
114188
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
115189

116190
instr->startup += instr->firsttuple;
117-
instr->total += totaltime;
191+
192+
/* Here we take into account sampling effects. Doing it naively ends
193+
* up assuming the sampling overhead applies to all tuples, even the
194+
* ones we didn't measure. We've calculated an overhead, so we
195+
* subtract that for all samples we didn't measure. The first tuple
196+
* is also special cased, because it usually takes longer. */
197+
198+
if( instr->samplecount < instr->tuplecount )
199+
{
200+
double pertuple = (totaltime - instr->firsttuple) / (instr->samplecount - 1);
201+
instr->total += instr->firsttuple + (pertuple * (instr->samplecount - 1))
202+
+ ((pertuple - SampleOverhead) * (instr->tuplecount - instr->samplecount));
203+
}
204+
else
205+
instr->total += totaltime;
206+
118207
instr->ntuples += instr->tuplecount;
208+
instr->nsamples += instr->samplecount;
119209
instr->nloops += 1;
120210

121211
/* Reset for next cycle (if any) */
122212
instr->running = false;
123213
INSTR_TIME_SET_ZERO(instr->starttime);
124214
INSTR_TIME_SET_ZERO(instr->counter);
125215
instr->firsttuple = 0;
216+
instr->samplecount = 0;
126217
instr->tuplecount = 0;
127218
}

src/backend/executor/nodeBitmapAnd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapAnd.c,v 1.7 2006/05/23 15:21:52 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapAnd.c,v 1.8 2006/05/30 14:01:58 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -161,7 +161,7 @@ MultiExecBitmapAnd(BitmapAndState *node)
161161

162162
/* must provide our own instrumentation support */
163163
if (node->ps.instrument)
164-
InstrStopNodeMulti(node->ps.instrument, 0 /* XXX */ );
164+
InstrStopNode(node->ps.instrument, 0 /* XXX */ );
165165

166166
return (Node *) result;
167167
}

src/backend/executor/nodeBitmapIndexscan.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapIndexscan.c,v 1.18 2006/05/23 15:21:52 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapIndexscan.c,v 1.19 2006/05/30 14:01:58 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -112,7 +112,7 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
112112

113113
/* must provide our own instrumentation support */
114114
if (node->ss.ps.instrument)
115-
InstrStopNodeMulti(node->ss.ps.instrument, nTuples);
115+
InstrStopNode(node->ss.ps.instrument, nTuples);
116116

117117
return (Node *) tbm;
118118
}

src/backend/executor/nodeBitmapOr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapOr.c,v 1.6 2006/05/23 15:21:52 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapOr.c,v 1.7 2006/05/30 14:01:58 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -177,7 +177,7 @@ MultiExecBitmapOr(BitmapOrState *node)
177177

178178
/* must provide our own instrumentation support */
179179
if (node->ps.instrument)
180-
InstrStopNodeMulti(node->ps.instrument, 0 /* XXX */ );
180+
InstrStopNode(node->ps.instrument, 0 /* XXX */ );
181181

182182
return (Node *) result;
183183
}

src/backend/executor/nodeHash.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.102 2006/05/23 15:21:52 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -97,7 +97,7 @@ MultiExecHash(HashState *node)
9797

9898
/* must provide our own instrumentation support */
9999
if (node->ps.instrument)
100-
InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
100+
InstrStopNode(node->ps.instrument, hashtable->totalTuples);
101101

102102
/*
103103
* We do not return the hash table directly because it's not a subtype of

src/include/executor/instrument.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
88
*
9-
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.13 2006/03/05 15:58:56 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.14 2006/05/30 14:01:58 momjian Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -61,17 +61,21 @@ typedef struct Instrumentation
6161
instr_time counter; /* Accumulated runtime for this node */
6262
double firsttuple; /* Time for first tuple of this cycle */
6363
double tuplecount; /* Tuples emitted so far this cycle */
64+
double samplecount; /* Samples collected this cycle */
6465
/* Accumulated statistics across all completed cycles: */
6566
double startup; /* Total startup time (in seconds) */
6667
double total; /* Total total time (in seconds) */
6768
double ntuples; /* Total tuples produced */
6869
double nloops; /* # of run cycles for this node */
70+
double nsamples; /* # of samples taken */
71+
/* Tracking for sampling */
72+
bool sampling; /* Are we sampling this iteration */
73+
double nextsample; /* The next tuplecount we're going to sample */
6974
} Instrumentation;
7075

7176
extern Instrumentation *InstrAlloc(int n);
7277
extern void InstrStartNode(Instrumentation *instr);
73-
extern void InstrStopNode(Instrumentation *instr, bool returnedTuple);
74-
extern void InstrStopNodeMulti(Instrumentation *instr, double nTuples);
78+
extern void InstrStopNode(Instrumentation *instr, double nTuples);
7579
extern void InstrEndLoop(Instrumentation *instr);
7680

7781
#endif /* INSTRUMENT_H */

0 commit comments

Comments
 (0)