Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a18ebc5

Browse files
committed
Code review for EXPLAIN patch. Fix some typos, make it behave sanely
across multiple loops, get rid of the shaky assumption that exactly one tuple is returned per node iteration.
1 parent 3993bd8 commit a18ebc5

File tree

2 files changed

+113
-76
lines changed

2 files changed

+113
-76
lines changed

src/backend/executor/instrument.c

Lines changed: 108 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -7,90 +7,117 @@
77
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.15 2006/05/30 14:01:58 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.16 2006/05/30 19:24:25 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
1414
#include "postgres.h"
1515

16-
#include <unistd.h>
1716
#include <math.h>
17+
#include <unistd.h>
1818

1919
#include "executor/instrument.h"
2020

21-
/* This is the function that is used to determine the sampling intervals. In
22-
* general, if the function is f(x), then for N tuples we will take on the
23-
* order of integral(1/f(x), x=0..N) samples. Some examples follow, with the
24-
* number of samples that would be collected over 1,000,000 tuples.
2521

26-
f(x) = x => log2(N) 20
27-
f(x) = x^(1/2) => 2 * N^(1/2) 2000
28-
f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
29-
22+
/*
23+
* As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE
24+
* by not calling INSTR_TIME_SET_CURRENT() for every single node execution.
25+
* (Because that requires a kernel call on most systems, it's expensive.)
26+
*
27+
* This macro determines the sampling interval: that is, after how many more
28+
* iterations will we take the next time sample, given that niters iterations
29+
* have occurred already. In general, if the function is f(x), then for N
30+
* iterations we will take on the order of integral(1/f(x), x=0..N)
31+
* samples. Some examples follow, with the number of samples that would be
32+
* collected over 1,000,000 iterations:
33+
*
34+
* f(x) = x => log2(N) 20
35+
* f(x) = x^(1/2) => 2 * N^(1/2) 2000
36+
* f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
37+
*
3038
* I've chosen the last one as it seems to provide a good compromise between
3139
* low overhead but still getting a meaningful number of samples. However,
3240
* not all machines have the cbrt() function so on those we substitute
3341
* sqrt(). The difference is not very significant in the tests I made.
34-
*/
42+
*
43+
* The actual sampling interval is randomized with the SampleFunc() value
44+
* as the mean; this hopefully will reduce any measurement bias due to
45+
* cyclic variation in the node execution time.
46+
*/
3547
#ifdef HAVE_CBRT
36-
#define SampleFunc cbrt
48+
#define SampleFunc(niters) cbrt(niters)
3749
#else
38-
#define SampleFunc sqrt
50+
#define SampleFunc(niters) sqrt(niters)
3951
#endif
4052

53+
#define SampleInterval(niters) \
54+
(SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2))
55+
56+
/*
57+
* We sample at every node iteration until we've reached this threshold,
58+
* so that nodes not called a large number of times are completely accurate.
59+
* (Perhaps this should be a GUC variable? But beware of messing up
60+
* CalculateSampleOverhead if value is too small.)
61+
*/
4162
#define SAMPLE_THRESHOLD 50
4263

64+
/*
65+
* Determine the sampling overhead. This only needs to be done once per
66+
* backend (actually, probably once per postmaster would do ...)
67+
*/
4368
static double SampleOverhead;
44-
static bool SampleOverheadCalculated;
69+
static bool SampleOverheadCalculated = false;
4570

4671
static void
47-
CalculateSampleOverhead()
72+
CalculateSampleOverhead(void)
4873
{
4974
Instrumentation instr;
5075
int i;
51-
52-
/* We want to determine the sampling overhead, to correct
53-
* calculations later. This only needs to be done once per backend.
54-
* Is this the place? A wrong value here (due to a mistimed
55-
* task-switch) will cause bad calculations later.
56-
*
76+
77+
/*
78+
* We could get a wrong result due to being interrupted by task switch.
5779
* To minimize the risk we do it a few times and take the lowest.
5880
*/
59-
6081
SampleOverhead = 1.0e6;
61-
62-
for( i = 0; i<5; i++ )
82+
83+
for (i = 0; i < 5; i++)
6384
{
6485
int j;
6586
double overhead;
66-
67-
memset( &instr, 0, sizeof(instr) );
68-
69-
/* Loop SAMPLE_THRESHOLD times or 100 microseconds, whichever is faster */
70-
for( j=0; j<SAMPLE_THRESHOLD && INSTR_TIME_GET_DOUBLE(instr.counter) < 100e-6; i++ )
87+
88+
memset(&instr, 0, sizeof(instr));
89+
/*
90+
* We know that samples will actually be taken up to SAMPLE_THRESHOLD,
91+
* so that's as far as we can test.
92+
*/
93+
for (j=0; j < SAMPLE_THRESHOLD; j++)
7194
{
72-
InstrStartNode( &instr );
73-
InstrStopNode( &instr, 1 );
95+
InstrStartNode(&instr);
96+
InstrStopNode(&instr, 1);
7497
}
7598
overhead = INSTR_TIME_GET_DOUBLE(instr.counter) / instr.samplecount;
76-
if( overhead < SampleOverhead )
99+
if (overhead < SampleOverhead)
77100
SampleOverhead = overhead;
78101
}
79-
102+
80103
SampleOverheadCalculated = true;
81104
}
82105

106+
83107
/* Allocate new instrumentation structure(s) */
84108
Instrumentation *
85109
InstrAlloc(int n)
86110
{
87-
Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
111+
Instrumentation *instr;
88112

89-
/* we don't need to do any initialization except zero 'em */
90-
91-
/* Calculate overhead, if not done yet */
92-
if( !SampleOverheadCalculated )
113+
/* Calculate sampling overhead, if not done yet in this backend */
114+
if (!SampleOverheadCalculated)
93115
CalculateSampleOverhead();
116+
117+
instr = palloc0(n * sizeof(Instrumentation));
118+
119+
/* we don't need to do any initialization except zero 'em */
120+
94121
return instr;
95122
}
96123

@@ -100,18 +127,17 @@ InstrStartNode(Instrumentation *instr)
100127
{
101128
if (INSTR_TIME_IS_ZERO(instr->starttime))
102129
{
103-
/* We always sample the first SAMPLE_THRESHOLD tuples, so small nodes are always accurate */
104-
if (instr->tuplecount < SAMPLE_THRESHOLD)
130+
/*
131+
* Always sample if not yet up to threshold, else check whether
132+
* next threshold has been reached
133+
*/
134+
if (instr->itercount < SAMPLE_THRESHOLD)
105135
instr->sampling = true;
106-
else
136+
else if (instr->itercount >= instr->nextsample)
107137
{
108-
/* Otherwise we go to sampling, see the comments on SampleFunc at the top of the file */
109-
if( instr->tuplecount > instr->nextsample )
110-
{
111-
instr->sampling = true;
112-
/* The doubling is so the random will average 1 over time */
113-
instr->nextsample += 2.0 * SampleFunc(instr->tuplecount) * (double)rand() / (double)RAND_MAX;
114-
}
138+
instr->sampling = true;
139+
instr->nextsample =
140+
instr->itercount + SampleInterval(instr->itercount);
115141
}
116142
if (instr->sampling)
117143
INSTR_TIME_SET_CURRENT(instr->starttime);
@@ -124,13 +150,15 @@ InstrStartNode(Instrumentation *instr)
124150
void
125151
InstrStopNode(Instrumentation *instr, double nTuples)
126152
{
127-
instr_time endtime;
128-
129-
/* count the returned tuples */
153+
/* count the returned tuples and iterations */
130154
instr->tuplecount += nTuples;
155+
instr->itercount += 1;
131156

157+
/* measure runtime if appropriate */
132158
if (instr->sampling)
133159
{
160+
instr_time endtime;
161+
134162
if (INSTR_TIME_IS_ZERO(instr->starttime))
135163
{
136164
elog(DEBUG2, "InstrStopNode called without start");
@@ -159,7 +187,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
159187
#endif
160188

161189
INSTR_TIME_SET_ZERO(instr->starttime);
162-
instr->samplecount += nTuples;
190+
191+
instr->samplecount += 1;
163192
instr->sampling = false;
164193
}
165194

@@ -184,35 +213,44 @@ InstrEndLoop(Instrumentation *instr)
184213
if (!INSTR_TIME_IS_ZERO(instr->starttime))
185214
elog(DEBUG2, "InstrEndLoop called on running node");
186215

187-
/* Accumulate per-cycle statistics into totals */
216+
/* Compute time spent in node */
188217
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
189218

190-
instr->startup += instr->firsttuple;
191-
192-
/* Here we take into account sampling effects. Doing it naively ends
193-
* up assuming the sampling overhead applies to all tuples, even the
194-
* ones we didn't measure. We've calculated an overhead, so we
195-
* subtract that for all samples we didn't measure. The first tuple
196-
* is also special cased, because it usually takes longer. */
197-
198-
if( instr->samplecount < instr->tuplecount )
219+
/*
220+
* If we didn't measure runtime on every iteration, then we must increase
221+
* the measured total to account for the other iterations. Naively
222+
* multiplying totaltime by itercount/samplecount would be wrong because
223+
* it effectively assumes the sampling overhead applies to all iterations,
224+
* even the ones we didn't measure. (Note that what we are trying to
225+
* estimate here is the actual time spent in the node, including the
226+
* actual measurement overhead; not the time exclusive of measurement
227+
* overhead.) We exclude the first iteration from the correction basis,
228+
* because it often takes longer than others.
229+
*/
230+
if (instr->itercount > instr->samplecount)
199231
{
200-
double pertuple = (totaltime - instr->firsttuple) / (instr->samplecount - 1);
201-
instr->total += instr->firsttuple + (pertuple * (instr->samplecount - 1))
202-
+ ((pertuple - SampleOverhead) * (instr->tuplecount - instr->samplecount));
232+
double per_iter;
233+
234+
per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1)
235+
- SampleOverhead;
236+
if (per_iter > 0) /* sanity check */
237+
totaltime += per_iter * (instr->itercount - instr->samplecount);
203238
}
204-
else
205-
instr->total += totaltime;
206-
239+
240+
/* Accumulate per-cycle statistics into totals */
241+
instr->startup += instr->firsttuple;
242+
instr->total += totaltime;
207243
instr->ntuples += instr->tuplecount;
208-
instr->nsamples += instr->samplecount;
209244
instr->nloops += 1;
210245

211246
/* Reset for next cycle (if any) */
212247
instr->running = false;
248+
instr->sampling = false;
213249
INSTR_TIME_SET_ZERO(instr->starttime);
214250
INSTR_TIME_SET_ZERO(instr->counter);
215251
instr->firsttuple = 0;
216-
instr->samplecount = 0;
217252
instr->tuplecount = 0;
253+
instr->itercount = 0;
254+
instr->samplecount = 0;
255+
instr->nextsample = 0;
218256
}

src/include/executor/instrument.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
88
*
9-
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.14 2006/05/30 14:01:58 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.15 2006/05/30 19:24:25 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -57,20 +57,19 @@ typedef struct Instrumentation
5757
{
5858
/* Info about current plan cycle: */
5959
bool running; /* TRUE if we've completed first tuple */
60+
bool sampling; /* Are we sampling in current iteration? */
6061
instr_time starttime; /* Start time of current iteration of node */
6162
instr_time counter; /* Accumulated runtime for this node */
6263
double firsttuple; /* Time for first tuple of this cycle */
6364
double tuplecount; /* Tuples emitted so far this cycle */
64-
double samplecount; /* Samples collected this cycle */
65+
double itercount; /* Plan node iterations this cycle */
66+
double samplecount; /* Iterations in which we sampled runtime */
67+
double nextsample; /* Next itercount to sample at */
6568
/* Accumulated statistics across all completed cycles: */
6669
double startup; /* Total startup time (in seconds) */
6770
double total; /* Total total time (in seconds) */
6871
double ntuples; /* Total tuples produced */
6972
double nloops; /* # of run cycles for this node */
70-
double nsamples; /* # of samples taken */
71-
/* Tracking for sampling */
72-
bool sampling; /* Are we sampling this iteration */
73-
double nextsample; /* The next tuplecount we're going to sample */
7473
} Instrumentation;
7574

7675
extern Instrumentation *InstrAlloc(int n);

0 commit comments

Comments
 (0)