Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 649c1a1

Browse files
committed
One more pass at reducing the cost of pg_dump's new implementation:
reduce the number of times TopoSort() has to be executed by trying to extract multiple dependency loops from each pass, instead of only one. This saves about another factor of ten on the regression database. This could be considered as another exercise in grokking Fred Brooks' maxim: Representation *is* the essence of programming.
1 parent 53e7c13 commit 649c1a1

File tree

1 file changed

+154
-47
lines changed

1 file changed

+154
-47
lines changed

src/bin/pg_dump/pg_dump_sort.c

+154-47
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump_sort.c,v 1.2 2003/12/06 22:55:11 tgl Exp $
12+
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump_sort.c,v 1.3 2003/12/07 05:44:50 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -54,10 +54,12 @@ static bool TopoSort(DumpableObject **objs,
5454
int *nOrdering);
5555
static void addHeapElement(int val, int *heap, int heapLength);
5656
static int removeHeapElement(int *heap, int heapLength);
57+
static void findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs);
5758
static bool findLoop(DumpableObject *obj,
59+
DumpId startPoint,
60+
DumpableObject **workspace,
5861
int depth,
59-
DumpableObject **ordering,
60-
int *nOrdering);
62+
int *newDepth);
6163
static void repairDependencyLoop(DumpableObject **loop,
6264
int nLoop);
6365
static void describeDumpableObject(DumpableObject *obj,
@@ -104,12 +106,15 @@ sortDumpableObjects(DumpableObject **objs, int numObjs)
104106
DumpableObject **ordering;
105107
int nOrdering;
106108

109+
if (numObjs <= 0)
110+
return;
111+
107112
ordering = (DumpableObject **) malloc(numObjs * sizeof(DumpableObject *));
108113
if (ordering == NULL)
109114
exit_horribly(NULL, modulename, "out of memory\n");
110115

111116
while (!TopoSort(objs, numObjs, ordering, &nOrdering))
112-
repairDependencyLoop(ordering, nOrdering);
117+
findDependencyLoops(ordering, nOrdering, numObjs);
113118

114119
memcpy(objs, ordering, numObjs * sizeof(DumpableObject *));
115120

@@ -133,10 +138,12 @@ sortDumpableObjects(DumpableObject **objs, int numObjs)
133138
* On success (TRUE result), ordering[] is filled with a sorted array of
134139
* DumpableObject pointers, of length equal to the input list length.
135140
*
136-
* On failure (FALSE result), ordering[] is filled with an array of
137-
* DumpableObject pointers of length *nOrdering, representing a circular set
138-
* of dependency constraints. (If there is more than one cycle in the given
139-
* constraints, one is picked at random to return.)
141+
* On failure (FALSE result), ordering[] is filled with an unsorted array of
142+
* DumpableObject pointers of length *nOrdering, listing the objects that
143+
* prevented the sort from being completed. In general, these objects either
144+
* participate directly in a dependency cycle, or are depended on by objects
145+
* that are in a cycle. (The latter objects are not actually problematic,
146+
* but it takes further analysis to identify which are which.)
140147
*
141148
* The caller is responsible for allocating sufficient space at *ordering.
142149
*/
@@ -262,28 +269,18 @@ TopoSort(DumpableObject **objs,
262269
}
263270

264271
/*
265-
* If we failed, report one of the circular constraint sets. We do
266-
* this by scanning beforeConstraints[] to locate the items that have
267-
* not yet been output, and for each one, trying to trace a constraint
268-
* loop leading back to it. (There may be items that depend on items
269-
* involved in a loop, but aren't themselves part of the loop, so not
270-
* every nonzero beforeConstraints entry is necessarily a useful
271-
* starting point. We keep trying till we find a loop.)
272+
* If we failed, report the objects that couldn't be output; these are
273+
* the ones with beforeConstraints[] still nonzero.
272274
*/
273275
if (i != 0)
274276
{
277+
k = 0;
275278
for (j = 1; j <= maxDumpId; j++)
276279
{
277280
if (beforeConstraints[j] != 0)
278-
{
279-
ordering[0] = obj = objs[idMap[j]];
280-
if (findLoop(obj, 1, ordering, nOrdering))
281-
break;
282-
}
281+
ordering[k++] = objs[idMap[j]];
283282
}
284-
if (j > maxDumpId)
285-
exit_horribly(NULL, modulename,
286-
"could not find dependency loop\n");
283+
*nOrdering = k;
287284
}
288285

289286
/* Done */
@@ -361,46 +358,156 @@ removeHeapElement(int *heap, int heapLength)
361358
}
362359

363360
/*
364-
* Recursively search for a circular dependency loop
361+
* findDependencyLoops - identify loops in TopoSort's failure output,
362+
* and pass each such loop to repairDependencyLoop() for action
363+
*
364+
* In general there may be many loops in the set of objects returned by
365+
* TopoSort; for speed we should try to repair as many loops as we can
366+
* before trying TopoSort again. We can safely repair loops that are
367+
* disjoint (have no members in common); if we find overlapping loops
368+
* then we repair only the first one found, because the action taken to
369+
* repair the first might have repaired the other as well. (If not,
370+
* we'll fix it on the next go-round.)
371+
*
372+
* objs[] lists the objects TopoSort couldn't sort
373+
* nObjs is the number of such objects
374+
* totObjs is the total number of objects in the universe
375+
*/
376+
static void
377+
findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
378+
{
379+
/*
380+
* We use a workspace array, the initial part of which stores
381+
* objects already processed, and the rest of which is used as
382+
* temporary space to try to build a loop in. This is convenient
383+
* because we do not care about loops involving already-processed
384+
* objects (see notes above); we can easily reject such loops in
385+
* findLoop() because of this representation. After we identify
386+
* and process a loop, we can add it to the initial part of the
387+
* workspace just by moving the boundary pointer.
388+
*
389+
* When we determine that an object is not part of any interesting
390+
* loop, we also add it to the initial part of the workspace. This
391+
* is not necessary for correctness, but saves later invocations of
392+
* findLoop() from uselessly chasing references to such an object.
393+
*
394+
* We make the workspace large enough to hold all objects in the
395+
* original universe. This is probably overkill, but it's provably
396+
* enough space...
397+
*/
398+
DumpableObject **workspace;
399+
int initiallen;
400+
bool fixedloop;
401+
int i;
402+
403+
workspace = (DumpableObject **) malloc(totObjs * sizeof(DumpableObject *));
404+
if (workspace == NULL)
405+
exit_horribly(NULL, modulename, "out of memory\n");
406+
initiallen = 0;
407+
fixedloop = false;
408+
409+
for (i = 0; i < nObjs; i++)
410+
{
411+
DumpableObject *obj = objs[i];
412+
int newlen;
413+
414+
workspace[initiallen] = NULL; /* see test below */
415+
416+
if (findLoop(obj, obj->dumpId, workspace, initiallen, &newlen))
417+
{
418+
/* Found a loop of length newlen - initiallen */
419+
repairDependencyLoop(&workspace[initiallen], newlen - initiallen);
420+
/* Add loop members to workspace */
421+
initiallen = newlen;
422+
fixedloop = true;
423+
}
424+
else
425+
{
426+
/*
427+
* Didn't find a loop, but add this object to workspace anyway,
428+
* unless it's already present. We piggyback on the test that
429+
* findLoop() already did: it won't have tentatively added obj
430+
* to workspace if it's already present.
431+
*/
432+
if (workspace[initiallen] == obj)
433+
initiallen++;
434+
}
435+
}
436+
437+
/* We'd better have fixed at least one loop */
438+
if (!fixedloop)
439+
exit_horribly(NULL, modulename, "could not identify dependency loop\n");
440+
441+
free(workspace);
442+
}
443+
444+
/*
445+
* Recursively search for a circular dependency loop that doesn't include
446+
* any existing workspace members.
447+
*
448+
* obj: object we are examining now
449+
* startPoint: dumpId of starting object for the hoped-for circular loop
450+
* workspace[]: work array for previously processed and current objects
451+
* depth: number of valid entries in workspace[] at call
452+
* newDepth: if successful, set to new number of workspace[] entries
453+
*
454+
* On success, *newDepth is set and workspace[] entries depth..*newDepth-1
455+
* are filled with pointers to the members of the loop.
456+
*
457+
* Note: it is possible that the given starting object is a member of more
458+
* than one cycle; if so, we will find an arbitrary one of the cycles.
365459
*/
366460
static bool
367461
findLoop(DumpableObject *obj,
462+
DumpId startPoint,
463+
DumpableObject **workspace,
368464
int depth,
369-
DumpableObject **ordering, /* output argument */
370-
int *nOrdering) /* output argument */
465+
int *newDepth)
371466
{
372-
DumpId startPoint = ordering[0]->dumpId;
373-
int j;
374-
int k;
467+
int i;
375468

376-
/* See if we've found a loop back to the starting point */
377-
for (j = 0; j < obj->nDeps; j++)
469+
/*
470+
* Reject if obj is already present in workspace. This test serves
471+
* three purposes: it prevents us from finding loops that overlap
472+
* previously-processed loops, it prevents us from going into infinite
473+
* recursion if we are given a startPoint object that links to a cycle
474+
* it's not a member of, and it guarantees that we can't overflow the
475+
* allocated size of workspace[].
476+
*/
477+
for (i = 0; i < depth; i++)
478+
{
479+
if (workspace[i] == obj)
480+
return false;
481+
}
482+
/*
483+
* Okay, tentatively add obj to workspace
484+
*/
485+
workspace[depth++] = obj;
486+
/*
487+
* See if we've found a loop back to the desired startPoint; if so, done
488+
*/
489+
for (i = 0; i < obj->nDeps; i++)
378490
{
379-
if (obj->dependencies[j] == startPoint)
491+
if (obj->dependencies[i] == startPoint)
380492
{
381-
*nOrdering = depth;
493+
*newDepth = depth;
382494
return true;
383495
}
384496
}
385-
/* Try each outgoing branch */
386-
for (j = 0; j < obj->nDeps; j++)
497+
/*
498+
* Recurse down each outgoing branch
499+
*/
500+
for (i = 0; i < obj->nDeps; i++)
387501
{
388-
DumpableObject *nextobj = findObjectByDumpId(obj->dependencies[j]);
502+
DumpableObject *nextobj = findObjectByDumpId(obj->dependencies[i]);
389503

390504
if (!nextobj)
391505
continue; /* ignore dependencies on undumped objects */
392-
for (k = 0; k < depth; k++)
393-
{
394-
if (ordering[k] == nextobj)
395-
break;
396-
}
397-
if (k < depth)
398-
continue; /* ignore loops not including start point */
399-
ordering[depth] = nextobj;
400506
if (findLoop(nextobj,
401-
depth + 1,
402-
ordering,
403-
nOrdering))
507+
startPoint,
508+
workspace,
509+
depth,
510+
newDepth))
404511
return true;
405512
}
406513

0 commit comments

Comments
 (0)