Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c8e2e0e

Browse files
committed
Fix a performance problem in pg_dump's dump order selection logic.
findDependencyLoops() was not bright about cases where there are multiple dependency paths between the same two dumpable objects. In most scenarios this did not hurt us too badly; but since the introduction of section boundary pseudo-objects in commit a1ef01f, it was possible for this code to take unreasonable amounts of time (tens of seconds on a database with a couple thousand objects), as reported in bug #11033 from Joe Van Dyk. Joe's particular problem scenario involved "pg_dump -a" mode with long chains of foreign key constraints, but I think that similar problems could arise with other situations as long as there were enough objects. To fix, add a flag array that lets us notice when we arrive at the same object again while searching from a given start object. This simple change seems to be enough to eliminate the performance problem. Back-patch to 9.1, like the patch that introduced section boundary objects.
1 parent de35a97 commit c8e2e0e

File tree

1 file changed

+45
-9
lines changed

1 file changed

+45
-9
lines changed

src/bin/pg_dump/pg_dump_sort.c

+45-9
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ static void findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs);
137137
static int findLoop(DumpableObject *obj,
138138
DumpId startPoint,
139139
bool *processed,
140+
DumpId *searchFailed,
140141
DumpableObject **workspace,
141142
int depth);
142143
static void repairDependencyLoop(DumpableObject **loop,
@@ -633,21 +634,35 @@ static void
633634
findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
634635
{
635636
/*
636-
* We use two data structures here. One is a bool array processed[],
637-
* which is indexed by dump ID and marks the objects already processed
638-
* during this invocation of findDependencyLoops(). The other is a
639-
* workspace[] array of DumpableObject pointers, in which we try to build
640-
* lists of objects constituting loops. We make workspace[] large enough
641-
* to hold all the objects, which is huge overkill in most cases but could
642-
* theoretically be necessary if there is a single dependency chain
643-
* linking all the objects.
637+
* We use three data structures here:
638+
*
639+
* processed[] is a bool array indexed by dump ID, marking the objects
640+
* already processed during this invocation of findDependencyLoops().
641+
*
642+
* searchFailed[] is another array indexed by dump ID. searchFailed[j] is
643+
* set to dump ID k if we have proven that there is no dependency path
644+
* leading from object j back to start point k. This allows us to skip
645+
* useless searching when there are multiple dependency paths from k to j,
646+
* which is a common situation. We could use a simple bool array for
647+
* this, but then we'd need to re-zero it for each start point, resulting
648+
* in O(N^2) zeroing work. Using the start point's dump ID as the "true"
649+
* value lets us skip clearing the array before we consider the next start
650+
* point.
651+
*
652+
* workspace[] is an array of DumpableObject pointers, in which we try to
653+
* build lists of objects constituting loops. We make workspace[] large
654+
* enough to hold all the objects in TopoSort's output, which is huge
655+
* overkill in most cases but could theoretically be necessary if there is
656+
* a single dependency chain linking all the objects.
644657
*/
645658
bool *processed;
659+
DumpId *searchFailed;
646660
DumpableObject **workspace;
647661
bool fixedloop;
648662
int i;
649663

650664
processed = (bool *) pg_malloc0((getMaxDumpId() + 1) * sizeof(bool));
665+
searchFailed = (DumpId *) pg_malloc0((getMaxDumpId() + 1) * sizeof(DumpId));
651666
workspace = (DumpableObject **) pg_malloc(totObjs * sizeof(DumpableObject *));
652667
fixedloop = false;
653668

@@ -657,7 +672,12 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
657672
int looplen;
658673
int j;
659674

660-
looplen = findLoop(obj, obj->dumpId, processed, workspace, 0);
675+
looplen = findLoop(obj,
676+
obj->dumpId,
677+
processed,
678+
searchFailed,
679+
workspace,
680+
0);
661681

662682
if (looplen > 0)
663683
{
@@ -685,6 +705,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
685705
exit_horribly(modulename, "could not identify dependency loop\n");
686706

687707
free(workspace);
708+
free(searchFailed);
688709
free(processed);
689710
}
690711

@@ -695,6 +716,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
695716
* obj: object we are examining now
696717
* startPoint: dumpId of starting object for the hoped-for circular loop
697718
* processed[]: flag array marking already-processed objects
719+
* searchFailed[]: flag array marking already-unsuccessfully-visited objects
698720
* workspace[]: work array in which we are building list of loop members
699721
* depth: number of valid entries in workspace[] at call
700722
*
@@ -708,6 +730,7 @@ static int
708730
findLoop(DumpableObject *obj,
709731
DumpId startPoint,
710732
bool *processed,
733+
DumpId *searchFailed,
711734
DumpableObject **workspace,
712735
int depth)
713736
{
@@ -720,6 +743,13 @@ findLoop(DumpableObject *obj,
720743
if (processed[obj->dumpId])
721744
return 0;
722745

746+
/*
747+
* If we've already proven there is no path from this object back to the
748+
* startPoint, forget it.
749+
*/
750+
if (searchFailed[obj->dumpId] == startPoint)
751+
return 0;
752+
723753
/*
724754
* Reject if obj is already present in workspace. This test prevents us
725755
* from going into infinite recursion if we are given a startPoint object
@@ -759,12 +789,18 @@ findLoop(DumpableObject *obj,
759789
newDepth = findLoop(nextobj,
760790
startPoint,
761791
processed,
792+
searchFailed,
762793
workspace,
763794
depth);
764795
if (newDepth > 0)
765796
return newDepth;
766797
}
767798

799+
/*
800+
* Remember there is no path from here back to startPoint
801+
*/
802+
searchFailed[obj->dumpId] = startPoint;
803+
768804
return 0;
769805
}
770806

0 commit comments

Comments
 (0)