Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 66904c0

Browse files
author
Commitfest Bot
committed
[CF 5678] optimizations for dumping statistics
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5678 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/Z-9Bx3ml2i7OfHiN@nathan Author(s): Nathan Bossart
2 parents 1aff1dc + 183c288 commit 66904c0

File tree

5 files changed

+276
-64
lines changed

5 files changed

+276
-64
lines changed

src/bin/pg_dump/pg_backup.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ typedef int DumpId;
285285
* Function pointer prototypes for assorted callback methods.
286286
*/
287287

288+
/* forward declaration to avoid including pg_backup_archiver.h here */
289+
typedef struct _tocEntry TocEntry;
290+
291+
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
288292
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
289293

290294
typedef void (*SetupWorkerPtrType) (Archive *AH);

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 104 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
7272
static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
7373
static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te);
7474
static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH);
75-
static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te);
75+
static RestorePass _tocEntryRestorePass(TocEntry *te);
7676
static bool _tocEntryIsACL(TocEntry *te);
7777
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
7878
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
@@ -102,8 +102,7 @@ static void pending_list_append(TocEntry *l, TocEntry *te);
102102
static void pending_list_remove(TocEntry *te);
103103
static int TocEntrySizeCompareQsort(const void *p1, const void *p2);
104104
static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg);
105-
static void move_to_ready_heap(ArchiveHandle *AH,
106-
TocEntry *pending_list,
105+
static void move_to_ready_heap(TocEntry *pending_list,
107106
binaryheap *ready_heap,
108107
RestorePass pass);
109108
static TocEntry *pop_next_work_item(binaryheap *ready_heap,
@@ -749,7 +748,7 @@ RestoreArchive(Archive *AHX)
749748
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0)
750749
continue; /* ignore if not to be dumped at all */
751750

752-
switch (_tocEntryRestorePass(AH, te))
751+
switch (_tocEntryRestorePass(te))
753752
{
754753
case RESTORE_PASS_MAIN:
755754
(void) restore_toc_entry(AH, te, false);
@@ -768,7 +767,7 @@ RestoreArchive(Archive *AHX)
768767
for (te = AH->toc->next; te != AH->toc; te = te->next)
769768
{
770769
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
771-
_tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL)
770+
_tocEntryRestorePass(te) == RESTORE_PASS_ACL)
772771
(void) restore_toc_entry(AH, te, false);
773772
}
774773
}
@@ -778,7 +777,7 @@ RestoreArchive(Archive *AHX)
778777
for (te = AH->toc->next; te != AH->toc; te = te->next)
779778
{
780779
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
781-
_tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL)
780+
_tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL)
782781
(void) restore_toc_entry(AH, te, false);
783782
}
784783
}
@@ -1266,6 +1265,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
12661265
newToc->dataDumperArg = opts->dumpArg;
12671266
newToc->hadDumper = opts->dumpFn ? true : false;
12681267

1268+
newToc->defnDumper = opts->defnFn;
1269+
newToc->defnDumperArg = opts->defnArg;
1270+
12691271
newToc->formatData = NULL;
12701272
newToc->dataLength = 0;
12711273

@@ -2621,7 +2623,45 @@ WriteToc(ArchiveHandle *AH)
26212623
WriteStr(AH, te->tag);
26222624
WriteStr(AH, te->desc);
26232625
WriteInt(AH, te->section);
2624-
WriteStr(AH, te->defn);
2626+
2627+
if (te->defnLen)
2628+
{
2629+
/*
2630+
* defnLen should only be set for custom format's second call to
2631+
* WriteToc(), which rewrites the TOC in place to update data
2632+
* offsets. Instead of calling the defnDumper a second time
2633+
* (which could involve re-executing queries), just skip writing
2634+
* the entry. While regenerating the definition should
2635+
* theoretically produce the same result as before, it's expensive
2636+
* and feels risky.
2637+
*
2638+
* The custom format only calls WriteToc() a second time if
2639+
* fseeko() is usable (see _CloseArchive() in pg_backup_custom.c),
2640+
* so we can safely use it without checking. For other formats,
2641+
* we fail because one of our assumptions must no longer hold
2642+
* true.
2643+
*
2644+
* XXX This is certainly a layering violation, but the alternative
2645+
* is an awkward and complicated callback infrastructure for this
2646+
* special case. This might be worth revisiting in the future.
2647+
*/
2648+
if (AH->format != archCustom)
2649+
pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s",
2650+
te->dumpId, te->desc, te->tag);
2651+
2652+
if (fseeko(AH->FH, te->defnLen, SEEK_CUR != 0))
2653+
pg_fatal("error during file seek: %m");
2654+
}
2655+
else if (te->defnDumper)
2656+
{
2657+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
2658+
2659+
te->defnLen = WriteStr(AH, defn);
2660+
pg_free(defn);
2661+
}
2662+
else
2663+
WriteStr(AH, te->defn);
2664+
26252665
WriteStr(AH, te->dropStmt);
26262666
WriteStr(AH, te->copyStmt);
26272667
WriteStr(AH, te->namespace);
@@ -3220,7 +3260,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
32203260
* See notes with the RestorePass typedef in pg_backup_archiver.h.
32213261
*/
32223262
static RestorePass
3223-
_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
3263+
_tocEntryRestorePass(TocEntry *te)
32243264
{
32253265
/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
32263266
if (strcmp(te->desc, "ACL") == 0 ||
@@ -3243,23 +3283,17 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
32433283

32443284
/*
32453285
* If statistics data is dependent on materialized view data, it must be
3246-
* deferred to RESTORE_PASS_POST_ACL.
3286+
* deferred to RESTORE_PASS_POST_ACL. Those entries are already marked
3287+
* with SECTION_POST_DATA, and some other stats entries (e.g., index
3288+
* stats) will also be marked SECTION_POST_DATA. Additionally, our
3289+
* lookahead code in fetchAttributeStats() assumes that we dump all
3290+
* statistics data entries in TOC order. To ensure this assumption holds,
3291+
* we move all statistics data entries in SECTION_POST_DATA to
3292+
* RESTORE_PASS_POST_ACL.
32473293
*/
3248-
if (strcmp(te->desc, "STATISTICS DATA") == 0)
3249-
{
3250-
for (int i = 0; i < te->nDeps; i++)
3251-
{
3252-
DumpId depid = te->dependencies[i];
3253-
3254-
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
3255-
{
3256-
TocEntry *otherte = AH->tocsByDumpId[depid];
3257-
3258-
if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
3259-
return RESTORE_PASS_POST_ACL;
3260-
}
3261-
}
3262-
}
3294+
if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
3295+
te->section == SECTION_POST_DATA)
3296+
return RESTORE_PASS_POST_ACL;
32633297

32643298
/* All else can be handled in the main pass. */
32653299
return RESTORE_PASS_MAIN;
@@ -3849,7 +3883,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
38493883

38503884
/*
38513885
* Actually print the definition. Normally we can just print the defn
3852-
* string if any, but we have three special cases:
3886+
* string if any, but we have four special cases:
38533887
*
38543888
* 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump
38553889
* versions put into CREATE SCHEMA. Don't mutate the variant for schema
@@ -3862,6 +3896,11 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
38623896
* 3. ACL LARGE OBJECTS entries need special processing because they
38633897
* contain only one copy of the ACL GRANT/REVOKE commands, which we must
38643898
* apply to each large object listed in the associated BLOB METADATA.
3899+
*
3900+
* 4. Entries with a defnDumper need to call it to generate the
3901+
* definition. This is primarily intended to provide a way to save memory
3902+
* for objects that would otherwise need a lot of it (e.g., statistics
3903+
* data).
38653904
*/
38663905
if (ropt->noOwner &&
38673906
strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0)
@@ -3877,6 +3916,40 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
38773916
{
38783917
IssueACLPerBlob(AH, te);
38793918
}
3919+
else if (te->defnLen && AH->format != archTar)
3920+
{
3921+
/*
3922+
* If defnLen is set, the defnDumper has already been called for this
3923+
* TOC entry. We don't normally expect a defnDumper to be called for
3924+
* a TOC entry a second time in _printTocEntry(), but there's an
3925+
* exception. The tar format first calls WriteToc(), which scans the
3926+
* entire TOC, and then it later calls RestoreArchive() to generate
3927+
* restore.sql, which scans the TOC again. There doesn't appear to be
3928+
* a good way to prevent a second defnDumper call in this case without
3929+
* storing the definition in memory, which defeats the purpose. This
3930+
* second defnDumper invocation should generate the same output as the
3931+
* first, but even if it doesn't, the worst-case scenario is that the
3932+
* content of the archive and restore.sql (which isn't used by
3933+
* pg_restore) will differ.
3934+
*
3935+
* In all other cases, encountering a TOC entry a second time in
3936+
* _printTocEntry() is unexpected, so we fail because one of our
3937+
* assumptions must no longer hold true.
3938+
*
3939+
* XXX This is certainly a layering violation, but the alternative is
3940+
* an awkward and complicated callback infrastructure for this special
3941+
* case. This might be worth revisiting in the future.
3942+
*/
3943+
pg_fatal("unexpected TOC entry in _printTocEntry(): %d %s %s",
3944+
te->dumpId, te->desc, te->tag);
3945+
}
3946+
else if (te->defnDumper)
3947+
{
3948+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
3949+
3950+
te->defnLen = ahprintf(AH, "%s\n\n", defn);
3951+
pg_free(defn);
3952+
}
38803953
else if (te->defn && strlen(te->defn) > 0)
38813954
{
38823955
ahprintf(AH, "%s\n\n", te->defn);
@@ -4270,7 +4343,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
42704343
* not set skipped_some in this case, since by assumption no main-pass
42714344
* items could depend on these.
42724345
*/
4273-
if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN)
4346+
if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
42744347
do_now = false;
42754348

42764349
if (do_now)
@@ -4352,7 +4425,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
43524425
* process in the current restore pass.
43534426
*/
43544427
AH->restorePass = RESTORE_PASS_MAIN;
4355-
move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
4428+
move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
43564429

43574430
/*
43584431
* main parent loop
@@ -4401,7 +4474,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
44014474
/* Advance to next restore pass */
44024475
AH->restorePass++;
44034476
/* That probably allows some stuff to be made ready */
4404-
move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
4477+
move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
44054478
/* Loop around to see if anything's now ready */
44064479
continue;
44074480
}
@@ -4572,8 +4645,7 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg)
45724645
* which applies the same logic one-at-a-time.)
45734646
*/
45744647
static void
4575-
move_to_ready_heap(ArchiveHandle *AH,
4576-
TocEntry *pending_list,
4648+
move_to_ready_heap(TocEntry *pending_list,
45774649
binaryheap *ready_heap,
45784650
RestorePass pass)
45794651
{
@@ -4586,7 +4658,7 @@ move_to_ready_heap(ArchiveHandle *AH,
45864658
next_te = te->pending_next;
45874659

45884660
if (te->depCount == 0 &&
4589-
_tocEntryRestorePass(AH, te) == pass)
4661+
_tocEntryRestorePass(te) == pass)
45904662
{
45914663
/* Remove it from pending_list ... */
45924664
pending_list_remove(te);
@@ -4980,7 +5052,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
49805052
* memberships changed.
49815053
*/
49825054
if (otherte->depCount == 0 &&
4983-
_tocEntryRestorePass(AH, otherte) == AH->restorePass &&
5055+
_tocEntryRestorePass(otherte) == AH->restorePass &&
49845056
otherte->pending_prev != NULL &&
49855057
ready_heap != NULL)
49865058
{

src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,10 @@ struct _tocEntry
368368
const void *dataDumperArg; /* Arg for above routine */
369369
void *formatData; /* TOC Entry data specific to file format */
370370

371+
DefnDumperPtr defnDumper; /* routine to dump definition statement */
372+
const void *defnDumperArg; /* arg for above routine */
373+
size_t defnLen; /* length of dumped definition */
374+
371375
/* working state while dumping/restoring */
372376
pgoff_t dataLength; /* item's data size; 0 if none or unknown */
373377
int reqs; /* do we need schema and/or data of object
@@ -407,6 +411,8 @@ typedef struct _archiveOpts
407411
int nDeps;
408412
DataDumperPtr dumpFn;
409413
const void *dumpArg;
414+
DefnDumperPtr defnFn;
415+
const void *defnArg;
410416
} ArchiveOpts;
411417
#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
412418
/* Called to add a TOC entry */

src/bin/pg_dump/pg_backup_custom.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -755,9 +755,11 @@ _CloseArchive(ArchiveHandle *AH)
755755
* If possible, re-write the TOC in order to update the data offset
756756
* information. This is not essential, as pg_restore can cope in most
757757
* cases without it; but it can make pg_restore significantly faster
758-
* in some situations (especially parallel restore).
758+
* in some situations (especially parallel restore). We can skip this
759+
* step if we're not dumping any data; there are no offsets to update
760+
* in that case.
759761
*/
760-
if (ctx->hasSeek &&
762+
if (ctx->hasSeek && AH->public.dopt->dumpData &&
761763
fseeko(AH->FH, tpos, SEEK_SET) == 0)
762764
WriteToc(AH);
763765
}

0 commit comments

Comments
 (0)