Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 65aaed2

Browse files
committed
Account for TOAST data while scheduling parallel dumps.
In parallel mode, pg_dump tries to order the table-data-dumping jobs with the largest tables first. However, it was only consulting the pg_class.relpages value to determine table size. This ignores TOAST data, and so we could make poor scheduling decisions in cases where some large tables are mostly TOASTed data while others have very little. To fix, add in the relpages value for the TOAST table as well.

This patch also fixes a potential integer-overflow issue that could result in poor scheduling on machines where off_t is only 32 bits wide. Such platforms are probably extinct in the wild, but we do still nominally support them, so repair.

Per complaint from Hans Buschmann.

Discussion: https://postgr.es/m/7d7eb6128f40401d81b3b7a898b6b4de@W2012-02.nidsa.loc
1 parent be85727 commit 65aaed2

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

src/bin/pg_dump/pg_dump.c

+26-9
Original file line numberDiff line numberDiff line change
@@ -2467,13 +2467,26 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
 	/*
 	 * Set the TocEntry's dataLength in case we are doing a parallel dump
 	 * and want to order dump jobs by table size.  We choose to measure
-	 * dataLength in table pages during dump, so no scaling is needed.
+	 * dataLength in table pages (including TOAST pages) during dump, so
+	 * no scaling is needed.
+	 *
 	 * However, relpages is declared as "integer" in pg_class, and hence
 	 * also in TableInfo, but it's really BlockNumber a/k/a unsigned int.
 	 * Cast so that we get the right interpretation of table sizes
 	 * exceeding INT_MAX pages.
 	 */
 	te->dataLength = (BlockNumber) tbinfo->relpages;
+	te->dataLength += (BlockNumber) tbinfo->toastpages;
+
+	/*
+	 * If pgoff_t is only 32 bits wide, the above refinement is useless,
+	 * and instead we'd better worry about integer overflow.  Clamp to
+	 * INT_MAX if the correct result exceeds that.
+	 */
+	if (sizeof(te->dataLength) == 4 &&
+		(tbinfo->relpages < 0 || tbinfo->toastpages < 0 ||
+		 te->dataLength < 0))
+		te->dataLength = INT_MAX;
 }

 	destroyPQExpBuffer(copyBuf);
@@ -6254,6 +6267,7 @@ getTables(Archive *fout, int *numTables)
 	int			i_relhasindex;
 	int			i_relhasrules;
 	int			i_relpages;
+	int			i_toastpages;
 	int			i_owning_tab;
 	int			i_owning_col;
 	int			i_reltablespace;
@@ -6303,6 +6317,7 @@ getTables(Archive *fout, int *numTables)
 		"(%s c.relowner) AS rolname, "
 		"c.relchecks, "
 		"c.relhasindex, c.relhasrules, c.relpages, "
+		"tc.relpages AS toastpages, "
 		"d.refobjid AS owning_tab, "
 		"d.refobjsubid AS owning_col, "
 		"tsp.spcname AS reltablespace, ",
@@ -6459,17 +6474,14 @@ getTables(Archive *fout, int *numTables)
 		"LEFT JOIN pg_am am ON (c.relam = am.oid)\n");

 	/*
-	 * We don't need any data from the TOAST table before 8.2.
-	 *
 	 * We purposefully ignore toast OIDs for partitioned tables; the reason is
 	 * that versions 10 and 11 have them, but later versions do not, so
 	 * emitting them causes the upgrade to fail.
 	 */
-	if (fout->remoteVersion >= 80200)
-		appendPQExpBufferStr(query,
-							 "LEFT JOIN pg_class tc ON (c.reltoastrelid = tc.oid"
-							 " AND tc.relkind = " CppAsString2(RELKIND_TOASTVALUE)
-							 " AND c.relkind <> " CppAsString2(RELKIND_PARTITIONED_TABLE) ")\n");
+	appendPQExpBufferStr(query,
+						 "LEFT JOIN pg_class tc ON (c.reltoastrelid = tc.oid"
+						 " AND tc.relkind = " CppAsString2(RELKIND_TOASTVALUE)
+						 " AND c.relkind <> " CppAsString2(RELKIND_PARTITIONED_TABLE) ")\n");

 	/*
 	 * Restrict to interesting relkinds (in particular, not indexes).  Not all
@@ -6520,6 +6532,7 @@ getTables(Archive *fout, int *numTables)
 	i_relhasindex = PQfnumber(res, "relhasindex");
 	i_relhasrules = PQfnumber(res, "relhasrules");
 	i_relpages = PQfnumber(res, "relpages");
+	i_toastpages = PQfnumber(res, "toastpages");
 	i_owning_tab = PQfnumber(res, "owning_tab");
 	i_owning_col = PQfnumber(res, "owning_col");
 	i_reltablespace = PQfnumber(res, "reltablespace");
@@ -6581,6 +6594,10 @@ getTables(Archive *fout, int *numTables)
 		tblinfo[i].hasindex = (strcmp(PQgetvalue(res, i, i_relhasindex), "t") == 0);
 		tblinfo[i].hasrules = (strcmp(PQgetvalue(res, i, i_relhasrules), "t") == 0);
 		tblinfo[i].relpages = atoi(PQgetvalue(res, i, i_relpages));
+		if (PQgetisnull(res, i, i_toastpages))
+			tblinfo[i].toastpages = 0;
+		else
+			tblinfo[i].toastpages = atoi(PQgetvalue(res, i, i_toastpages));
 		if (PQgetisnull(res, i, i_owning_tab))
 		{
 			tblinfo[i].owning_tab = InvalidOid;
@@ -10407,7 +10424,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 				 * about is allowing blob dumping to be parallelized, not just
 				 * getting a smarter estimate for the single TOC entry.)
 				 */
-				te->dataLength = MaxBlockNumber;
+				te->dataLength = INT_MAX;
 			}
 			break;
 		case DO_POLICY:

src/bin/pg_dump/pg_dump.h

+1
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ typedef struct _tableInfo
 	int			owning_col;		/* attr # of column owning sequence */
 	bool		is_identity_sequence;
 	int			relpages;		/* table's size in pages (from pg_class) */
+	int			toastpages;		/* toast table's size in pages, if any */

 	bool		interesting;	/* true if need to collect more data */
 	bool		dummy_view;		/* view's real definition must be postponed */

0 commit comments

Comments
 (0)