Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Bossart2024-07-31 15:12:42 +0000
committerNathan Bossart2024-07-31 15:12:42 +0000
commitbd15b7db489deadb2d9af7f21d16a6ed4a09465b (patch)
treea5bbfddad0729130f790fe79eea5141cd386f689
parentc8b06bb969bf26c01f10b835e59d0aff39b7f516 (diff)
Improve performance of dumpSequenceData().
As one might guess, this function dumps the sequence data. It is called once per sequence, and each such call executes a query to retrieve the relevant data for a single sequence. This can cause pg_dump to take significantly longer, especially when there are many sequences. This commit improves the performance of this function by gathering all the sequence data with a single query at the beginning of pg_dump. This information is stored in a sorted array that dumpSequenceData() can bsearch() for what it needs. This follows a similar approach as previous commits that introduced sorted arrays for role information, pg_class information, and sequence metadata. As with those commits, this patch will cause pg_dump to use more memory, but that isn't expected to be too egregious. Note that we use the brand new function pg_sequence_read_tuple() in the query that gathers all sequence data, so we must continue to use the preexisting query-per-sequence approach for versions older than 18. Reviewed-by: Euler Taveira, Michael Paquier, Tom Lane Discussion: https://postgr.es/m/20240503025140.GA1227404%40nathanxps13
-rw-r--r--src/bin/pg_dump/pg_dump.c81
1 files changed, 63 insertions, 18 deletions
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d4e6694cc92..0d025162738 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -132,6 +132,8 @@ typedef struct
int64 startv; /* start value */
int64 incby; /* increment value */
int64 cache; /* cache size */
+ int64 last_value; /* last value of sequence */
+ bool is_called; /* whether nextval advances before returning */
} SequenceItem;
typedef enum OidOptions
@@ -17330,16 +17332,30 @@ collectSequences(Archive *fout)
* Before Postgres 10, sequence metadata is in the sequence itself. With
* some extra effort, we might be able to use the sorted table for those
* versions, but for now it seems unlikely to be worth it.
+ *
+ * Since version 18, we can gather the sequence data in this query with
+ * pg_sequence_read_tuple(), but we only do so for non-schema-only dumps.
*/
if (fout->remoteVersion < 100000)
return;
- else
+ else if (fout->remoteVersion < 180000 ||
+ (fout->dopt->schemaOnly && !fout->dopt->sequence_data))
query = "SELECT seqrelid, format_type(seqtypid, NULL), "
"seqstart, seqincrement, "
"seqmax, seqmin, "
- "seqcache, seqcycle "
+ "seqcache, seqcycle, "
+ "NULL, 'f' "
"FROM pg_catalog.pg_sequence "
"ORDER BY seqrelid";
+ else
+ query = "SELECT seqrelid, format_type(seqtypid, NULL), "
+ "seqstart, seqincrement, "
+ "seqmax, seqmin, "
+ "seqcache, seqcycle, "
+ "last_value, is_called "
+ "FROM pg_catalog.pg_sequence, "
+ "pg_sequence_read_tuple(seqrelid) "
+ "ORDER BY seqrelid;";
res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
@@ -17356,6 +17372,8 @@ collectSequences(Archive *fout)
sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
+ sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10);
+ sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0);
}
PQclear(res);
@@ -17622,30 +17640,59 @@ static void
dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
{
TableInfo *tbinfo = tdinfo->tdtable;
- PGresult *res;
- char *last;
+ int64 last;
bool called;
PQExpBuffer query = createPQExpBuffer();
- appendPQExpBuffer(query,
- "SELECT last_value, is_called FROM %s",
- fmtQualifiedDumpable(tbinfo));
+ /*
+ * For versions >= 18, the sequence information is gathered in the sorted
+ * array before any calls to dumpSequenceData(). See collectSequences()
+ * for more information.
+ *
+ * For older versions, we have to query the sequence relations
+ * individually.
+ */
+ if (fout->remoteVersion < 180000)
+ {
+ PGresult *res;
- res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+ appendPQExpBuffer(query,
+ "SELECT last_value, is_called FROM %s",
+ fmtQualifiedDumpable(tbinfo));
- if (PQntuples(res) != 1)
- pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
- "query to get data of sequence \"%s\" returned %d rows (expected 1)",
- PQntuples(res)),
- tbinfo->dobj.name, PQntuples(res));
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
- last = PQgetvalue(res, 0, 0);
- called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+ if (PQntuples(res) != 1)
+ pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
+ "query to get data of sequence \"%s\" returned %d rows (expected 1)",
+ PQntuples(res)),
+ tbinfo->dobj.name, PQntuples(res));
+
+ last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10);
+ called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+
+ PQclear(res);
+ }
+ else
+ {
+ SequenceItem key = {0};
+ SequenceItem *entry;
+
+ Assert(sequences);
+ Assert(tbinfo->dobj.catId.oid);
+
+ key.oid = tbinfo->dobj.catId.oid;
+ entry = bsearch(&key, sequences, nsequences,
+ sizeof(SequenceItem), SequenceItemCmp);
+
+ last = entry->last_value;
+ called = entry->is_called;
+ }
resetPQExpBuffer(query);
appendPQExpBufferStr(query, "SELECT pg_catalog.setval(");
appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout);
- appendPQExpBuffer(query, ", %s, %s);\n",
+ appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n",
last, (called ? "true" : "false"));
if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA)
@@ -17659,8 +17706,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
.deps = &(tbinfo->dobj.dumpId),
.nDeps = 1));
- PQclear(res);
-
destroyPQExpBuffer(query);
}