Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f3316a0

Browse files
committed
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a90, I removed the mini SQL lexer that was in pg_backup_db.c, thinking that it had no real purpose beyond separating COPY data from SQL commands, which purpose had been obsoleted by long-ago fixes in pg_dump's archive file format. Unfortunately this was in error: that code was also used to identify command boundaries in INSERT-style table data, which is run together as a single string in the archive file for better compressibility. As a result, direct-to-database restores from archive files made with --inserts or --column-inserts fail in our latest releases, as reported by Dick Visser. To fix, restore the mini SQL lexer, but simplify it by adjusting the calling logic so that it's only required to cope with INSERT-style table data, not arbitrary SQL commands. This allows us to not have to deal with SQL comments, E'' strings, or dollar-quoted strings, none of which have ever been emitted by dumpTableData_insert. Also, fix the lexer to cope with standard-conforming strings, which was the actual bug that the previous patch was meant to solve. Back-patch to all supported branches. The previous patch went back to 8.2, which unfortunately means that the EOL release of 8.2 contains this bug, but I don't think we're doing another 8.2 release just because of that.
1 parent 7e4911b commit f3316a0

File tree

4 files changed

+135
-11
lines changed

4 files changed

+135
-11
lines changed

src/bin/pg_dump/pg_backup_archiver.c

+14-9
Original file line numberDiff line numberDiff line change
@@ -620,20 +620,20 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te,
620620
if (te->copyStmt && strlen(te->copyStmt) > 0)
621621
{
622622
ahprintf(AH, "%s", te->copyStmt);
623-
AH->writingCopyData = true;
623+
AH->outputKind = OUTPUT_COPYDATA;
624624
}
625+
else
626+
AH->outputKind = OUTPUT_OTHERDATA;
625627

626628
(*AH->PrintTocDataPtr) (AH, te, ropt);
627629

628630
/*
629631
* Terminate COPY if needed.
630632
*/
631-
if (AH->writingCopyData)
632-
{
633-
if (RestoringToDB(AH))
634-
EndDBCopyMode(AH, te);
635-
AH->writingCopyData = false;
636-
}
633+
if (AH->outputKind == OUTPUT_COPYDATA &&
634+
RestoringToDB(AH))
635+
EndDBCopyMode(AH, te);
636+
AH->outputKind = OUTPUT_SQLCMDS;
637637

638638
/* close out the transaction started above */
639639
if (is_parallel && te->created)
@@ -1975,6 +1975,8 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
19751975
AH->mode = mode;
19761976
AH->compression = compression;
19771977

1978+
memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
1979+
19781980
/* Open stdout with no compression for AH output handle */
19791981
AH->gzOut = 0;
19801982
AH->OF = stdout;
@@ -4194,7 +4196,8 @@ CloneArchive(ArchiveHandle *AH)
41944196
clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
41954197
memcpy(clone, AH, sizeof(ArchiveHandle));
41964198

4197-
/* Handle format-independent fields ... none at the moment */
4199+
/* Handle format-independent fields */
4200+
memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
41984201

41994202
/* The clone will have its own connection, so disregard connection state */
42004203
clone->connection = NULL;
@@ -4227,7 +4230,9 @@ DeCloneArchive(ArchiveHandle *AH)
42274230
/* Clear format-specific state */
42284231
(AH->DeClonePtr) (AH);
42294232

4230-
/* Clear state allocated by CloneArchive ... none at the moment */
4233+
/* Clear state allocated by CloneArchive */
4234+
if (AH->sqlparse.curCmd)
4235+
destroyPQExpBuffer(AH->sqlparse.curCmd);
42314236

42324237
/* Clear any connection-local state */
42334238
if (AH->currUser)

src/bin/pg_dump/pg_backup_archiver.h

+24-1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,20 @@ typedef void (*DeClonePtr) (struct _archiveHandle * AH);
132132

133133
typedef size_t (*CustomOutPtr) (struct _archiveHandle * AH, const void *buf, size_t len);
134134

135+
typedef enum
136+
{
137+
SQL_SCAN = 0, /* normal */
138+
SQL_IN_SINGLE_QUOTE, /* '...' literal */
139+
SQL_IN_DOUBLE_QUOTE /* "..." identifier */
140+
} sqlparseState;
141+
142+
typedef struct
143+
{
144+
sqlparseState state; /* see above */
145+
bool backSlash; /* next char is backslash quoted? */
146+
PQExpBuffer curCmd; /* incomplete line (NULL if not created) */
147+
} sqlparseInfo;
148+
135149
typedef enum
136150
{
137151
STAGE_NONE = 0,
@@ -140,6 +154,13 @@ typedef enum
140154
STAGE_FINALIZING
141155
} ArchiverStage;
142156

157+
typedef enum
158+
{
159+
OUTPUT_SQLCMDS = 0, /* emitting general SQL commands */
160+
OUTPUT_COPYDATA, /* writing COPY data */
161+
OUTPUT_OTHERDATA /* writing data as INSERT commands */
162+
} ArchiverOutput;
163+
143164
typedef enum
144165
{
145166
REQ_SCHEMA = 1,
@@ -167,6 +188,8 @@ typedef struct _archiveHandle
167188
* Added V1.7 */
168189
ArchiveFormat format; /* Archive format */
169190

191+
sqlparseInfo sqlparse; /* state for parsing INSERT data */
192+
170193
time_t createDate; /* Date archive created */
171194

172195
/*
@@ -217,7 +240,7 @@ typedef struct _archiveHandle
217240
PGconn *connection;
218241
int connectToDB; /* Flag to indicate if direct DB connection is
219242
* required */
220-
bool writingCopyData; /* True when we are sending COPY data */
243+
ArchiverOutput outputKind; /* Flag for what we're currently writing */
221244
bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
222245

223246
int loFd; /* BLOB fd */

src/bin/pg_dump/pg_backup_db.c

+89-1
Original file line numberDiff line numberDiff line change
@@ -364,15 +364,93 @@ ExecuteSqlCommand(ArchiveHandle *AH, const char *qry, const char *desc)
364364
}
365365

366366

367+
/*
368+
* Process non-COPY table data (that is, INSERT commands).
369+
*
370+
* The commands have been run together as one long string for compressibility,
371+
* and we are receiving them in bufferloads with arbitrary boundaries, so we
372+
* have to locate command boundaries and save partial commands across calls.
373+
* All state must be kept in AH->sqlparse, not in local variables of this
374+
* routine. We assume that AH->sqlparse was filled with zeroes when created.
375+
*
376+
* We have to lex the data to the extent of identifying literals and quoted
377+
* identifiers, so that we can recognize statement-terminating semicolons.
378+
* We assume that INSERT data will not contain SQL comments, E'' literals,
379+
* or dollar-quoted strings, so this is much simpler than a full SQL lexer.
380+
*/
381+
static void
382+
ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen)
383+
{
384+
const char *qry = buf;
385+
const char *eos = buf + bufLen;
386+
387+
/* initialize command buffer if first time through */
388+
if (AH->sqlparse.curCmd == NULL)
389+
AH->sqlparse.curCmd = createPQExpBuffer();
390+
391+
for (; qry < eos; qry++)
392+
{
393+
char ch = *qry;
394+
395+
/* For neatness, we skip any newlines between commands */
396+
if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0))
397+
appendPQExpBufferChar(AH->sqlparse.curCmd, ch);
398+
399+
switch (AH->sqlparse.state)
400+
{
401+
case SQL_SCAN: /* Default state == 0, set in _allocAH */
402+
if (ch == ';')
403+
{
404+
/*
405+
* We've found the end of a statement. Send it and reset
406+
* the buffer.
407+
*/
408+
ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data,
409+
"could not execute query");
410+
resetPQExpBuffer(AH->sqlparse.curCmd);
411+
}
412+
else if (ch == '\'')
413+
{
414+
AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
415+
AH->sqlparse.backSlash = false;
416+
}
417+
else if (ch == '"')
418+
{
419+
AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
420+
}
421+
break;
422+
423+
case SQL_IN_SINGLE_QUOTE:
424+
/* We needn't handle '' specially */
425+
if (ch == '\'' && !AH->sqlparse.backSlash)
426+
AH->sqlparse.state = SQL_SCAN;
427+
else if (ch == '\\' && !AH->public.std_strings)
428+
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
429+
else
430+
AH->sqlparse.backSlash = false;
431+
break;
432+
433+
case SQL_IN_DOUBLE_QUOTE:
434+
/* We needn't handle "" specially */
435+
if (ch == '"')
436+
AH->sqlparse.state = SQL_SCAN;
437+
break;
438+
}
439+
}
440+
}
441+
442+
367443
/*
368444
* Implement ahwrite() for direct-to-DB restore
369445
*/
370446
int
371447
ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
372448
{
373-
if (AH->writingCopyData)
449+
if (AH->outputKind == OUTPUT_COPYDATA)
374450
{
375451
/*
452+
* COPY data.
453+
*
376454
* We drop the data on the floor if libpq has failed to enter COPY
377455
* mode; this allows us to behave reasonably when trying to continue
378456
* after an error in a COPY command.
@@ -382,9 +460,19 @@ ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
382460
die_horribly(AH, modulename, "error returned by PQputCopyData: %s",
383461
PQerrorMessage(AH->connection));
384462
}
463+
else if (AH->outputKind == OUTPUT_OTHERDATA)
464+
{
465+
/*
466+
* Table data expressed as INSERT commands.
467+
*/
468+
ExecuteInsertCommands(AH, buf, bufLen);
469+
}
385470
else
386471
{
387472
/*
473+
* General SQL commands; we assume that commands will not be split
474+
* across calls.
475+
*
388476
* In most cases the data passed to us will be a null-terminated
389477
* string, but if it's not, we have to add a trailing null.
390478
*/

src/bin/pg_dump/pg_dump.c

+8
Original file line numberDiff line numberDiff line change
@@ -1399,6 +1399,14 @@ dumpTableData_copy(Archive *fout, void *dcontext)
13991399
return 1;
14001400
}
14011401

1402+
/*
1403+
* Dump table data using INSERT commands.
1404+
*
1405+
* Caution: when we restore from an archive file direct to database, the
1406+
* INSERT commands emitted by this function have to be parsed by
1407+
* pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments,
1408+
* E'' strings, or dollar-quoted strings. So don't emit anything like that.
1409+
*/
14021410
static int
14031411
dumpTableData_insert(Archive *fout, void *dcontext)
14041412
{

0 commit comments

Comments
 (0)