Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 55e8608

Browse files
committed
fix pg_stop_backup waiting
1 parent 43bd813 commit 55e8608

File tree

3 files changed

+151
-141
lines changed

3 files changed

+151
-141
lines changed

src/backup.c

Lines changed: 148 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ static int server_version = 0;
6464
static bool exclusive_backup = false;
6565
/* Was pg_start_backup() executed? */
6666
static bool backup_in_progress = false;
67+
/* Was pg_stop_backup() sent? */
68+
static bool pg_stop_backup_is_sent = false;
6769

6870
typedef struct
6971
{
@@ -1558,182 +1560,189 @@ pg_stop_backup(pgBackup *backup)
15581560
* postgres archive_command problem and in this case we will
15591561
* wait for pg_stop_backup() forever.
15601562
*/
1561-
if (!exclusive_backup)
1562-
/*
1563-
* Stop the non-exclusive backup. Besides stop_lsn it returns from
1564-
* pg_stop_backup(false) copy of the backup label and tablespace map
1565-
* so they can be written to disk by the caller.
1566-
*/
1567-
sent = pgut_send(conn,
1568-
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
1569-
" current_timestamp(0)::timestamptz"
1570-
" FROM pg_stop_backup(false)",
1571-
0, NULL, WARNING);
1572-
else
1573-
sent = pgut_send(conn,
1574-
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
1575-
" current_timestamp(0)::timestamptz"
1576-
" FROM pg_stop_backup()",
1577-
0, NULL, WARNING);
1578-
1579-
if (!sent)
1580-
elog(WARNING, "Failed to send pg_stop_backup query");
15811563

1564+
if (!pg_stop_backup_is_sent)
1565+
{
1566+
if (!exclusive_backup)
1567+
/*
1568+
* Stop the non-exclusive backup. Besides stop_lsn it returns from
1569+
* pg_stop_backup(false) copy of the backup label and tablespace map
1570+
* so they can be written to disk by the caller.
1571+
*/
1572+
sent = pgut_send(conn,
1573+
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
1574+
" current_timestamp(0)::timestamptz"
1575+
" FROM pg_stop_backup(false)",
1576+
0, NULL, WARNING);
1577+
else
1578+
sent = pgut_send(conn,
1579+
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
1580+
" current_timestamp(0)::timestamptz"
1581+
" FROM pg_stop_backup()",
1582+
0, NULL, WARNING);
1583+
1584+
pg_stop_backup_is_sent = true;
1585+
if (!sent)
1586+
elog(ERROR, "Failed to send pg_stop_backup query");
1587+
}
15821588

15831589
/*
15841590
* Wait for the result of pg_stop_backup(),
15851591
* but no longer than PG_STOP_BACKUP_TIMEOUT seconds
15861592
*/
1587-
while (1)
1593+
if (pg_stop_backup_is_sent && !in_cleanup)
15881594
{
1589-
if (!PQconsumeInput(conn) || PQisBusy(conn))
1595+
while (1)
15901596
{
1591-
pg_stop_backup_timeout++;
1592-
sleep(1);
1593-
1594-
if (interrupted)
1597+
if (!PQconsumeInput(conn) || PQisBusy(conn))
15951598
{
1596-
pgut_cancel(conn);
1597-
elog(ERROR, "interrupted during waiting for pg_stop_backup");
1598-
}
1599+
pg_stop_backup_timeout++;
1600+
sleep(1);
15991601

1600-
if (pg_stop_backup_timeout == 1)
1601-
elog(INFO, "wait for pg_stop_backup()");
1602+
if (interrupted)
1603+
{
1604+
pgut_cancel(conn);
1605+
elog(ERROR, "interrupted during waiting for pg_stop_backup");
1606+
}
16021607

1603-
/*
1604-
* If postgres hasn't answered in PG_STOP_BACKUP_TIMEOUT seconds,
1605-
* send an interrupt.
1606-
*/
1607-
if (pg_stop_backup_timeout > PG_STOP_BACKUP_TIMEOUT)
1608+
if (pg_stop_backup_timeout == 1)
1609+
elog(INFO, "wait for pg_stop_backup()");
1610+
1611+
/*
1612+
* If postgres hasn't answered in PG_STOP_BACKUP_TIMEOUT seconds,
1613+
* send an interrupt.
1614+
*/
1615+
if (pg_stop_backup_timeout > PG_STOP_BACKUP_TIMEOUT)
1616+
{
1617+
pgut_cancel(conn);
1618+
elog(ERROR, "pg_stop_backup doesn't answer in %d seconds, cancel it",
1619+
PG_STOP_BACKUP_TIMEOUT);
1620+
}
1621+
}
1622+
else
16081623
{
1609-
pgut_cancel(conn);
1610-
elog(ERROR, "pg_stop_backup doesn't answer in %d seconds, cancel it",
1611-
PG_STOP_BACKUP_TIMEOUT);
1624+
res = PQgetResult(conn);
1625+
break;
16121626
}
16131627
}
1614-
else
1615-
{
1616-
res = PQgetResult(conn);
1617-
break;
1618-
}
1628+
if (!res)
1629+
elog(ERROR, "pg_stop backup() failed");
16191630
}
16201631

1621-
if (!res)
1622-
elog(ERROR, "pg_stop backup() failed");
1623-
16241632
backup_in_progress = false;
1625-
1626-
/* Extract timeline and LSN from results of pg_stop_backup() */
1627-
XLogDataFromLSN(PQgetvalue(res, 0, 0), &xlogid, &xrecoff);
1628-
/* Calculate LSN */
1629-
stop_backup_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff;
1630-
1631-
if (!XRecOffIsValid(stop_backup_lsn))
1633+
/* If stop_backup was sent and we are here, it means that it was received */
1634+
if (pg_stop_backup_is_sent && !in_cleanup)
16321635
{
1633-
stop_backup_lsn = restore_lsn;
1634-
}
1635-
1636-
if (!XRecOffIsValid(stop_backup_lsn))
1637-
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
1638-
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1639-
1640-
/* Write backup_label and tablespace_map for backup from replica */
1641-
if (!exclusive_backup)
1642-
{
1643-
char path[MAXPGPATH];
1644-
char backup_label[MAXPGPATH];
1645-
FILE *fp;
1646-
pgFile *file;
1647-
size_t len;
1648-
1649-
Assert(PQnfields(res) >= 5);
1650-
1651-
pgBackupGetPath(&current, path, lengthof(path), DATABASE_DIR);
1652-
1653-
/* Write backup_label */
1654-
join_path_components(backup_label, path, PG_BACKUP_LABEL_FILE);
1655-
fp = fopen(backup_label, "w");
1656-
if (fp == NULL)
1657-
elog(ERROR, "can't open backup label file \"%s\": %s",
1658-
backup_label, strerror(errno));
1659-
1660-
len = strlen(PQgetvalue(res, 0, 1));
1661-
if (fwrite(PQgetvalue(res, 0, 1), 1, len, fp) != len ||
1662-
fflush(fp) != 0 ||
1663-
fsync(fileno(fp)) != 0 ||
1664-
fclose(fp))
1665-
elog(ERROR, "can't write backup label file \"%s\": %s",
1666-
backup_label, strerror(errno));
1636+
/* Extract timeline and LSN from results of pg_stop_backup() */
1637+
XLogDataFromLSN(PQgetvalue(res, 0, 0), &xlogid, &xrecoff);
1638+
/* Calculate LSN */
1639+
stop_backup_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff;
16671640

1668-
/*
1669-
* It's vital to check if backup_files_list is initialized,
1670-
* because we could get here because the backup was interrupted
1671-
*/
1672-
if (backup_files_list)
1641+
if (!XRecOffIsValid(stop_backup_lsn))
16731642
{
1674-
file = pgFileNew(backup_label, true);
1675-
calc_file_checksum(file);
1676-
free(file->path);
1677-
file->path = strdup(PG_BACKUP_LABEL_FILE);
1678-
parray_append(backup_files_list, file);
1643+
stop_backup_lsn = restore_lsn;
16791644
}
16801645

1681-
/* Write tablespace_map */
1682-
if (strlen(PQgetvalue(res, 0, 2)) > 0)
1646+
if (!XRecOffIsValid(stop_backup_lsn))
1647+
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
1648+
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1649+
1650+
/* Write backup_label and tablespace_map for backup from replica */
1651+
if (!exclusive_backup)
16831652
{
1684-
char tablespace_map[MAXPGPATH];
1653+
char path[MAXPGPATH];
1654+
char backup_label[MAXPGPATH];
1655+
FILE *fp;
1656+
pgFile *file;
1657+
size_t len;
1658+
1659+
Assert(PQnfields(res) >= 5);
1660+
1661+
pgBackupGetPath(&current, path, lengthof(path), DATABASE_DIR);
16851662

1686-
join_path_components(tablespace_map, path, PG_TABLESPACE_MAP_FILE);
1687-
fp = fopen(tablespace_map, "w");
1663+
/* Write backup_label */
1664+
join_path_components(backup_label, path, PG_BACKUP_LABEL_FILE);
1665+
fp = fopen(backup_label, "w");
16881666
if (fp == NULL)
1689-
elog(ERROR, "can't open tablespace map file \"%s\": %s",
1690-
tablespace_map, strerror(errno));
1667+
elog(ERROR, "can't open backup label file \"%s\": %s",
1668+
backup_label, strerror(errno));
16911669

1692-
len = strlen(PQgetvalue(res, 0, 2));
1693-
if (fwrite(PQgetvalue(res, 0, 2), 1, len, fp) != len ||
1670+
len = strlen(PQgetvalue(res, 0, 1));
1671+
if (fwrite(PQgetvalue(res, 0, 1), 1, len, fp) != len ||
16941672
fflush(fp) != 0 ||
16951673
fsync(fileno(fp)) != 0 ||
16961674
fclose(fp))
1697-
elog(ERROR, "can't write tablespace map file \"%s\": %s",
1698-
tablespace_map, strerror(errno));
1675+
elog(ERROR, "can't write backup label file \"%s\": %s",
1676+
backup_label, strerror(errno));
16991677

1678+
/*
1679+
* It's vital to check if backup_files_list is initialized,
1680+
* because we could get here because the backup was interrupted
1681+
*/
17001682
if (backup_files_list)
17011683
{
1702-
file = pgFileNew(tablespace_map, true);
1703-
if (S_ISREG(file->mode))
1704-
calc_file_checksum(file);
1684+
file = pgFileNew(backup_label, true);
1685+
calc_file_checksum(file);
17051686
free(file->path);
1706-
file->path = strdup(PG_TABLESPACE_MAP_FILE);
1687+
file->path = strdup(PG_BACKUP_LABEL_FILE);
17071688
parray_append(backup_files_list, file);
17081689
}
1709-
}
1710-
1711-
if (sscanf(PQgetvalue(res, 0, 3), XID_FMT, &recovery_xid) != 1)
1712-
elog(ERROR,
1713-
"result of txid_snapshot_xmax() is invalid: %s",
1714-
PQerrorMessage(conn));
1715-
if (!parse_time(PQgetvalue(res, 0, 4), &recovery_time))
1716-
elog(ERROR,
1717-
"result of current_timestamp is invalid: %s",
1718-
PQerrorMessage(conn));
1719-
}
1720-
else
1721-
{
1722-
if (sscanf(PQgetvalue(res, 0, 1), XID_FMT, &recovery_xid) != 1)
1723-
elog(ERROR,
1724-
"result of txid_snapshot_xmax() is invalid: %s",
1725-
PQerrorMessage(conn));
1726-
if (!parse_time(PQgetvalue(res, 0, 2), &recovery_time))
1727-
elog(ERROR,
1728-
"result of current_timestamp is invalid: %s",
1729-
PQerrorMessage(conn));
1730-
}
17311690

1732-
PQclear(res);
1691+
/* Write tablespace_map */
1692+
if (strlen(PQgetvalue(res, 0, 2)) > 0)
1693+
{
1694+
char tablespace_map[MAXPGPATH];
1695+
1696+
join_path_components(tablespace_map, path, PG_TABLESPACE_MAP_FILE);
1697+
fp = fopen(tablespace_map, "w");
1698+
if (fp == NULL)
1699+
elog(ERROR, "can't open tablespace map file \"%s\": %s",
1700+
tablespace_map, strerror(errno));
1701+
1702+
len = strlen(PQgetvalue(res, 0, 2));
1703+
if (fwrite(PQgetvalue(res, 0, 2), 1, len, fp) != len ||
1704+
fflush(fp) != 0 ||
1705+
fsync(fileno(fp)) != 0 ||
1706+
fclose(fp))
1707+
elog(ERROR, "can't write tablespace map file \"%s\": %s",
1708+
tablespace_map, strerror(errno));
1709+
1710+
if (backup_files_list)
1711+
{
1712+
file = pgFileNew(tablespace_map, true);
1713+
if (S_ISREG(file->mode))
1714+
calc_file_checksum(file);
1715+
free(file->path);
1716+
file->path = strdup(PG_TABLESPACE_MAP_FILE);
1717+
parray_append(backup_files_list, file);
1718+
}
1719+
}
17331720

1734-
if (stream_wal)
1735-
/* Wait for the completion of stream */
1736-
pthread_join(stream_thread, NULL);
1721+
if (sscanf(PQgetvalue(res, 0, 3), XID_FMT, &recovery_xid) != 1)
1722+
elog(ERROR,
1723+
"result of txid_snapshot_xmax() is invalid: %s",
1724+
PQerrorMessage(conn));
1725+
if (!parse_time(PQgetvalue(res, 0, 4), &recovery_time))
1726+
elog(ERROR,
1727+
"result of current_timestamp is invalid: %s",
1728+
PQerrorMessage(conn));
1729+
}
1730+
else
1731+
{
1732+
if (sscanf(PQgetvalue(res, 0, 1), XID_FMT, &recovery_xid) != 1)
1733+
elog(ERROR,
1734+
"result of txid_snapshot_xmax() is invalid: %s",
1735+
PQerrorMessage(conn));
1736+
if (!parse_time(PQgetvalue(res, 0, 2), &recovery_time))
1737+
elog(ERROR,
1738+
"result of current_timestamp is invalid: %s",
1739+
PQerrorMessage(conn));
1740+
}
1741+
PQclear(res);
1742+
if (stream_wal)
1743+
/* Wait for the completion of stream */
1744+
pthread_join(stream_thread, NULL);
1745+
}
17371746

17381747
/* Fill in fields if that is the correct end of backup. */
17391748
if (backup != NULL)

src/utils/pgut.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ bool prompt_password = true;
4545
static PGcancel *volatile cancel_conn = NULL;
4646

4747
/* Interrupted by SIGINT (Ctrl+C) ? */
48-
bool interrupted = false;
49-
static bool in_cleanup = false;
48+
bool interrupted = false;
49+
bool in_cleanup = false;
5050

5151
static bool parse_pair(const char buffer[], char key[], char value[]);
5252

src/utils/pgut.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ extern const char *username;
104104
extern bool prompt_password;
105105

106106
extern bool interrupted;
107+
extern bool in_cleanup;
107108

108109
extern int pgut_getopt(int argc, char **argv, pgut_option options[]);
109110
extern void pgut_readopt(const char *path, pgut_option options[], int elevel);

0 commit comments

Comments
 (0)