Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Add more error context to RestoreBlockImage() and consume it
authorMichael Paquier <michael@paquier.xyz>
Fri, 9 Sep 2022 01:01:14 +0000 (10:01 +0900)
committerMichael Paquier <michael@paquier.xyz>
Fri, 9 Sep 2022 01:01:14 +0000 (10:01 +0900)
On failure in restoring a block image, no details were provided, while
it is possible to see failure with an inconsistent record state, a
failure in processing decompression or a failure in decompression
because a build does not support this option.

RestoreBlockImage() is used in two code paths in the backend code,
during recovery and when checking a page consistency after applying
masking, and both places are changed to consume the error message
produced by the internal routine when it returns a false status.  All
the error messages are reported under ERRCODE_INTERNAL_ERROR, that gets
used also when attempting to access a page compressed by a method
not supported by the build attempting the decompression.  This is
something that can happen in core when doing physical replication with
primary and standby using inconsistent build options, for example.

This routine is available since 2c03216d and it has never provided any
context about the error happening when it failed.  This change is
justified even more after 57aa5b2, that introduced compression of FPWs
in WAL.

Reported-by: Justin Prysby
Author: Michael Paquier
Discussion: https://postgr.es/m/20220905002320.GD31833@telsasoft.com
Backpatch-through: 15

src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogrecovery.c
src/backend/access/transam/xlogutils.c

index 132f9a1a10cdf52a931f399238884ebcfdb453df..b07f6439466547ecaa03e864ea4fff6778cf3850 100644 (file)
@@ -2034,7 +2034,8 @@ XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
 /*
  * Restore a full-page image from a backup block attached to an XLOG record.
  *
- * Returns true if a full-page image is restored.
+ * Returns true if a full-page image is restored, and false on failure with
+ * an error to be consumed by the caller.
  */
 bool
 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
@@ -2045,9 +2046,20 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
    if (block_id > record->record->max_block_id ||
        !record->record->blocks[block_id].in_use)
+   {
+       report_invalid_record(record,
+                             "could not restore image at %X/%X with invalid block %d specified",
+                             LSN_FORMAT_ARGS(record->ReadRecPtr),
+                             block_id);
        return false;
+   }
    if (!record->record->blocks[block_id].has_image)
+   {
+       report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
+                             LSN_FORMAT_ARGS(record->ReadRecPtr),
+                             block_id);
        return false;
+   }
 
    bkpb = &record->record->blocks[block_id];
    ptr = bkpb->bkp_image;
@@ -2070,7 +2082,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
                                    bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
                decomp_success = false;
 #else
-           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  "LZ4",
                                  block_id);
@@ -2087,7 +2099,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
            if (ZSTD_isError(decomp_result))
                decomp_success = false;
 #else
-           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  "zstd",
                                  block_id);
@@ -2096,7 +2108,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
        }
        else
        {
-           report_invalid_record(record, "image at %X/%X compressed with unknown method, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  block_id);
            return false;
@@ -2104,7 +2116,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
        if (!decomp_success)
        {
-           report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
+           report_invalid_record(record, "could not decompress image at %X/%X, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  block_id);
            return false;
index f4aeda771c028b0666c9c47e27665c7ee422e689..ec79f492ee8b67c2cfdd9df3eb9ae472cb18560e 100644 (file)
@@ -2410,7 +2410,9 @@ verifyBackupPageConsistency(XLogReaderState *record)
         * can be directly applied on it.
         */
        if (!RestoreBlockImage(record, block_id, primary_image_masked))
-           elog(ERROR, "failed to restore block image");
+           ereport(ERROR,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg_internal("%s", record->errormsg_buf)));
 
        /*
         * If masking function is defined, mask both the primary and replay
index 48516694f0873e5f09656b40c202c72d052cf126..702c8c14e1216877942c52c8575f3e518f69df5a 100644 (file)
@@ -392,7 +392,9 @@ XLogReadBufferForRedoExtended(XLogReaderState *record,
                                      prefetch_buffer);
        page = BufferGetPage(*buf);
        if (!RestoreBlockImage(record, block_id, page))
-           elog(ERROR, "failed to restore block image");
+           ereport(ERROR,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg_internal("%s", record->errormsg_buf)));
 
        /*
         * The page may be uninitialized. If so, we can't set the LSN because