author     Robert Haas    2022-09-28 13:45:27 +0000
committer  Robert Haas    2022-09-28 13:55:28 +0000
commit     a448e49bcbe40fb72e1ed85af910dd216d45bad8 (patch)
tree       2815aed4f5e89bdea91cdd35ec89facaa846e438 /src/backend
parent     6af082723277eeca74f2da65e7759666bf7c7f9c (diff)
Revert 56-bit relfilenode change and follow-up commits.
There are still some alignment-related failures in the buildfarm,
which might or might not be able to be fixed quickly, but I've also
just realized that it increased the size of many WAL records by 4 bytes
because a block reference contains a RelFileLocator. The effect of that
hasn't been studied or discussed, so revert for now.
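The 4-byte growth mentioned above follows from widening the relNumber field inside RelFileLocator, which is embedded in every WAL block reference, from a 32-bit Oid to a 64-bit integer. A minimal, self-contained sketch of that size difference; the struct names here are hypothetical, while the field names spcOid/dbOid/relNumber match the ones used throughout the diff below:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t Oid;

    /* Hypothetical pre-patch layout: relNumber is a plain 32-bit Oid. */
    typedef struct LocatorWithOidRelNumber
    {
        Oid         spcOid;         /* tablespace OID */
        Oid         dbOid;          /* database OID */
        Oid         relNumber;      /* relation file number, 32 bits */
    } LocatorWithOidRelNumber;

    /* Hypothetical 56-bit-relfilenumber layout: relNumber widened to 64 bits. */
    typedef struct LocatorWithWideRelNumber
    {
        Oid         spcOid;
        Oid         dbOid;
        uint64_t    relNumber;      /* relation file number, 56 bits used */
    } LocatorWithWideRelNumber;

    int
    main(void)
    {
        /* On typical platforms this prints 12 and 16: 4 extra bytes per block reference. */
        printf("narrow locator: %zu bytes\n", sizeof(LocatorWithOidRelNumber));
        printf("wide locator:   %zu bytes\n", sizeof(LocatorWithWideRelNumber));
        return 0;
    }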
Diffstat (limited to 'src/backend')
33 files changed, 190 insertions, 413 deletions
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index bc093f2a887..41b92115bff 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -100,7 +100,7 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda BlockNumber blknum; BufferGetTag(buffer, &locator, &forknum, &blknum); - elog(ERROR, "failed to add item to index page in %u/%u/" UINT64_FORMAT, + elog(ERROR, "failed to add item to index page in %u/%u/%u", locator.spcOid, locator.dbOid, locator.relNumber); } } diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index d1c8a24d66f..7dd3c1d500f 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -26,7 +26,7 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) static void out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) { - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; blk %u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, xlrec->block, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 70bd49303a9..923d3bc43df 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -169,7 +169,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; tid %u/%u", + appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u", xlrec->target_locator.spcOid, xlrec->target_locator.dbOid, xlrec->target_locator.relNumber, diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index 6192a7ba841..4843cd530df 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -100,7 +100,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqdesc.c index df72caf1768..b3845f93bff 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqdesc.c @@ -25,7 +25,7 @@ seq_desc(StringInfo buf, XLogReaderState *record) xl_seq_rec *xlrec = (xl_seq_rec *) rec; if (info == XLOG_SEQ_LOG) - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT, + appendStringInfo(buf, "rel %u/%u/%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber); } diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index b1cede74cf4..3fd7185f217 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -45,8 +45,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %u:%u; relfilenumber " UINT64_FORMAT "; oid %u; " - "multi %u; offset %u; oldest xid %u in DB %u; oldest multi %u in DB %u; " + "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " + "oldest 
xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " "oldest running xid %u; %s", LSN_FORMAT_ARGS(checkpoint->redo), @@ -55,7 +55,6 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->fullPageWrites ? "true" : "false", EpochFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid), - checkpoint->nextRelFileNumber, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, @@ -75,13 +74,6 @@ xlog_desc(StringInfo buf, XLogReaderState *record) memcpy(&nextOid, rec, sizeof(Oid)); appendStringInfo(buf, "%u", nextOid); } - else if (info == XLOG_NEXT_RELFILENUMBER) - { - RelFileNumber nextRelFileNumber; - - memcpy(&nextRelFileNumber, rec, sizeof(RelFileNumber)); - appendStringInfo(buf, UINT64_FORMAT, nextRelFileNumber); - } else if (info == XLOG_RESTORE_POINT) { xl_restore_point *xlrec = (xl_restore_point *) rec; @@ -177,9 +169,6 @@ xlog_identify(uint8 info) case XLOG_NEXTOID: id = "NEXTOID"; break; - case XLOG_NEXT_RELFILENUMBER: - id = "NEXT_RELFILENUMBER"; - break; case XLOG_SWITCH: id = "SWITCH"; break; @@ -248,7 +237,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, appendStringInfoChar(buf, ' '); appendStringInfo(buf, - "blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u", + "blkref #%d: rel %u/%u/%u fork %s blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forkNames[forknum], @@ -308,7 +297,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, if (forknum != MAIN_FORKNUM) { appendStringInfo(buf, - ", blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u", + ", blkref #%d: rel %u/%u/%u fork %s blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forkNames[forknum], @@ -317,7 +306,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, else { appendStringInfo(buf, - ", blkref #%d: rel %u/%u/" UINT64_FORMAT " blk %u", + ", blkref #%d: rel %u/%u/%u blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blk); diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README index 91c2578f7a9..72af6560600 100644 --- a/src/backend/access/transam/README +++ b/src/backend/access/transam/README @@ -692,9 +692,8 @@ by having database restart search for files that don't have any committed entry in pg_class, but that currently isn't done because of the possibility of deleting data that is useful for forensic analysis of the crash. Orphan files are harmless --- at worst they waste a bit of disk space --- -because the relfilenumber counter is monotonically increasing. The maximum -value is 2^56-1, and there is no provision for wraparound. Thus, on-disk -collisions aren't possible. +because we check for on-disk collisions when allocating new relfilenumber +OIDs. So cleaning up isn't really necessary. 3. Deleting a table, which requires an unlink() that could fail. 
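The restored README wording above ("we check for on-disk collisions when allocating new relfilenumber OIDs") relies on the generate-and-probe loop that this diff reinstates in catalog.c's GetNewRelFileNumber(): pick a candidate number, build the would-be file path, and retry if a file of that name already exists. A simplified stand-alone sketch of that pattern, with a hypothetical counter and path builder standing in for the real GetNewObjectId() and relpath():

    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Stand-in for GetNewObjectId(): any advancing 32-bit counter will do here. */
    static unsigned int
    next_candidate(void)
    {
        static unsigned int counter = 100000;   /* arbitrary starting point */

        return counter++;
    }

    /*
     * Pick a file number whose main-fork file does not already exist under
     * "dbdir".  A hit from access() means a definite collision, so loop and
     * try the next candidate, as the restored GetNewRelFileNumber() does.
     */
    static unsigned int
    choose_filenumber(const char *dbdir)
    {
        char        path[1024];
        unsigned int candidate;
        bool        collides;

        do
        {
            candidate = next_candidate();
            snprintf(path, sizeof(path), "%s/%u", dbdir, candidate);
            collides = (access(path, F_OK) == 0);
        } while (collides);

        return candidate;
    }

    int
    main(void)
    {
        printf("chose %u\n", choose_filenumber("base/1"));
        return 0;
    }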
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 89da2f44590..849a7ce9d6d 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -13,16 +13,12 @@ #include "postgres.h" -#include <unistd.h> - #include "access/clog.h" #include "access/commit_ts.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlogutils.h" -#include "catalog/pg_class.h" -#include "catalog/pg_tablespace.h" #include "commands/dbcommands.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" @@ -34,15 +30,6 @@ /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 -/* Number of RelFileNumbers to be logged per XLOG write */ -#define VAR_RELNUMBER_PER_XLOG 512 - -/* - * Need to log more if remaining logged RelFileNumbers are less than the - * threshold. Valid range could be between 0 to VAR_RELNUMBER_PER_XLOG - 1. - */ -#define VAR_RELNUMBER_NEW_XLOG_THRESHOLD 256 - /* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; @@ -534,7 +521,8 @@ ForceTransactionIdLimitUpdate(void) * wide, counter wraparound will occur eventually, and therefore it is unwise * to assume they are unique unless precautions are taken to make them so. * Hence, this routine should generally not be used directly. The only direct - * caller should be GetNewOidWithIndex() in catalog/catalog.c. + * callers should be GetNewOidWithIndex() and GetNewRelFileNumber() in + * catalog/catalog.c. */ Oid GetNewObjectId(void) @@ -625,199 +613,6 @@ SetNextObjectId(Oid nextOid) } /* - * GetNewRelFileNumber - * - * Similar to GetNewObjectId but instead of new Oid it generates new - * relfilenumber. - */ -RelFileNumber -GetNewRelFileNumber(Oid reltablespace, char relpersistence) -{ - RelFileNumber result; - RelFileNumber nextRelFileNumber, - loggedRelFileNumber, - flushedRelFileNumber; - - StaticAssertStmt(VAR_RELNUMBER_NEW_XLOG_THRESHOLD < VAR_RELNUMBER_PER_XLOG, - "VAR_RELNUMBER_NEW_XLOG_THRESHOLD must be smaller than VAR_RELNUMBER_PER_XLOG"); - - /* safety check, we should never get this far in a HS standby */ - if (RecoveryInProgress()) - elog(ERROR, "cannot assign RelFileNumber during recovery"); - - if (IsBinaryUpgrade) - elog(ERROR, "cannot assign RelFileNumber during binary upgrade"); - - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - - nextRelFileNumber = ShmemVariableCache->nextRelFileNumber; - loggedRelFileNumber = ShmemVariableCache->loggedRelFileNumber; - flushedRelFileNumber = ShmemVariableCache->flushedRelFileNumber; - - Assert(nextRelFileNumber <= flushedRelFileNumber); - Assert(flushedRelFileNumber <= loggedRelFileNumber); - - /* check for the wraparound for the relfilenumber counter */ - if (unlikely(nextRelFileNumber > MAX_RELFILENUMBER)) - elog(ERROR, "relfilenumber is too large"); - - /* - * If the remaining logged relfilenumbers values are less than the - * threshold value then log more. Ideally, we can wait until all - * relfilenumbers have been consumed before logging more. Nevertheless, if - * we do that, we must immediately flush the logged wal record because we - * want to ensure that the nextRelFileNumber is always larger than any - * relfilenumber already in use on disk. And, to maintain that invariant, - * we must make sure that the record we log reaches the disk before any new - * files are created with the newly logged range. 
- * - * So in order to avoid flushing the wal immediately, we always log before - * consuming all the relfilenumber, and now we only have to flush the newly - * logged relfilenumber wal before consuming the relfilenumber from this - * new range. By the time we need to flush this wal, hopefully, those have - * already been flushed with some other XLogFlush operation. - */ - if (loggedRelFileNumber - nextRelFileNumber <= - VAR_RELNUMBER_NEW_XLOG_THRESHOLD) - { - XLogRecPtr recptr; - - loggedRelFileNumber = loggedRelFileNumber + VAR_RELNUMBER_PER_XLOG; - recptr = LogNextRelFileNumber(loggedRelFileNumber); - ShmemVariableCache->loggedRelFileNumber = loggedRelFileNumber; - - /* remember for the future flush */ - ShmemVariableCache->loggedRelFileNumberRecPtr = recptr; - } - - /* - * If the nextRelFileNumber is already reached to the already flushed - * relfilenumber then flush the WAL for previously logged relfilenumber. - */ - if (nextRelFileNumber >= flushedRelFileNumber) - { - XLogFlush(ShmemVariableCache->loggedRelFileNumberRecPtr); - ShmemVariableCache->flushedRelFileNumber = loggedRelFileNumber; - } - - result = ShmemVariableCache->nextRelFileNumber; - - /* we should never be using any relfilenumber outside the flushed range */ - Assert(result <= ShmemVariableCache->flushedRelFileNumber); - - (ShmemVariableCache->nextRelFileNumber)++; - - LWLockRelease(RelFileNumberGenLock); - - /* - * Because the RelFileNumber counter only ever increases and never wraps - * around, it should be impossible for the newly-allocated RelFileNumber to - * already be in use. But, if Asserts are enabled, double check that - * there's no main-fork relation file with the new RelFileNumber already on - * disk. - */ -#ifdef USE_ASSERT_CHECKING - { - RelFileLocatorBackend rlocator; - char *rpath; - BackendId backend; - - switch (relpersistence) - { - case RELPERSISTENCE_TEMP: - backend = BackendIdForTempRelations(); - break; - case RELPERSISTENCE_UNLOGGED: - case RELPERSISTENCE_PERMANENT: - backend = InvalidBackendId; - break; - default: - elog(ERROR, "invalid relpersistence: %c", relpersistence); - } - - /* this logic should match RelationInitPhysicalAddr */ - rlocator.locator.spcOid = - reltablespace ? reltablespace : MyDatabaseTableSpace; - rlocator.locator.dbOid = (reltablespace == GLOBALTABLESPACE_OID) ? - InvalidOid : MyDatabaseId; - rlocator.locator.relNumber = result; - - /* - * The relpath will vary based on the backend ID, so we must - * initialize that properly here to make sure that any collisions - * based on filename are properly detected. - */ - rlocator.backend = backend; - - /* check for existing file of same name. */ - rpath = relpath(rlocator, MAIN_FORKNUM); - Assert(access(rpath, F_OK) != 0); - } -#endif - - return result; -} - -/* - * SetNextRelFileNumber - * - * This may only be called during pg_upgrade; it advances the RelFileNumber - * counter to the specified value if the current value is smaller than the - * input value. - */ -void -SetNextRelFileNumber(RelFileNumber relnumber) -{ - /* safety check, we should never get this far in a HS standby */ - if (RecoveryInProgress()) - elog(ERROR, "cannot set RelFileNumber during recovery"); - - if (!IsBinaryUpgrade) - elog(ERROR, "RelFileNumber can be set only during binary upgrade"); - - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - - /* - * If previous assigned value of the nextRelFileNumber is already higher - * than the current value then nothing to be done. 
This is possible - * because during upgrade the objects are not created in relfilenumber - * order. - */ - if (relnumber <= ShmemVariableCache->nextRelFileNumber) - { - LWLockRelease(RelFileNumberGenLock); - return; - } - - /* - * If the new relfilenumber to be set is greater than or equal to already - * flushed relfilenumber then log more and flush immediately. - * - * (This is less efficient than GetNewRelFileNumber, which arranges to - * log some new relfilenumbers before the old batch is exhausted in the - * hope that a flush will happen in the background before any values are - * needed from the new batch. However, since this is only used during - * binary upgrade, it shouldn't really matter.) - */ - if (relnumber >= ShmemVariableCache->flushedRelFileNumber) - { - RelFileNumber newlogrelnum; - - newlogrelnum = relnumber + VAR_RELNUMBER_PER_XLOG; - XLogFlush(LogNextRelFileNumber(newlogrelnum)); - - /* we have flushed whatever we have logged so no pending flush */ - ShmemVariableCache->loggedRelFileNumber = newlogrelnum; - ShmemVariableCache->flushedRelFileNumber = newlogrelnum; - ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr; - } - - ShmemVariableCache->nextRelFileNumber = relnumber; - - LWLockRelease(RelFileNumberGenLock); -} - -/* * StopGeneratingPinnedObjectIds * * This is called once during initdb to force the OID counter up to diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 35fac945cb1..00992a11b9e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4712,7 +4712,6 @@ BootStrapXLOG(void) checkPoint.nextXid = FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); checkPoint.nextOid = FirstGenbkiObjectId; - checkPoint.nextRelFileNumber = FirstNormalRelFileNumber; checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; checkPoint.oldestXid = FirstNormalTransactionId; @@ -4726,11 +4725,7 @@ BootStrapXLOG(void) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; ShmemVariableCache->oidCount = 0; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -5196,10 +5191,7 @@ StartupXLOG(void) /* initialize shared memory variables from the checkpoint record */ ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; ShmemVariableCache->oidCount = 0; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -6671,24 +6663,6 @@ CreateCheckPoint(int flags) checkPoint.nextOid += ShmemVariableCache->oidCount; LWLockRelease(OidGenLock); - /* - * If this is a shutdown checkpoint then we can safely start allocating - * relfilenumber from the nextRelFileNumber value after the restart because - * no one one else can use the 
relfilenumber beyond that number before the - * shutdown. OTOH, if it is a normal checkpoint then if there is a crash - * after this point then we might end up reusing the same relfilenumbers - * after the restart so we need to set the nextRelFileNumber to the already - * logged relfilenumber as no one will use number beyond this limit without - * logging again. - */ - LWLockAcquire(RelFileNumberGenLock, LW_SHARED); - if (shutdown) - checkPoint.nextRelFileNumber = ShmemVariableCache->nextRelFileNumber; - else - checkPoint.nextRelFileNumber = ShmemVariableCache->loggedRelFileNumber; - - LWLockRelease(RelFileNumberGenLock); - MultiXactGetCheckptMulti(shutdown, &checkPoint.nextMulti, &checkPoint.nextMultiOffset, @@ -7567,24 +7541,6 @@ XLogPutNextOid(Oid nextOid) } /* - * Similar to the XLogPutNextOid but instead of writing NEXTOID log record it - * writes a NEXT_RELFILENUMBER log record. It also returns the XLogRecPtr of - * the currently logged relfilenumber record, so that the caller can flush it - * at the appropriate time. - */ -XLogRecPtr -LogNextRelFileNumber(RelFileNumber nextrelnumber) -{ - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterData((char *) (&nextrelnumber), sizeof(RelFileNumber)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENUMBER); - - return recptr; -} - -/* * Write an XLOG SWITCH record. * * Here we just blindly issue an XLogInsert request for the record. @@ -7799,17 +7755,6 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); } - if (info == XLOG_NEXT_RELFILENUMBER) - { - RelFileNumber nextRelFileNumber; - - memcpy(&nextRelFileNumber, XLogRecGetData(record), sizeof(RelFileNumber)); - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - ShmemVariableCache->nextRelFileNumber = nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumber = nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = nextRelFileNumber; - LWLockRelease(RelFileNumberGenLock); - } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { CheckPoint checkPoint; @@ -7824,11 +7769,6 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; - LWLockRelease(RelFileNumberGenLock); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c index cea38eccea6..8f5d4253320 100644 --- a/src/backend/access/transam/xlogprefetcher.c +++ b/src/backend/access/transam/xlogprefetcher.c @@ -613,7 +613,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, which creates the relation", + "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -636,7 +636,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, which truncates the 
relation", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -735,7 +735,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing all prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, because the relation does not exist on disk", + "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -756,7 +756,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, because the relation is too small", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -795,7 +795,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) * truncated beneath our feet? */ elog(ERROR, - "could not prefetch relation %u/%u/" UINT64_FORMAT " block %u", + "could not prefetch relation %u/%u/%u block %u", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -934,7 +934,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed), filter->filter_from_block); @@ -950,7 +950,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed)); #endif diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 1026ce5dcf7..b41e6826643 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -2228,14 +2228,14 @@ xlog_block_info(StringInfo buf, XLogReaderState *record) continue; if (forknum != MAIN_FORKNUM) - appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", fork %u, blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forknum, blk); else - appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, @@ -2433,7 +2433,7 @@ verifyBackupPageConsistency(XLogReaderState *record) if (memcmp(replay_image_masked, 
primary_image_masked, BLCKSZ) != 0) { elog(FATAL, - "inconsistent page found, rel %u/%u/" UINT64_FORMAT ", forknum %u, blkno %u", + "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forknum, blkno); } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index ffda2c210b7..563cba258dd 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -619,17 +619,17 @@ CreateFakeRelcacheEntry(RelFileLocator rlocator) rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT; /* We don't know the name of the relation; use relfilenumber instead */ - sprintf(RelationGetRelationName(rel), UINT64_FORMAT, rlocator.relNumber); + sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber); /* * We set up the lockRelId in case anything tries to lock the dummy - * relation. Note that this is fairly bogus since relNumber are completely + * relation. Note that this is fairly bogus since relNumber may be * different from the relation's OID. It shouldn't really matter though. * In recovery, we are running by ourselves and can't have any lock * conflicts. While syncing, we already hold AccessExclusiveLock. */ rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid; - rel->rd_lockInfo.lockRelId.relId = (Oid) rlocator.relNumber; + rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber; rel->rd_smgr = NULL; diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 1434bcdd85c..411cac9be3f 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1246,7 +1246,7 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, if (relForkNum != INIT_FORKNUM) { char initForkFile[MAXPGPATH]; - char relNumber[RELNUMBERCHARS + 1]; + char relNumber[OIDCHARS + 1]; /* * If any other type of fork, check if there is an init fork diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index a9bd8ae008e..2abd6b007a2 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -483,6 +483,101 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) } /* + * GetNewRelFileNumber + * Generate a new relfilenumber that is unique within the + * database of the given tablespace. + * + * If the relfilenumber will also be used as the relation's OID, pass the + * opened pg_class catalog, and this routine will guarantee that the result + * is also an unused OID within pg_class. If the result is to be used only + * as a relfilenumber for an existing relation, pass NULL for pg_class. + * + * As with GetNewOidWithIndex(), there is some theoretical risk of a race + * condition, but it doesn't seem worth worrying about. + * + * Note: we don't support using this in bootstrap mode. All relations + * created by bootstrap have preassigned OIDs, so there's no need. + */ +RelFileNumber +GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence) +{ + RelFileLocatorBackend rlocator; + char *rpath; + bool collides; + BackendId backend; + + /* + * If we ever get here during pg_upgrade, there's something wrong; all + * relfilenumber assignments during a binary-upgrade run should be + * determined by commands in the dump script. 
+ */ + Assert(!IsBinaryUpgrade); + + switch (relpersistence) + { + case RELPERSISTENCE_TEMP: + backend = BackendIdForTempRelations(); + break; + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + backend = InvalidBackendId; + break; + default: + elog(ERROR, "invalid relpersistence: %c", relpersistence); + return InvalidRelFileNumber; /* placate compiler */ + } + + /* This logic should match RelationInitPhysicalAddr */ + rlocator.locator.spcOid = reltablespace ? reltablespace : MyDatabaseTableSpace; + rlocator.locator.dbOid = + (rlocator.locator.spcOid == GLOBALTABLESPACE_OID) ? + InvalidOid : MyDatabaseId; + + /* + * The relpath will vary based on the backend ID, so we must initialize + * that properly here to make sure that any collisions based on filename + * are properly detected. + */ + rlocator.backend = backend; + + do + { + CHECK_FOR_INTERRUPTS(); + + /* Generate the OID */ + if (pg_class) + rlocator.locator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId, + Anum_pg_class_oid); + else + rlocator.locator.relNumber = GetNewObjectId(); + + /* Check for existing file of same name */ + rpath = relpath(rlocator, MAIN_FORKNUM); + + if (access(rpath, F_OK) == 0) + { + /* definite collision */ + collides = true; + } + else + { + /* + * Here we have a little bit of a dilemma: if errno is something + * other than ENOENT, should we declare a collision and loop? In + * practice it seems best to go ahead regardless of the errno. If + * there is a colliding file we will get an smgr failure when we + * attempt to create the new relation file. + */ + collides = false; + } + + pfree(rpath); + } while (collides); + + return rlocator.locator.relNumber; +} + +/* * SQL callable interface for GetNewOidWithIndex(). Outside of initdb's * direct insertions into catalog tables, and recovering from corruption, this * should rarely be needed. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index de01da198e3..9a80ccdccdf 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -341,19 +341,11 @@ heap_create(const char *relname, else { /* - * If relfilenumber is unspecified by the caller then allocate a new - * one, except for system tables, for which we make the initial - * relfilenumber the same as the table OID. See the comments for - * FirstNormalRelFileNumber for an explanation of why we do this. + * If relfilenumber is unspecified by the caller then create storage + * with oid same as relid. 
*/ if (!RelFileNumberIsValid(relfilenumber)) - { - if (relid < FirstNormalObjectId) - relfilenumber = relid; - else - relfilenumber = GetNewRelFileNumber(reltablespace, - relpersistence); - } + relfilenumber = relid; } /* @@ -909,7 +901,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype); values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner); values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam); - values[Anum_pg_class_relfilenode - 1] = Int64GetDatum(rd_rel->relfilenode); + values[Anum_pg_class_relfilenode - 1] = ObjectIdGetDatum(rd_rel->relfilenode); values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); @@ -1181,7 +1173,12 @@ heap_create_with_catalog(const char *relname, if (shared_relation && reltablespace != GLOBALTABLESPACE_OID) elog(ERROR, "shared relations must be placed in pg_global tablespace"); - /* Allocate an OID for the relation, unless we were told what to use. */ + /* + * Allocate an OID for the relation, unless we were told what to use. + * + * The OID will be the relfilenumber as well, so make sure it doesn't + * collide with either pg_class OIDs or existing physical files. + */ if (!OidIsValid(relid)) { /* Use binary-upgrade override for pg_class.oid and relfilenumber */ @@ -1235,8 +1232,8 @@ heap_create_with_catalog(const char *relname, } if (!OidIsValid(relid)) - relid = GetNewOidWithIndex(pg_class_desc, ClassOidIndexId, - Anum_pg_class_oid); + relid = GetNewRelFileNumber(reltablespace, pg_class_desc, + relpersistence); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 1fd40c42a3a..61f1d3926a9 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -898,7 +898,12 @@ index_create(Relation heapRelation, collationObjectId, classObjectId); - /* Allocate an OID for the index, unless we were told what to use. */ + /* + * Allocate an OID for the index, unless we were told what to use. + * + * The OID will be the relfilenumber as well, so make sure it doesn't + * collide with either pg_class OIDs or existing physical files. 
+ */ if (!OidIsValid(indexRelationId)) { /* Use binary-upgrade override for pg_class.oid and relfilenumber */ @@ -930,8 +935,8 @@ index_create(Relation heapRelation, } else { - indexRelationId = GetNewOidWithIndex(pg_class, ClassOidIndexId, - Anum_pg_class_oid); + indexRelationId = + GetNewRelFileNumber(tableSpaceId, pg_class, relpersistence); } } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 38bbe325509..d708af19ed2 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -968,10 +968,6 @@ smgr_redo(XLogReaderState *record) xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; - if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber) - elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT, - xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber); - reln = smgropen(xlrec->rlocator, InvalidBackendId); smgrcreate(reln, xlrec->forkNum, true); } @@ -985,10 +981,6 @@ smgr_redo(XLogReaderState *record) int nforks = 0; bool need_fsm_vacuum = false; - if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber) - elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT, - xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber); - reln = smgropen(xlrec->rlocator, InvalidBackendId); /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1b8e6d57294..7d8a75d23c2 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14375,14 +14375,10 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) } /* - * Generate a new relfilenumber. We cannot reuse the old relfilenumber - * because of the possibility that that relation will be moved back to the - * original tablespace before the next checkpoint. At that point, the - * first segment of the main fork won't have been unlinked yet, and an - * attempt to create new relation storage with that same relfilenumber - * will fail. - */ - newrelfilenumber = GetNewRelFileNumber(newTableSpace, + * Relfilenumbers are not unique in databases across tablespaces, so we + * need to allocate a new one in the new tablespace. + */ + newrelfilenumber = GetNewRelFileNumber(newTableSpace, NULL, rel->rd_rel->relpersistence); /* Open old and new relation */ diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index cdd7986dfc3..b69ff37dbbd 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -267,7 +267,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) * parts. 
*/ if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + - OIDCHARS + 1 + RELNUMBERCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH) + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index f1fa8945135..81b8c184a90 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -961,12 +961,12 @@ _read${n}(void) print $off "\tWRITE_UINT_FIELD($f);\n"; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; } - elsif ($t eq 'uint64' || $t eq 'RelFileNumber') + elsif ($t eq 'uint64') { print $off "\tWRITE_UINT64_FIELD($f);\n"; print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read; } - elsif ($t eq 'Oid') + elsif ($t eq 'Oid' || $t eq 'RelFileNumber') { print $off "\tWRITE_OID_FIELD($f);\n"; print $rff "\tREAD_OID_FIELD($f);\n" unless $no_read; diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index cdf19a9c204..2cc0ac9eb09 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -154,7 +154,6 @@ xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) break; case XLOG_NOOP: case XLOG_NEXTOID: - case XLOG_NEXT_RELFILENUMBER: case XLOG_SWITCH: case XLOG_BACKUP_END: case XLOG_PARAMETER_CHANGE: diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index a0f398b458a..03d9c9c86a2 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4932,7 +4932,7 @@ DisplayMapping(HTAB *tuplecid_data) hash_seq_init(&hstat, tuplecid_data); while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL) { - elog(DEBUG3, "mapping: node: %u/%u/" UINT64_FORMAT " tid: %u/%u cmin: %u, cmax: %u", + elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u", ent->key.rlocator.dbOid, ent->key.rlocator.spcOid, ent->key.rlocator.relNumber, diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index c3faa68126a..647c458b52e 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, typedef struct { - RelFileNumber relnumber; /* hash key */ + Oid reloid; /* hash key */ } unlogged_relation_entry; /* @@ -184,10 +184,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * need to be reset. Otherwise, this cleanup operation would be * O(n^2). */ - ctl.keysize = sizeof(RelFileNumber); + ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(unlogged_relation_entry); ctl.hcxt = CurrentMemoryContext; - hash = hash_create("unlogged relation RelFileNumbers", 32, &ctl, + hash = hash_create("unlogged relation OIDs", 32, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); /* Scan the directory. */ @@ -208,10 +208,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* - * Put the RELFILENUMBER portion of the name into the hash table, - * if it isn't already. + * Put the OID portion of the name into the hash table, if it + * isn't already. 
*/ - ent.relnumber = atorelnumber(de->d_name); + ent.reloid = atooid(de->d_name); (void) hash_search(hash, &ent, HASH_ENTER, NULL); } @@ -248,10 +248,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* - * See whether the RELFILENUMBER portion of the name shows up in - * the hash table. If so, nuke it! + * See whether the OID portion of the name shows up in the hash + * table. If so, nuke it! */ - ent.relnumber = atorelnumber(de->d_name); + ent.reloid = atooid(de->d_name); if (hash_search(hash, &ent, HASH_FIND, NULL)) { snprintf(rm_path, sizeof(rm_path), "%s/%s", @@ -286,7 +286,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { ForkNumber forkNum; int relnumchars; - char relnumbuf[RELNUMBERCHARS + 1]; + char relnumbuf[OIDCHARS + 1]; char srcpath[MAXPGPATH * 2]; char dstpath[MAXPGPATH]; @@ -329,7 +329,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { ForkNumber forkNum; int relnumchars; - char relnumbuf[RELNUMBERCHARS + 1]; + char relnumbuf[OIDCHARS + 1]; char mainpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ @@ -372,8 +372,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * for a non-temporary relation and false otherwise. * * NB: If this function returns true, the caller is entitled to assume that - * *relnumchars has been set to a value no more than RELNUMBERCHARS, and thus - * that a buffer of RELNUMBERCHARS+1 characters is sufficient to hold the + * *relnumchars has been set to a value no more than OIDCHARS, and thus + * that a buffer of OIDCHARS+1 characters is sufficient to hold the * RelFileNumber portion of the filename. This is critical to protect against * a possible buffer overrun. */ @@ -386,7 +386,7 @@ parse_filename_for_nontemp_relation(const char *name, int *relnumchars, /* Look for a non-empty string of digits (that isn't too long). */ for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) ; - if (pos == 0 || pos > RELNUMBERCHARS) + if (pos == 0 || pos > OIDCHARS) return false; *relnumchars = pos; diff --git a/src/backend/storage/freespace/fsmpage.c b/src/backend/storage/freespace/fsmpage.c index 1210be7470b..af4dab7d2c7 100644 --- a/src/backend/storage/freespace/fsmpage.c +++ b/src/backend/storage/freespace/fsmpage.c @@ -273,7 +273,7 @@ restart: BlockNumber blknum; BufferGetTag(buf, &rlocator, &forknum, &blknum); - elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/" UINT64_FORMAT, + elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u", blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber); /* make sure we hold an exclusive lock */ diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 3c5d0410795..6c7cf6c2956 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -53,4 +53,3 @@ XactTruncationLock 44 # 45 was XactTruncationLock until removal of BackendRandomLock WrapLimitsVacuumLock 46 NotifyQueueTailLock 47 -RelFileNumberGenLock 48
\ No newline at end of file diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index bed47f07d73..a515bb36ac1 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -257,13 +257,6 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * next checkpoint, we prevent reassignment of the relfilenumber until it's * safe, because relfilenumber assignment skips over any existing file. * - * XXX. Although all of this was true when relfilenumbers were 32 bits wide, - * they are now 56 bits wide and do not wrap around, so in the future we can - * change the code to immediately unlink the first segment of the relation - * along with all the others. We still do reuse relfilenumbers when createdb() - * is performed using the file-copy method or during movedb(), but the scenario - * described above can only happen when creating a new relation. - * * We do not need to go through this dance for temp relations, though, because * we never make WAL entries for temp rels, and so a temp rel poses no threat * to the health of a regular rel that has taken over its relfilenumber. diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index ed46ac3f44e..c1a5febcbfd 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -154,7 +154,7 @@ smgropen(RelFileLocator rlocator, BackendId backend) /* First time through: initialize the hash table */ HASHCTL ctl; - ctl.keysize = SizeOfRelFileLocatorBackend; + ctl.keysize = sizeof(RelFileLocatorBackend); ctl.entrysize = sizeof(SMgrRelationData); SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 9f70f3526c9..34efa121b40 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -878,7 +878,7 @@ pg_relation_filenode(PG_FUNCTION_ARGS) if (!RelFileNumberIsValid(result)) PG_RETURN_NULL(); - PG_RETURN_INT64(result); + PG_RETURN_OID(result); } /* @@ -898,12 +898,9 @@ Datum pg_filenode_relation(PG_FUNCTION_ARGS) { Oid reltablespace = PG_GETARG_OID(0); - RelFileNumber relfilenumber = PG_GETARG_INT64(1); + RelFileNumber relfilenumber = PG_GETARG_OID(1); Oid heaprel; - /* check whether the relfilenumber is within a valid range */ - CHECK_RELFILENUMBER_RANGE(relfilenumber); - /* test needed so RelidByRelfilenumber doesn't misbehave */ if (!RelFileNumberIsValid(relfilenumber)) PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index fc2faed9a7d..797f5f539af 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -17,7 +17,6 @@ #include "catalog/pg_type.h" #include "commands/extension.h" #include "miscadmin.h" -#include "storage/relfilelocator.h" #include "utils/array.h" #include "utils/builtins.h" @@ -99,12 +98,10 @@ binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } @@ -123,12 +120,10 @@ binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = 
PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } @@ -147,12 +142,10 @@ binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index fecbf06a04d..eb5782f82a4 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -663,9 +663,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) */ RelFileLocatorBackend rlocator; - rlocator.locator.dbOid = msg->sm.dbOid; - rlocator.locator.spcOid = msg->sm.spcOid; - rlocator.locator.relNumber = (((uint64) msg->sm.relNumber_hi) << 32) | msg->sm.relNumber_lo; + rlocator.locator = msg->sm.rlocator; rlocator.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo; smgrcloserellocator(rlocator); } @@ -1468,10 +1466,7 @@ CacheInvalidateSmgr(RelFileLocatorBackend rlocator) msg.sm.id = SHAREDINVALSMGR_ID; msg.sm.backend_hi = rlocator.backend >> 16; msg.sm.backend_lo = rlocator.backend & 0xffff; - msg.sm.dbOid = rlocator.locator.dbOid; - msg.sm.spcOid = rlocator.locator.spcOid; - msg.sm.relNumber_hi = rlocator.locator.relNumber >> 32; - msg.sm.relNumber_lo = rlocator.locator.relNumber & 0xffffffff; + msg.sm.rlocator = rlocator.locator; /* check AddCatcacheInvalidationMessage() for an explanation */ VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 6f4e96dd33b..00dc0f24037 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3712,7 +3712,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) { /* Allocate a new relfilenumber */ newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace, - persistence); + NULL, persistence); } else if (relation->rd_rel->relkind == RELKIND_INDEX) { diff --git a/src/backend/utils/cache/relfilenumbermap.c b/src/backend/utils/cache/relfilenumbermap.c index 11427ba97eb..c4245d5ccdd 100644 --- a/src/backend/utils/cache/relfilenumbermap.c +++ b/src/backend/utils/cache/relfilenumbermap.c @@ -88,6 +88,7 @@ static void InitializeRelfilenumberMap(void) { HASHCTL ctl; + int i; /* Make sure we've initialized CacheMemoryContext. 
*/ if (CacheMemoryContext == NULL) @@ -96,20 +97,17 @@ InitializeRelfilenumberMap(void) /* build skey */ MemSet(&relfilenumber_skey, 0, sizeof(relfilenumber_skey)); - fmgr_info_cxt(F_OIDEQ, - &relfilenumber_skey[0].sk_func, - CacheMemoryContext); - relfilenumber_skey[0].sk_strategy = BTEqualStrategyNumber; - relfilenumber_skey[0].sk_subtype = InvalidOid; - relfilenumber_skey[0].sk_collation = InvalidOid; - relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace; + for (i = 0; i < 2; i++) + { + fmgr_info_cxt(F_OIDEQ, + &relfilenumber_skey[i].sk_func, + CacheMemoryContext); + relfilenumber_skey[i].sk_strategy = BTEqualStrategyNumber; + relfilenumber_skey[i].sk_subtype = InvalidOid; + relfilenumber_skey[i].sk_collation = InvalidOid; + } - fmgr_info_cxt(F_INT8EQ, - &relfilenumber_skey[1].sk_func, - CacheMemoryContext); - relfilenumber_skey[1].sk_strategy = BTEqualStrategyNumber; - relfilenumber_skey[1].sk_subtype = InvalidOid; - relfilenumber_skey[1].sk_collation = InvalidOid; + relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace; relfilenumber_skey[1].sk_attno = Anum_pg_class_relfilenode; /* @@ -198,7 +196,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) /* set scan arguments */ skey[0].sk_argument = ObjectIdGetDatum(reltablespace); - skey[1].sk_argument = Int64GetDatum((int64) relfilenumber); + skey[1].sk_argument = ObjectIdGetDatum(relfilenumber); scandesc = systable_beginscan(relation, ClassTblspcRelfilenodeIndexId, @@ -215,7 +213,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) if (found) elog(ERROR, - "unexpected duplicate for tablespace %u, relfilenumber " UINT64_FORMAT, + "unexpected duplicate for tablespace %u, relfilenumber %u", reltablespace, relfilenumber); found = true; diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index d441cd97e2f..781f8b87580 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -79,8 +79,8 @@ pg_control_system(PG_FUNCTION_ARGS) Datum pg_control_checkpoint(PG_FUNCTION_ARGS) { - Datum values[19]; - bool nulls[19]; + Datum values[18]; + bool nulls[18]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; @@ -129,8 +129,6 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TIMESTAMPTZOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 19, "next_relfilenumber", - INT8OID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); /* Read the control file. */ @@ -204,9 +202,6 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time)); nulls[17] = false; - values[18] = Int64GetDatum((int64) ControlFile->checkPointCopy.nextRelFileNumber); - nulls[18] = false; - htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); |