Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2065dd2

Browse files
committed
Prevent very-low-probability PANIC during PREPARE TRANSACTION.
The code in PostPrepare_Locks supposed that it could reassign locks to the prepared transaction's dummy PGPROC by deleting the PROCLOCK table entries and immediately creating new ones. This was safe when that code was written, but since we invented partitioning of the shared lock table, it's not safe --- another process could steal away the PROCLOCK entry in the short interval when it's on the freelist. Then, if we were otherwise out of shared memory, PostPrepare_Locks would have to PANIC, since it's too late to back out of the PREPARE at that point. Fix by inventing a dynahash.c function to atomically update a hashtable entry's key. (This might possibly have other uses in future.) This is an ancient bug that in principle we ought to back-patch, but the odds of someone hitting it in the field seem really tiny, because (a) the risk window is small, and (b) nobody runs servers with maxed-out lock tables for long, because they'll be getting non-PANIC out-of-memory errors anyway. So fixing it in HEAD seems sufficient, at least until the new code has gotten some testing.
1 parent 9d2cd99 commit 2065dd2

File tree

3 files changed

+169
-48
lines changed

3 files changed

+169
-48
lines changed

src/backend/storage/lmgr/lock.c

+23-48
Original file line numberDiff line numberDiff line change
@@ -3028,7 +3028,6 @@ PostPrepare_Locks(TransactionId xid)
30283028
LOCK *lock;
30293029
PROCLOCK *proclock;
30303030
PROCLOCKTAG proclocktag;
3031-
bool found;
30323031
int partition;
30333032

30343033
/* This is a critical section: any error means big trouble */
@@ -3114,10 +3113,8 @@ PostPrepare_Locks(TransactionId xid)
31143113
while (proclock)
31153114
{
31163115
PROCLOCK *nextplock;
3117-
LOCKMASK holdMask;
3118-
PROCLOCK *newproclock;
31193116

3120-
/* Get link first, since we may unlink/delete this proclock */
3117+
/* Get link first, since we may unlink/relink this proclock */
31213118
nextplock = (PROCLOCK *)
31223119
SHMQueueNext(procLocks, &proclock->procLink,
31233120
offsetof(PROCLOCK, procLink));
@@ -3145,64 +3142,42 @@ PostPrepare_Locks(TransactionId xid)
31453142
if (proclock->releaseMask != proclock->holdMask)
31463143
elog(PANIC, "we seem to have dropped a bit somewhere");
31473144

3148-
holdMask = proclock->holdMask;
3149-
31503145
/*
31513146
* We cannot simply modify proclock->tag.myProc to reassign
31523147
* ownership of the lock, because that's part of the hash key and
3153-
* the proclock would then be in the wrong hash chain. So, unlink
3154-
* and delete the old proclock; create a new one with the right
3155-
* contents; and link it into place. We do it in this order to be
3156-
* certain we won't run out of shared memory (the way dynahash.c
3157-
* works, the deleted object is certain to be available for
3158-
* reallocation).
3148+
* the proclock would then be in the wrong hash chain. Instead
3149+
* use hash_update_hash_key. (We used to create a new hash entry,
3150+
* but that risks out-of-memory failure if other processes are
3151+
* busy making proclocks too.) We must unlink the proclock from
3152+
* our procLink chain and put it into the new proc's chain, too.
3153+
*
3154+
* Note: the updated proclock hash key will still belong to the
3155+
* same hash partition, cf proclock_hash(). So the partition
3156+
* lock we already hold is sufficient for this.
31593157
*/
3160-
SHMQueueDelete(&proclock->lockLink);
31613158
SHMQueueDelete(&proclock->procLink);
3162-
if (!hash_search(LockMethodProcLockHash,
3163-
(void *) &(proclock->tag),
3164-
HASH_REMOVE, NULL))
3165-
elog(PANIC, "proclock table corrupted");
31663159

31673160
/*
3168-
* Create the hash key for the new proclock table.
3161+
* Create the new hash key for the proclock.
31693162
*/
31703163
proclocktag.myLock = lock;
31713164
proclocktag.myProc = newproc;
31723165

3173-
newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
3174-
(void *) &proclocktag,
3175-
HASH_ENTER_NULL, &found);
3176-
if (!newproclock)
3177-
ereport(PANIC, /* should not happen */
3178-
(errcode(ERRCODE_OUT_OF_MEMORY),
3179-
errmsg("out of shared memory"),
3180-
errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
3181-
31823166
/*
3183-
* If new, initialize the new entry
3167+
* Update the proclock. We should not find any existing entry
3168+
* for the same hash key, since there can be only one entry for
3169+
* any given lock with my own proc.
31843170
*/
3185-
if (!found)
3186-
{
3187-
newproclock->holdMask = 0;
3188-
newproclock->releaseMask = 0;
3189-
/* Add new proclock to appropriate lists */
3190-
SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
3191-
SHMQueueInsertBefore(&(newproc->myProcLocks[partition]),
3192-
&newproclock->procLink);
3193-
PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
3194-
}
3195-
else
3196-
{
3197-
PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
3198-
Assert((newproclock->holdMask & ~lock->grantMask) == 0);
3199-
}
3171+
if (!hash_update_hash_key(LockMethodProcLockHash,
3172+
(void *) proclock,
3173+
(void *) &proclocktag))
3174+
elog(PANIC, "duplicate entry found while reassigning a prepared transaction's locks");
32003175

3201-
/*
3202-
* Pass over the identified lock ownership.
3203-
*/
3204-
Assert((newproclock->holdMask & holdMask) == 0);
3205-
newproclock->holdMask |= holdMask;
3176+
/* Re-link into the new proc's proclock list */
3177+
SHMQueueInsertBefore(&(newproc->myProcLocks[partition]),
3178+
&proclock->procLink);
3179+
3180+
PROCLOCK_PRINT("PostPrepare_Locks: updated", proclock);
32063181

32073182
next_item:
32083183
proclock = nextplock;

src/backend/utils/hash/dynahash.c

+144
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ struct HTAB
183183
*/
184184
#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
185185

186+
/*
187+
* Obtain element pointer given pointer to key
188+
*/
189+
#define ELEMENT_FROM_KEY(key) \
190+
((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
191+
186192
/*
187193
* Fast MOD arithmetic, assuming that y is a power of 2 !
188194
*/
@@ -987,6 +993,144 @@ hash_search_with_hash_value(HTAB *hashp,
987993
return NULL; /* keep compiler quiet */
988994
}
989995

996+
/*
997+
* hash_update_hash_key -- change the hash key of an existing table entry
998+
*
999+
* This is equivalent to removing the entry, making a new entry, and copying
1000+
* over its data, except that the entry never goes to the table's freelist.
1001+
* Therefore this cannot suffer an out-of-memory failure, even if there are
1002+
* other processes operating in other partitions of the hashtable.
1003+
*
1004+
* Returns TRUE if successful, FALSE if the requested new hash key is already
1005+
* present. Throws error if the specified entry pointer isn't actually a
1006+
* table member.
1007+
*
1008+
* NB: currently, there is no special case for old and new hash keys being
1009+
* identical, which means we'll report FALSE for that situation. This is
1010+
* preferable for existing uses.
1011+
*
1012+
* NB: for a partitioned hashtable, caller must hold lock on both relevant
1013+
* partitions, if the new hash key would belong to a different partition.
1014+
*/
1015+
bool
1016+
hash_update_hash_key(HTAB *hashp,
1017+
void *existingEntry,
1018+
const void *newKeyPtr)
1019+
{
1020+
HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1021+
HASHHDR *hctl = hashp->hctl;
1022+
uint32 newhashvalue;
1023+
Size keysize;
1024+
uint32 bucket;
1025+
long segment_num;
1026+
long segment_ndx;
1027+
HASHSEGMENT segp;
1028+
HASHBUCKET currBucket;
1029+
HASHBUCKET *prevBucketPtr;
1030+
HASHBUCKET *oldPrevPtr;
1031+
HashCompareFunc match;
1032+
1033+
#if HASH_STATISTICS
1034+
hash_accesses++;
1035+
hctl->accesses++;
1036+
#endif
1037+
1038+
/* disallow updates if frozen */
1039+
if (hashp->frozen)
1040+
elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1041+
hashp->tabname);
1042+
1043+
/*
1044+
* Lookup the existing element using its saved hash value. We need to
1045+
* do this to be able to unlink it from its hash chain, but as a side
1046+
* benefit we can verify the validity of the passed existingEntry pointer.
1047+
*/
1048+
bucket = calc_bucket(hctl, existingElement->hashvalue);
1049+
1050+
segment_num = bucket >> hashp->sshift;
1051+
segment_ndx = MOD(bucket, hashp->ssize);
1052+
1053+
segp = hashp->dir[segment_num];
1054+
1055+
if (segp == NULL)
1056+
hash_corrupted(hashp);
1057+
1058+
prevBucketPtr = &segp[segment_ndx];
1059+
currBucket = *prevBucketPtr;
1060+
1061+
while (currBucket != NULL)
1062+
{
1063+
if (currBucket == existingElement)
1064+
break;
1065+
prevBucketPtr = &(currBucket->link);
1066+
currBucket = *prevBucketPtr;
1067+
}
1068+
1069+
if (currBucket == NULL)
1070+
elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1071+
hashp->tabname);
1072+
1073+
oldPrevPtr = prevBucketPtr;
1074+
1075+
/*
1076+
* Now perform the equivalent of a HASH_ENTER operation to locate the
1077+
* hash chain we want to put the entry into.
1078+
*/
1079+
newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
1080+
1081+
bucket = calc_bucket(hctl, newhashvalue);
1082+
1083+
segment_num = bucket >> hashp->sshift;
1084+
segment_ndx = MOD(bucket, hashp->ssize);
1085+
1086+
segp = hashp->dir[segment_num];
1087+
1088+
if (segp == NULL)
1089+
hash_corrupted(hashp);
1090+
1091+
prevBucketPtr = &segp[segment_ndx];
1092+
currBucket = *prevBucketPtr;
1093+
1094+
/*
1095+
* Follow collision chain looking for matching key
1096+
*/
1097+
match = hashp->match; /* save one fetch in inner loop */
1098+
keysize = hashp->keysize; /* ditto */
1099+
1100+
while (currBucket != NULL)
1101+
{
1102+
if (currBucket->hashvalue == newhashvalue &&
1103+
match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1104+
break;
1105+
prevBucketPtr = &(currBucket->link);
1106+
currBucket = *prevBucketPtr;
1107+
#if HASH_STATISTICS
1108+
hash_collisions++;
1109+
hctl->collisions++;
1110+
#endif
1111+
}
1112+
1113+
if (currBucket != NULL)
1114+
return false; /* collision with an existing entry */
1115+
1116+
currBucket = existingElement;
1117+
1118+
/* OK to remove record from old hash bucket's chain. */
1119+
*oldPrevPtr = currBucket->link;
1120+
1121+
/* link into new hashbucket chain */
1122+
*prevBucketPtr = currBucket;
1123+
currBucket->link = NULL;
1124+
1125+
/* copy new key into record */
1126+
currBucket->hashvalue = newhashvalue;
1127+
hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1128+
1129+
/* rest of record is untouched */
1130+
1131+
return true;
1132+
}
1133+
9901134
/*
9911135
* create a new entry if possible
9921136
*/

src/include/utils/hsearch.h

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ extern uint32 get_hash_value(HTAB *hashp, const void *keyPtr);
128128
extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
129129
uint32 hashvalue, HASHACTION action,
130130
bool *foundPtr);
131+
extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
132+
const void *newKeyPtr);
131133
extern long hash_get_num_entries(HTAB *hashp);
132134
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
133135
extern void *hash_seq_search(HASH_SEQ_STATUS *status);

0 commit comments

Comments
 (0)