Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 10b9ca3

Browse files
committed
Split the buffer mapping table into multiple separately lockable
partitions, as per discussion. Passes functionality checks, but I don't have any performance data yet.
1 parent 51ee9fa commit 10b9ca3

File tree

8 files changed

+198
-75
lines changed

8 files changed

+198
-75
lines changed

contrib/pg_buffercache/pg_buffercache_pages.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* pg_buffercache_pages.c
44
* display some contents of the buffer cache
55
*
6-
* $PostgreSQL: pgsql/contrib/pg_buffercache/pg_buffercache_pages.c,v 1.7 2006/05/30 22:12:13 tgl Exp $
6+
* $PostgreSQL: pgsql/contrib/pg_buffercache/pg_buffercache_pages.c,v 1.8 2006/07/23 03:07:57 tgl Exp $
77
*-------------------------------------------------------------------------
88
*/
99
#include "postgres.h"
@@ -74,7 +74,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
7474

7575
if (SRF_IS_FIRSTCALL())
7676
{
77-
uint32 i;
77+
int i;
7878
volatile BufferDesc *bufHdr;
7979

8080
funcctx = SRF_FIRSTCALL_INIT();
@@ -108,7 +108,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
108108
funcctx->max_calls = NBuffers;
109109
funcctx->user_fctx = fctx;
110110

111-
112111
/* Allocate NBuffers worth of BufferCachePagesRec records. */
113112
fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers);
114113

@@ -120,17 +119,21 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
120119
fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1);
121120
fctx->values[5] = (char *) palloc(2);
122121

123-
124122
/* Return to original context when allocating transient memory */
125123
MemoryContextSwitchTo(oldcontext);
126124

127-
128125
/*
129-
* Lock Buffer map and scan though all the buffers, saving the
130-
* relevant fields in the fctx->record structure.
126+
* To get a consistent picture of the buffer state, we must lock
127+
* all partitions of the buffer map. Needless to say, this is
128+
* horrible for concurrency...
131129
*/
132-
LWLockAcquire(BufMappingLock, LW_SHARED);
130+
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
131+
LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);
133132

133+
/*
134+
* Scan through all the buffers, saving the relevant fields in the
135+
* fctx->record structure.
136+
*/
134137
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
135138
{
136139
/* Lock each buffer header before inspecting. */
@@ -157,15 +160,15 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
157160
}
158161

159162
/* Release all buffer map partition locks. */
160-
LWLockRelease(BufMappingLock);
163+
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
164+
LWLockRelease(FirstBufMappingLock + i);
161165
}
162166

163167
funcctx = SRF_PERCALL_SETUP();
164168

165169
/* Get the saved state */
166170
fctx = funcctx->user_fctx;
167171

168-
169172
if (funcctx->call_cntr < funcctx->max_calls)
170173
{
171174
uint32 i = funcctx->call_cntr;

src/backend/storage/buffer/README

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.10 2006/06/08 14:58:33 tgl Exp $
1+
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
22

33
Notes about shared buffer access rules
44
--------------------------------------
@@ -114,6 +114,14 @@ operation that needs exclusive lock is reading in a page that was not
114114
in shared buffers already, which will require at least a kernel call
115115
and usually a wait for I/O, so it will be slow anyway.
116116

117+
* As of PG 8.2, the BufMappingLock has been split into NUM_BUFFER_PARTITIONS
118+
separate locks, each guarding a portion of the buffer tag space. This allows
119+
further reduction of contention in the normal code paths. The partition
120+
that a particular buffer tag belongs to is determined from the low-order
121+
bits of the tag's hash value. The rules stated above apply to each partition
122+
independently. If it is necessary to lock more than one partition at a time,
123+
they must be locked in partition-number order to avoid risk of deadlock.
124+
117125
* A separate system-wide LWLock, the BufFreelistLock, provides mutual
118126
exclusion for operations that access the buffer free list or select
119127
buffers for replacement. This is always taken in exclusive mode since

src/backend/storage/buffer/buf_table.c

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,18 @@
44
* routines for mapping BufferTags to buffer indexes.
55
*
66
* Note: the routines in this file do no locking of their own. The caller
7-
* must hold a suitable lock on the BufMappingLock, as specified in the
8-
* comments.
7+
* must hold a suitable lock on the appropriate BufMappingLock, as specified
8+
* in the comments. We can't do the locking inside these functions because
9+
* in most cases the caller needs to adjust the buffer header contents
10+
* before the lock is released (see notes in README).
911
*
1012
*
1113
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
1214
* Portions Copyright (c) 1994, Regents of the University of California
1315
*
1416
*
1517
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.46 2006/07/14 16:59:19 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/storage/buffer/buf_table.c,v 1.47 2006/07/23 03:07:58 tgl Exp $
1719
*
1820
*-------------------------------------------------------------------------
1921
*/
@@ -58,29 +60,49 @@ InitBufTable(int size)
5860
info.keysize = sizeof(BufferTag);
5961
info.entrysize = sizeof(BufferLookupEnt);
6062
info.hash = tag_hash;
63+
info.num_partitions = NUM_BUFFER_PARTITIONS;
6164

6265
SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
6366
size, size,
6467
&info,
65-
HASH_ELEM | HASH_FUNCTION);
68+
HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
6669

6770
if (!SharedBufHash)
6871
elog(FATAL, "could not initialize shared buffer hash table");
6972
}
7073

74+
/*
75+
* BufTableHashCode
76+
* Compute the hash code associated with a BufferTag
77+
*
78+
* This must be passed to the lookup/insert/delete routines along with the
79+
* tag. We do it like this because the callers need to know the hash code
80+
* in order to determine which buffer partition to lock, and we don't want
81+
* to do the hash computation twice (hash_any is a bit slow).
82+
*/
83+
uint32
84+
BufTableHashCode(BufferTag *tagPtr)
85+
{
86+
return get_hash_value(SharedBufHash, (void *) tagPtr);
87+
}
88+
7189
/*
7290
* BufTableLookup
7391
* Lookup the given BufferTag; return buffer ID, or -1 if not found
7492
*
75-
* Caller must hold at least share lock on BufMappingLock
93+
* Caller must hold at least share lock on BufMappingLock for tag's partition
7694
*/
7795
int
78-
BufTableLookup(BufferTag *tagPtr)
96+
BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
7997
{
8098
BufferLookupEnt *result;
8199

82100
result = (BufferLookupEnt *)
83-
hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
101+
hash_search_with_hash_value(SharedBufHash,
102+
(void *) tagPtr,
103+
hashcode,
104+
HASH_FIND,
105+
NULL);
84106

85107
if (!result)
86108
return -1;
@@ -96,10 +118,10 @@ BufTableLookup(BufferTag *tagPtr)
96118
* Returns -1 on successful insertion. If a conflicting entry exists
97119
* already, returns the buffer ID in that entry.
98120
*
99-
* Caller must hold write lock on BufMappingLock
121+
* Caller must hold exclusive lock on BufMappingLock for tag's partition
100122
*/
101123
int
102-
BufTableInsert(BufferTag *tagPtr, int buf_id)
124+
BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
103125
{
104126
BufferLookupEnt *result;
105127
bool found;
@@ -108,7 +130,11 @@ BufTableInsert(BufferTag *tagPtr, int buf_id)
108130
Assert(tagPtr->blockNum != P_NEW); /* invalid tag */
109131

110132
result = (BufferLookupEnt *)
111-
hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
133+
hash_search_with_hash_value(SharedBufHash,
134+
(void *) tagPtr,
135+
hashcode,
136+
HASH_ENTER,
137+
&found);
112138

113139
if (found) /* found something already in the table */
114140
return result->id;
@@ -122,15 +148,19 @@ BufTableInsert(BufferTag *tagPtr, int buf_id)
122148
* BufTableDelete
123149
* Delete the hashtable entry for given tag (which must exist)
124150
*
125-
* Caller must hold write lock on BufMappingLock
151+
* Caller must hold exclusive lock on BufMappingLock for tag's partition
126152
*/
127153
void
128-
BufTableDelete(BufferTag *tagPtr)
154+
BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
129155
{
130156
BufferLookupEnt *result;
131157

132158
result = (BufferLookupEnt *)
133-
hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
159+
hash_search_with_hash_value(SharedBufHash,
160+
(void *) tagPtr,
161+
hashcode,
162+
HASH_REMOVE,
163+
NULL);
134164

135165
if (!result) /* shouldn't happen */
136166
elog(ERROR, "shared buffer hash table corrupted");

0 commit comments

Comments
 (0)