Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit eedb7d1

Browse files
committed
Modify RelationGetBufferForTuple() so that we only do lseek and lock when we need to move to a new page; as long as we can insert the new tuple on the same page as before, we only need LockBuffer and not the expensive stuff. Also, twiddle bufmgr interfaces to avoid redundant lseeks in RelationGetBufferForTuple and BufferAlloc. Successive inserts now require one lseek per page added, rather than one per tuple with several additional ones at each page boundary as happened before. Lock contention when multiple backends are inserting in same table is also greatly reduced.
1 parent d9f55ed commit eedb7d1

File tree

5 files changed

+157
-102
lines changed

5 files changed

+157
-102
lines changed

src/backend/access/heap/heapam.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.113 2001/03/25 23:23:58 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.114 2001/05/12 19:58:27 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -487,7 +487,7 @@ heapgettup(Relation relation,
487487
return;
488488
}
489489

490-
*buffer = ReleaseAndReadBuffer(*buffer, relation, page);
490+
*buffer = ReleaseAndReadBuffer(*buffer, relation, page, false);
491491

492492
if (!BufferIsValid(*buffer))
493493
elog(ERROR, "heapgettup: failed ReadBuffer");

src/backend/access/heap/hio.c

Lines changed: 83 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Id: hio.c,v 1.37 2001/03/22 06:16:07 momjian Exp $
11+
* $Id: hio.c,v 1.38 2001/05/12 19:58:27 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -66,7 +66,7 @@ RelationPutHeapTuple(Relation relation,
6666
/*
6767
* RelationGetBufferForTuple
6868
*
69-
* Returns (locked) buffer with free space >= given len.
69+
* Returns exclusive-locked buffer with free space >= given len.
7070
*
7171
* Note that we use LockPage to lock relation for extension. We can
7272
* do this as long as in all other places we use page-level locking
@@ -75,14 +75,14 @@ RelationPutHeapTuple(Relation relation,
7575
*
7676
* ELOG(ERROR) is allowed here, so this routine *must* be called
7777
* before any (unlogged) changes are made in buffer pool.
78-
*
7978
*/
8079
Buffer
8180
RelationGetBufferForTuple(Relation relation, Size len)
8281
{
83-
Buffer buffer;
82+
Buffer buffer = InvalidBuffer;
8483
Page pageHeader;
85-
BlockNumber lastblock;
84+
BlockNumber lastblock,
85+
oldnblocks;
8686

8787
len = MAXALIGN(len); /* be conservative */
8888

@@ -93,59 +93,102 @@ RelationGetBufferForTuple(Relation relation, Size len)
9393
elog(ERROR, "Tuple is too big: size %lu, max size %ld",
9494
(unsigned long) len, MaxTupleSize);
9595

96-
if (!relation->rd_myxactonly)
97-
LockPage(relation, 0, ExclusiveLock);
98-
9996
/*
100-
* XXX This does an lseek - VERY expensive - but at the moment it is
101-
* the only way to accurately determine how many blocks are in a
102-
* relation. A good optimization would be to get this to actually
103-
* work properly.
97+
* First, use relcache's record of table length to guess where the
98+
* last page is, and try to put the tuple there. This cached value
99+
* may be out of date, in which case we'll be inserting into a non-last
100+
* page, but that should be OK. Note that in a newly created relcache
101+
* entry, rd_nblocks may be zero; if so, we'll set it correctly below.
104102
*/
105-
lastblock = RelationGetNumberOfBlocks(relation);
106-
107-
/*
108-
* Get the last existing page --- may need to create the first one if
109-
* this is a virgin relation.
110-
*/
111-
if (lastblock == 0)
112-
{
113-
buffer = ReadBuffer(relation, P_NEW);
114-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
115-
pageHeader = (Page) BufferGetPage(buffer);
116-
Assert(PageIsNew((PageHeader) pageHeader));
117-
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
118-
}
119-
else
103+
if (relation->rd_nblocks > 0)
120104
{
121-
buffer = ReadBuffer(relation, lastblock - 1);
105+
lastblock = relation->rd_nblocks - 1;
106+
buffer = ReadBuffer(relation, lastblock);
122107
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
123108
pageHeader = (Page) BufferGetPage(buffer);
109+
if (len <= PageGetFreeSpace(pageHeader))
110+
return buffer;
111+
/*
112+
* Doesn't fit, so we'll have to try someplace else.
113+
*/
114+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
115+
/* buffer release will happen below... */
124116
}
125117

126118
/*
127-
* Is there room on the last existing page?
119+
* Before extending relation, make sure no one else has done
120+
* so more recently than our last rd_nblocks update. (If we
121+
* blindly extend the relation here, then probably most of the
122+
* page the other guy added will end up going to waste.)
123+
*
124+
* We have to use a lock to ensure no one else is extending the
125+
* rel at the same time, else we will both try to initialize the
126+
* same new page.
128127
*/
129-
if (len > PageGetFreeSpace(pageHeader))
128+
if (!relation->rd_myxactonly)
129+
LockPage(relation, 0, ExclusiveLock);
130+
131+
oldnblocks = relation->rd_nblocks;
132+
/*
133+
* XXX This does an lseek - rather expensive - but at the moment it is
134+
* the only way to accurately determine how many blocks are in a
135+
* relation. Is it worth keeping an accurate file length in shared
136+
* memory someplace, rather than relying on the kernel to do it for us?
137+
*/
138+
relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
139+
140+
if (relation->rd_nblocks > oldnblocks)
130141
{
131-
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
132-
buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
142+
/*
143+
* Someone else has indeed extended the relation recently.
144+
* Try to fit our tuple into the new last page.
145+
*/
146+
lastblock = relation->rd_nblocks - 1;
147+
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, false);
133148
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
134149
pageHeader = (Page) BufferGetPage(buffer);
135-
Assert(PageIsNew((PageHeader) pageHeader));
136-
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
137-
138-
if (len > PageGetFreeSpace(pageHeader))
150+
if (len <= PageGetFreeSpace(pageHeader))
139151
{
140-
/* We should not get here given the test at the top */
141-
elog(STOP, "Tuple is too big: size %lu",
142-
(unsigned long) len);
152+
/* OK, we don't need to extend again. */
153+
if (!relation->rd_myxactonly)
154+
UnlockPage(relation, 0, ExclusiveLock);
155+
return buffer;
143156
}
157+
/*
158+
* Doesn't fit, so we'll have to extend the relation (again).
159+
*/
160+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
161+
/* buffer release will happen below... */
144162
}
145163

164+
/*
165+
* Extend the relation by one page and update rd_nblocks for next time.
166+
*/
167+
lastblock = relation->rd_nblocks;
168+
buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, true);
169+
relation->rd_nblocks = lastblock + 1;
170+
171+
/*
172+
* We need to initialize the empty new page.
173+
*/
174+
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
175+
pageHeader = (Page) BufferGetPage(buffer);
176+
Assert(PageIsNew((PageHeader) pageHeader));
177+
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
178+
179+
/*
180+
* Release the file-extension lock; it's now OK for someone else
181+
* to extend the relation some more.
182+
*/
146183
if (!relation->rd_myxactonly)
147184
UnlockPage(relation, 0, ExclusiveLock);
148185

149-
return (buffer);
186+
if (len > PageGetFreeSpace(pageHeader))
187+
{
188+
/* We should not get here given the test at the top */
189+
elog(STOP, "Tuple is too big: size %lu",
190+
(unsigned long) len);
191+
}
150192

193+
return buffer;
151194
}

0 commit comments

Comments
 (0)