Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b95ae32

Browse files
committed
Avoid WAL-logging individual tuple insertions during CREATE TABLE AS
(a.k.a. SELECT INTO). Instead, flush and fsync the whole relation before committing. The insertions must still be WAL-logged when PITR (WAL archiving) is active, however. Patch by Simon Riggs and Tom Lane.
1 parent 1bfdd1a commit b95ae32

File tree

9 files changed

+108
-27
lines changed

9 files changed

+108
-27
lines changed

src/backend/access/heap/heapam.c

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.195 2005/06/20 18:37:01 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1034,9 +1034,20 @@ heap_get_latest_tid(Relation relation,
10341034
*
10351035
* The new tuple is stamped with current transaction ID and the specified
10361036
* command ID.
1037+
*
1038+
* If use_wal is false, the new tuple is not logged in WAL, even for a
1039+
* non-temp relation. Safe usage of this behavior requires that we arrange
1040+
* that all new tuples go into new pages not containing any tuples from other
1041+
* transactions, that the relation gets fsync'd before commit, and that the
1042+
* transaction emits at least one WAL record to ensure RecordTransactionCommit
1043+
* will decide to WAL-log the commit.
1044+
*
1045+
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
1046+
* more info.
10371047
*/
10381048
Oid
1039-
heap_insert(Relation relation, HeapTuple tup, CommandId cid)
1049+
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
1050+
bool use_wal, bool use_fsm)
10401051
{
10411052
TransactionId xid = GetCurrentTransactionId();
10421053
Buffer buffer;
@@ -1086,7 +1097,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
10861097
heap_tuple_toast_attrs(relation, tup, NULL);
10871098

10881099
/* Find buffer to insert this tuple into */
1089-
buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
1100+
buffer = RelationGetBufferForTuple(relation, tup->t_len,
1101+
InvalidBuffer, use_fsm);
10901102

10911103
/* NO EREPORT(ERROR) from here till changes are logged */
10921104
START_CRIT_SECTION();
@@ -1096,7 +1108,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
10961108
pgstat_count_heap_insert(&relation->pgstat_info);
10971109

10981110
/* XLOG stuff */
1099-
if (!relation->rd_istemp)
1111+
if (relation->rd_istemp)
1112+
{
1113+
/* No XLOG record, but still need to flag that XID exists on disk */
1114+
MyXactMadeTempRelUpdate = true;
1115+
}
1116+
else if (use_wal)
11001117
{
11011118
xl_heap_insert xlrec;
11021119
xl_heap_header xlhdr;
@@ -1151,11 +1168,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
11511168
PageSetLSN(page, recptr);
11521169
PageSetTLI(page, ThisTimeLineID);
11531170
}
1154-
else
1155-
{
1156-
/* No XLOG record, but still need to flag that XID exists on disk */
1157-
MyXactMadeTempRelUpdate = true;
1158-
}
11591171

11601172
END_CRIT_SECTION();
11611173

@@ -1183,7 +1195,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
11831195
Oid
11841196
simple_heap_insert(Relation relation, HeapTuple tup)
11851197
{
1186-
return heap_insert(relation, tup, GetCurrentCommandId());
1198+
return heap_insert(relation, tup, GetCurrentCommandId(), true, true);
11871199
}
11881200

11891201
/*
@@ -1743,7 +1755,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
17431755
{
17441756
/* Assume there's no chance to put newtup on same page. */
17451757
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
1746-
buffer);
1758+
buffer, true);
17471759
}
17481760
else
17491761
{
@@ -1760,7 +1772,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
17601772
*/
17611773
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
17621774
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
1763-
buffer);
1775+
buffer, true);
17641776
}
17651777
else
17661778
{

src/backend/access/heap/hio.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.57 2005/06/20 18:37:01 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -79,12 +79,26 @@ RelationPutHeapTuple(Relation relation,
7979
* happen if space is freed in that page after heap_update finds there's not
8080
* enough there). In that case, the page will be pinned and locked only once.
8181
*
82+
* If use_fsm is true (the normal case), we use FSM to help us find free
83+
* space. If use_fsm is false, we always append a new empty page to the
84+
* end of the relation if the tuple won't fit on the current target page.
85+
* This can save some cycles when we know the relation is new and doesn't
86+
* contain useful amounts of free space.
87+
*
88+
* The use_fsm = false case is also useful for non-WAL-logged additions to a
89+
* relation, if the caller holds exclusive lock and is careful to invalidate
90+
* relation->rd_targblock before the first insertion --- that ensures that
91+
* all insertions will occur into newly added pages and not be intermixed
92+
* with tuples from other transactions. That way, a crash can't risk losing
93+
* any committed data of other transactions. (See heap_insert's comments
94+
* for additional constraints needed for safe usage of this behavior.)
95+
*
8296
* ereport(ERROR) is allowed here, so this routine *must* be called
8397
* before any (unlogged) changes are made in buffer pool.
8498
*/
8599
Buffer
86100
RelationGetBufferForTuple(Relation relation, Size len,
87-
Buffer otherBuffer)
101+
Buffer otherBuffer, bool use_fsm)
88102
{
89103
Buffer buffer = InvalidBuffer;
90104
Page pageHeader;
@@ -121,11 +135,14 @@ RelationGetBufferForTuple(Relation relation, Size len,
121135
* on each page that proves not to be suitable.) If the FSM has no
122136
* record of a page with enough free space, we give up and extend the
123137
* relation.
138+
*
139+
* When use_fsm is false, we either put the tuple onto the existing
140+
* target page or extend the relation.
124141
*/
125142

126143
targetBlock = relation->rd_targblock;
127144

128-
if (targetBlock == InvalidBlockNumber)
145+
if (targetBlock == InvalidBlockNumber && use_fsm)
129146
{
130147
/*
131148
* We have no cached target page, so ask the FSM for an initial
@@ -209,6 +226,10 @@ RelationGetBufferForTuple(Relation relation, Size len,
209226
ReleaseBuffer(buffer);
210227
}
211228

229+
/* Without FSM, always fall out of the loop and extend */
230+
if (!use_fsm)
231+
break;
232+
212233
/*
213234
* Update FSM as to condition of this page, and ask for another
214235
* page to try.

src/backend/executor/execMain.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@
2626
*
2727
*
2828
* IDENTIFICATION
29-
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.249 2005/05/22 22:30:19 tgl Exp $
29+
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.250 2005/06/20 18:37:01 tgl Exp $
3030
*
3131
*-------------------------------------------------------------------------
3232
*/
3333
#include "postgres.h"
3434

3535
#include "access/heapam.h"
36+
#include "access/xlog.h"
3637
#include "catalog/heap.h"
3738
#include "catalog/namespace.h"
3839
#include "commands/tablecmds.h"
@@ -44,6 +45,7 @@
4445
#include "optimizer/clauses.h"
4546
#include "optimizer/var.h"
4647
#include "parser/parsetree.h"
48+
#include "storage/smgr.h"
4749
#include "utils/acl.h"
4850
#include "utils/guc.h"
4951
#include "utils/lsyscache.h"
@@ -784,6 +786,20 @@ InitPlan(QueryDesc *queryDesc, bool explainOnly)
784786
* And open the constructed table for writing.
785787
*/
786788
intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
789+
790+
/* use_wal off requires rd_targblock be initially invalid */
791+
Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
792+
793+
/*
794+
* We can skip WAL-logging the insertions, unless PITR is in use.
795+
*
796+
* Note that for a non-temp INTO table, this is safe only because
797+
* we know that the catalog changes above will have been WAL-logged,
798+
* and so RecordTransactionCommit will think it needs to WAL-log the
799+
* eventual transaction commit. Else the commit might be lost, even
800+
* though all the data is safely fsync'd ...
801+
*/
802+
estate->es_into_relation_use_wal = XLogArchivingActive();
787803
}
788804

789805
estate->es_into_relation_descriptor = intoRelationDesc;
@@ -979,7 +995,22 @@ ExecEndPlan(PlanState *planstate, EState *estate)
979995
* close the "into" relation if necessary, again keeping lock
980996
*/
981997
if (estate->es_into_relation_descriptor != NULL)
998+
{
999+
/*
1000+
* If we skipped using WAL, and it's not a temp relation,
1001+
* we must force the relation down to disk before it's
1002+
* safe to commit the transaction. This requires forcing
1003+
* out any dirty buffers and then doing a forced fsync.
1004+
*/
1005+
if (!estate->es_into_relation_use_wal &&
1006+
!estate->es_into_relation_descriptor->rd_istemp)
1007+
{
1008+
FlushRelationBuffers(estate->es_into_relation_descriptor);
1009+
smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
1010+
}
1011+
9821012
heap_close(estate->es_into_relation_descriptor, NoLock);
1013+
}
9831014

9841015
/*
9851016
* close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
@@ -1307,7 +1338,9 @@ ExecSelect(TupleTableSlot *slot,
13071338

13081339
tuple = ExecCopySlotTuple(slot);
13091340
heap_insert(estate->es_into_relation_descriptor, tuple,
1310-
estate->es_snapshot->curcid);
1341+
estate->es_snapshot->curcid,
1342+
estate->es_into_relation_use_wal,
1343+
false); /* never any point in using FSM */
13111344
/* we know there are no indexes to update */
13121345
heap_freetuple(tuple);
13131346
IncrAppended();
@@ -1386,7 +1419,8 @@ ExecInsert(TupleTableSlot *slot,
13861419
* insert the tuple
13871420
*/
13881421
newId = heap_insert(resultRelationDesc, tuple,
1389-
estate->es_snapshot->curcid);
1422+
estate->es_snapshot->curcid,
1423+
true, true);
13901424

13911425
IncrAppended();
13921426
(estate->es_processed)++;
@@ -2089,6 +2123,7 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
20892123
epqstate->es_result_relation_info = estate->es_result_relation_info;
20902124
epqstate->es_junkFilter = estate->es_junkFilter;
20912125
epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
2126+
epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
20922127
epqstate->es_param_list_info = estate->es_param_list_info;
20932128
if (estate->es_topPlan->nParamExec > 0)
20942129
epqstate->es_param_exec_vals = (ParamExecData *)

src/backend/executor/execUtils.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.123 2005/04/28 21:47:12 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.124 2005/06/20 18:37:01 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -186,7 +186,9 @@ CreateExecutorState(void)
186186
estate->es_result_relation_info = NULL;
187187

188188
estate->es_junkFilter = NULL;
189+
189190
estate->es_into_relation_descriptor = NULL;
191+
estate->es_into_relation_use_wal = false;
190192

191193
estate->es_param_list_info = NULL;
192194
estate->es_param_exec_vals = NULL;

src/backend/storage/smgr/md.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.115 2005/05/29 04:23:05 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.116 2005/06/20 18:37:01 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -660,6 +660,9 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
660660

661661
/*
662662
* mdimmedsync() -- Immediately sync a relation to stable storage.
663+
*
664+
* Note that only writes already issued are synced; this routine knows
665+
* nothing of dirty buffers that may exist inside the buffer manager.
663666
*/
664667
bool
665668
mdimmedsync(SMgrRelation reln)

src/backend/storage/smgr/smgr.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.91 2005/06/20 18:37:01 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -650,7 +650,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
650650
/*
651651
* smgrimmedsync() -- Force the specified relation to stable storage.
652652
*
653-
* Synchronously force all of the specified relation down to disk.
653+
* Synchronously force all previous writes to the specified relation
654+
* down to disk.
654655
*
655656
* This is useful for building completely new relations (eg, new
656657
* indexes). Instead of incrementally WAL-logging the index build
@@ -664,6 +665,10 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
664665
*
665666
* The preceding writes should specify isTemp = true to avoid
666667
* duplicative fsyncs.
668+
*
669+
* Note that you need to do FlushRelationBuffers() first if there is
670+
* any possibility that there are dirty buffers for the relation;
671+
* otherwise the sync is not very meaningful.
667672
*/
668673
void
669674
smgrimmedsync(SMgrRelation reln)

src/include/access/heapam.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.101 2005/06/06 17:01:24 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.102 2005/06/20 18:37:01 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -156,7 +156,8 @@ extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot,
156156
ItemPointer tid);
157157
extern void setLastTid(const ItemPointer tid);
158158

159-
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid);
159+
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
160+
bool use_wal, bool use_fsm);
160161
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid,
161162
CommandId cid, Snapshot crosscheck, bool wait);
162163
extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup,

src/include/access/hio.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.27 2004/12/31 22:03:21 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.28 2005/06/20 18:37:01 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -19,6 +19,6 @@
1919
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
2020
HeapTuple tuple);
2121
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
22-
Buffer otherBuffer);
22+
Buffer otherBuffer, bool use_fsm);
2323

2424
#endif /* HIO_H */

src/include/nodes/execnodes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.134 2005/06/15 07:27:44 neilc Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.135 2005/06/20 18:37:02 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -304,7 +304,9 @@ typedef struct EState
304304
ResultRelInfo *es_result_relation_info; /* currently active array
305305
* elt */
306306
JunkFilter *es_junkFilter; /* currently active junk filter */
307+
307308
Relation es_into_relation_descriptor; /* for SELECT INTO */
309+
bool es_into_relation_use_wal;
308310

309311
/* Parameter info: */
310312
ParamListInfo es_param_list_info; /* values of external params */

0 commit comments

Comments
 (0)