Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d25f519

Browse files
committed
tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.
This moves the responsibility for the following below tableam: (1) creating the storage necessary for a relation, including creating a new relfilenode for a relation with existing storage; (2) non-transactional truncation of a relation; (3) VACUUM FULL / CLUSTER's rewrite of a table. This is fairly straightforward, with a bit of complexity sprinkled in to move the computation of xid / multixid horizons below the AM, as they don't make sense for every table AM. Author: Andres Freund. Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
1 parent 7e69323 commit d25f519

File tree

13 files changed

+856
-579
lines changed

13 files changed

+856
-579
lines changed

src/backend/access/heap/heapam_handler.c

+451
Large diffs are not rendered by default.

src/backend/bootstrap/bootparse.y

+6-1
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ Boot_CreateStmt:
209209

210210
if ($4)
211211
{
212+
TransactionId relfrozenxid;
213+
MultiXactId relminmxid;
214+
212215
if (boot_reldesc)
213216
{
214217
elog(DEBUG4, "create bootstrap: warning, open relation exists, closing first");
@@ -226,7 +229,9 @@ Boot_CreateStmt:
226229
RELPERSISTENCE_PERMANENT,
227230
shared_relation,
228231
mapped_relation,
229-
true);
232+
true,
233+
&relfrozenxid,
234+
&relminmxid);
230235
elog(DEBUG4, "bootstrap relation created");
231236
}
232237
else

src/backend/catalog/heap.c

+49-71
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "access/relation.h"
3636
#include "access/sysattr.h"
3737
#include "access/table.h"
38+
#include "access/tableam.h"
3839
#include "access/transam.h"
3940
#include "access/xact.h"
4041
#include "access/xlog.h"
@@ -98,6 +99,8 @@ static void AddNewRelationTuple(Relation pg_class_desc,
9899
Oid reloftype,
99100
Oid relowner,
100101
char relkind,
102+
TransactionId relfrozenxid,
103+
TransactionId relminmxid,
101104
Datum relacl,
102105
Datum reloptions);
103106
static ObjectAddress AddNewRelationType(const char *typeName,
@@ -300,7 +303,9 @@ heap_create(const char *relname,
300303
char relpersistence,
301304
bool shared_relation,
302305
bool mapped_relation,
303-
bool allow_system_table_mods)
306+
bool allow_system_table_mods,
307+
TransactionId *relfrozenxid,
308+
MultiXactId *relminmxid)
304309
{
305310
bool create_storage;
306311
Relation rel;
@@ -327,6 +332,9 @@ heap_create(const char *relname,
327332
get_namespace_name(relnamespace), relname),
328333
errdetail("System catalog modifications are currently disallowed.")));
329334

335+
*relfrozenxid = InvalidTransactionId;
336+
*relminmxid = InvalidMultiXactId;
337+
330338
/* Handle reltablespace for specific relkinds. */
331339
switch (relkind)
332340
{
@@ -400,13 +408,36 @@ heap_create(const char *relname,
400408
/*
401409
* Have the storage manager create the relation's disk file, if needed.
402410
*
403-
* We only create the main fork here, other forks will be created on
404-
* demand.
411+
* For relations, the callback creates both the main and the init fork; for
412+
* indexes, only the main fork is created. The other forks will be created
413+
* on demand.
405414
*/
406415
if (create_storage)
407416
{
408417
RelationOpenSmgr(rel);
409-
RelationCreateStorage(rel->rd_node, relpersistence);
418+
419+
switch (rel->rd_rel->relkind)
420+
{
421+
case RELKIND_VIEW:
422+
case RELKIND_COMPOSITE_TYPE:
423+
case RELKIND_FOREIGN_TABLE:
424+
case RELKIND_PARTITIONED_TABLE:
425+
case RELKIND_PARTITIONED_INDEX:
426+
Assert(false);
427+
break;
428+
429+
case RELKIND_INDEX:
430+
case RELKIND_SEQUENCE:
431+
RelationCreateStorage(rel->rd_node, relpersistence);
432+
break;
433+
434+
case RELKIND_RELATION:
435+
case RELKIND_TOASTVALUE:
436+
case RELKIND_MATVIEW:
437+
table_relation_set_new_filenode(rel, relpersistence,
438+
relfrozenxid, relminmxid);
439+
break;
440+
}
410441
}
411442

412443
return rel;
@@ -892,6 +923,8 @@ AddNewRelationTuple(Relation pg_class_desc,
892923
Oid reloftype,
893924
Oid relowner,
894925
char relkind,
926+
TransactionId relfrozenxid,
927+
TransactionId relminmxid,
895928
Datum relacl,
896929
Datum reloptions)
897930
{
@@ -928,40 +961,8 @@ AddNewRelationTuple(Relation pg_class_desc,
928961
break;
929962
}
930963

931-
/* Initialize relfrozenxid and relminmxid */
932-
if (relkind == RELKIND_RELATION ||
933-
relkind == RELKIND_MATVIEW ||
934-
relkind == RELKIND_TOASTVALUE)
935-
{
936-
/*
937-
* Initialize to the minimum XID that could put tuples in the table.
938-
* We know that no xacts older than RecentXmin are still running, so
939-
* that will do.
940-
*/
941-
new_rel_reltup->relfrozenxid = RecentXmin;
942-
943-
/*
944-
* Similarly, initialize the minimum Multixact to the first value that
945-
* could possibly be stored in tuples in the table. Running
946-
* transactions could reuse values from their local cache, so we are
947-
* careful to consider all currently running multis.
948-
*
949-
* XXX this could be refined further, but is it worth the hassle?
950-
*/
951-
new_rel_reltup->relminmxid = GetOldestMultiXactId();
952-
}
953-
else
954-
{
955-
/*
956-
* Other relation types will not contain XIDs, so set relfrozenxid to
957-
* InvalidTransactionId. (Note: a sequence does contain a tuple, but
958-
* we force its xmin to be FrozenTransactionId always; see
959-
* commands/sequence.c.)
960-
*/
961-
new_rel_reltup->relfrozenxid = InvalidTransactionId;
962-
new_rel_reltup->relminmxid = InvalidMultiXactId;
963-
}
964-
964+
new_rel_reltup->relfrozenxid = relfrozenxid;
965+
new_rel_reltup->relminmxid = relminmxid;
965966
new_rel_reltup->relowner = relowner;
966967
new_rel_reltup->reltype = new_type_oid;
967968
new_rel_reltup->reloftype = reloftype;
@@ -1089,6 +1090,8 @@ heap_create_with_catalog(const char *relname,
10891090
Oid new_type_oid;
10901091
ObjectAddress new_type_addr;
10911092
Oid new_array_oid = InvalidOid;
1093+
TransactionId relfrozenxid;
1094+
MultiXactId relminmxid;
10921095

10931096
pg_class_desc = table_open(RelationRelationId, RowExclusiveLock);
10941097

@@ -1220,7 +1223,9 @@ heap_create_with_catalog(const char *relname,
12201223
relpersistence,
12211224
shared_relation,
12221225
mapped_relation,
1223-
allow_system_table_mods);
1226+
allow_system_table_mods,
1227+
&relfrozenxid,
1228+
&relminmxid);
12241229

12251230
Assert(relid == RelationGetRelid(new_rel_desc));
12261231

@@ -1319,6 +1324,8 @@ heap_create_with_catalog(const char *relname,
13191324
reloftypeid,
13201325
ownerid,
13211326
relkind,
1327+
relfrozenxid,
1328+
relminmxid,
13221329
PointerGetDatum(relacl),
13231330
reloptions);
13241331

@@ -1407,14 +1414,6 @@ heap_create_with_catalog(const char *relname,
14071414
if (oncommit != ONCOMMIT_NOOP)
14081415
register_on_commit_action(relid, oncommit);
14091416

1410-
/*
1411-
* Unlogged objects need an init fork, except for partitioned tables which
1412-
* have no storage at all.
1413-
*/
1414-
if (relpersistence == RELPERSISTENCE_UNLOGGED &&
1415-
relkind != RELKIND_PARTITIONED_TABLE)
1416-
heap_create_init_fork(new_rel_desc);
1417-
14181417
/*
14191418
* ok, the relation has been cataloged, so close our relations and return
14201419
* the OID of the newly created relation.
@@ -1425,27 +1424,6 @@ heap_create_with_catalog(const char *relname,
14251424
return relid;
14261425
}
14271426

1428-
/*
1429-
* Set up an init fork for an unlogged table so that it can be correctly
1430-
* reinitialized on restart. An immediate sync is required even if the
1431-
* page has been logged, because the write did not go through
1432-
* shared_buffers and therefore a concurrent checkpoint may have moved
1433-
* the redo pointer past our xlog record. Recovery may as well remove it
1434-
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
1435-
* record. Therefore, logging is necessary even if wal_level=minimal.
1436-
*/
1437-
void
1438-
heap_create_init_fork(Relation rel)
1439-
{
1440-
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
1441-
rel->rd_rel->relkind == RELKIND_MATVIEW ||
1442-
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
1443-
RelationOpenSmgr(rel);
1444-
smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
1445-
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
1446-
smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
1447-
}
1448-
14491427
/*
14501428
* RelationRemoveInheritance
14511429
*
@@ -3168,8 +3146,8 @@ heap_truncate_one_rel(Relation rel)
31683146
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
31693147
return;
31703148

3171-
/* Truncate the actual file (and discard buffers) */
3172-
RelationTruncate(rel, 0);
3149+
/* Truncate the underlying relation */
3150+
table_relation_nontransactional_truncate(rel);
31733151

31743152
/* If the relation has indexes, truncate the indexes too */
31753153
RelationTruncateIndexes(rel);
@@ -3180,7 +3158,7 @@ heap_truncate_one_rel(Relation rel)
31803158
{
31813159
Relation toastrel = table_open(toastrelid, AccessExclusiveLock);
31823160

3183-
RelationTruncate(toastrel, 0);
3161+
table_relation_nontransactional_truncate(toastrel);
31843162
RelationTruncateIndexes(toastrel);
31853163
/* keep the lock... */
31863164
table_close(toastrel, NoLock);

src/backend/catalog/index.c

+8-3
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,8 @@ index_create(Relation heapRelation,
739739
bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
740740
bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
741741
char relkind;
742+
TransactionId relfrozenxid;
743+
MultiXactId relminmxid;
742744

743745
/* constraint flags can only be set when a constraint is requested */
744746
Assert((constr_flags == 0) ||
@@ -899,8 +901,12 @@ index_create(Relation heapRelation,
899901
relpersistence,
900902
shared_relation,
901903
mapped_relation,
902-
allow_system_table_mods);
904+
allow_system_table_mods,
905+
&relfrozenxid,
906+
&relminmxid);
903907

908+
Assert(relfrozenxid == InvalidTransactionId);
909+
Assert(relminmxid == InvalidMultiXactId);
904910
Assert(indexRelationId == RelationGetRelid(indexRelation));
905911

906912
/*
@@ -2850,8 +2856,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
28502856
}
28512857

28522858
/* We'll build a new physical relation for the index */
2853-
RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
2854-
InvalidMultiXactId);
2859+
RelationSetNewRelfilenode(iRel, persistence);
28552860

28562861
/* Initialize the index and rebuild */
28572862
/* Note: we do not need to re-establish pkey setting */

src/backend/catalog/storage.c

+88
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#include "postgres.h"
2121

22+
#include "miscadmin.h"
23+
2224
#include "access/visibilitymap.h"
2325
#include "access/xact.h"
2426
#include "access/xlog.h"
@@ -290,6 +292,92 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
290292
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
291293
}
292294

295+
/*
296+
* Copy a fork's data, block by block.
*
* Every block of fork forkNum in src is read, verified, optionally
* WAL-logged, and then appended to the same fork of dst.
*
* src            storage manager relation to read from
* dst            storage manager relation to write to
* forkNum        the fork to copy
* relpersistence persistence of the relation; together with forkNum it
*                decides whether the copy is WAL-logged and whether dst
*                is synced at the end (both happen for permanent
*                relations and for the init fork of an unlogged relation)
*
* A block that fails PageIsVerified() is reported with ereport(ERROR)
* using ERRCODE_DATA_CORRUPTED.
297+
*/
298+
void
299+
RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
300+
ForkNumber forkNum, char relpersistence)
301+
{
302+
PGAlignedBlock buf;
303+
Page page;
304+
bool use_wal;
305+
bool copying_initfork;
306+
BlockNumber nblocks;
307+
BlockNumber blkno;
308+
309+
/* page is just a Page-typed view of the local block buffer */
page = (Page) buf.data;
310+
311+
/*
312+
* The init fork for an unlogged relation in many respects has to be
313+
* treated the same as a normal relation: changes need to be WAL-logged and
314+
* it needs to be synced to disk.
315+
*/
316+
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
317+
forkNum == INIT_FORKNUM;
318+
319+
/*
320+
* We need to log the copied data in WAL iff WAL archiving/streaming is
321+
* enabled AND it's a permanent relation or an unlogged relation's init fork.
322+
*/
323+
use_wal = XLogIsNeeded() &&
324+
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
325+
326+
/* The number of blocks to copy is fixed once, before the loop starts */
nblocks = smgrnblocks(src, forkNum);
327+
328+
for (blkno = 0; blkno < nblocks; blkno++)
329+
{
330+
/* If we got a cancel signal during the copy of the data, quit */
331+
CHECK_FOR_INTERRUPTS();
332+
333+
/* Read the source block into the local buffer */
smgrread(src, forkNum, blkno, buf.data);
334+
335+
/* Refuse to propagate a page that does not pass verification */
if (!PageIsVerified(page, blkno))
336+
ereport(ERROR,
337+
(errcode(ERRCODE_DATA_CORRUPTED),
338+
errmsg("invalid page in block %u of relation %s",
339+
blkno,
340+
relpathbackend(src->smgr_rnode.node,
341+
src->smgr_rnode.backend,
342+
forkNum))));
343+
344+
/*
345+
* WAL-log the copied page. Unfortunately we don't know what kind of a
346+
* page this is, so we have to log the full page including any unused
347+
* space.
348+
*/
349+
if (use_wal)
350+
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
351+
352+
/* Set the checksum only after the optional WAL-logging step above */
PageSetChecksumInplace(page, blkno);
353+
354+
/*
355+
* Now write the page. We say isTemp = true even if it's not a temp
356+
* rel, because there's no need for smgr to schedule an fsync for this
357+
* write; we'll do it ourselves below.
358+
*/
359+
smgrextend(dst, forkNum, blkno, buf.data, true);
360+
}
361+
362+
/*
363+
* If the rel is WAL-logged, must fsync before commit. Only the fork of
364+
* dst that was copied here is synced, via smgrimmedsync(). (For a temp or
365+
* unlogged rel we don't care since the data will be gone after a crash
366+
* anyway; the init fork of an unlogged rel is the exception and is synced.)
* NOTE(review): this comment previously said heap_sync is used so that the
* toast table gets fsync'd too; nothing in this function does that, so
* confirm that callers copy and sync any toast relation separately.
367+
*
368+
* It's obvious that we must do this when not WAL-logging the copy. It's
369+
* less obvious that we have to do it even if we did WAL-log the copied
370+
* pages. The reason is that since we're copying outside shared buffers, a
371+
* CHECKPOINT occurring during the copy has no way to flush the previously
372+
* written data to disk (indeed it won't know the new rel even exists). A
373+
* crash later on would replay WAL from the checkpoint, therefore it
374+
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
375+
* here, they might still not be on disk when the crash occurs.
376+
*/
377+
if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
378+
smgrimmedsync(dst, forkNum);
379+
}
380+
293381
/*
294382
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
295383
*

0 commit comments

Comments
 (0)