Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/Makefile3
-rw-r--r--src/backend/access/heap/heapam.c41
-rw-r--r--src/backend/access/tablesample/Makefile17
-rw-r--r--src/backend/access/tablesample/bernoulli.c235
-rw-r--r--src/backend/access/tablesample/system.c186
-rw-r--r--src/backend/access/tablesample/tablesample.c368
-rw-r--r--src/backend/catalog/Makefile5
-rw-r--r--src/backend/commands/analyze.c2
-rw-r--r--src/backend/commands/explain.c21
-rw-r--r--src/backend/executor/Makefile2
-rw-r--r--src/backend/executor/execAmi.c8
-rw-r--r--src/backend/executor/execCurrent.c1
-rw-r--r--src/backend/executor/execProcnode.c14
-rw-r--r--src/backend/executor/nodeSamplescan.c256
-rw-r--r--src/backend/nodes/copyfuncs.c60
-rw-r--r--src/backend/nodes/equalfuncs.c37
-rw-r--r--src/backend/nodes/nodeFuncs.c28
-rw-r--r--src/backend/nodes/outfuncs.c48
-rw-r--r--src/backend/nodes/readfuncs.c45
-rw-r--r--src/backend/optimizer/path/allpaths.c49
-rw-r--r--src/backend/optimizer/path/costsize.c67
-rw-r--r--src/backend/optimizer/plan/createplan.c69
-rw-r--r--src/backend/optimizer/plan/planner.c15
-rw-r--r--src/backend/optimizer/plan/setrefs.c11
-rw-r--r--src/backend/optimizer/plan/subselect.c1
-rw-r--r--src/backend/optimizer/util/pathnode.c22
-rw-r--r--src/backend/parser/gram.y36
-rw-r--r--src/backend/parser/parse_clause.c56
-rw-r--r--src/backend/parser/parse_func.c143
-rw-r--r--src/backend/rewrite/rewriteHandler.c3
-rw-r--r--src/backend/utils/adt/ruleutils.c50
-rw-r--r--src/backend/utils/cache/lsyscache.c27
-rw-r--r--src/backend/utils/cache/syscache.c23
-rw-r--r--src/backend/utils/misc/sampling.c33
34 files changed, 1952 insertions, 30 deletions
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile
index 21721b48f04..bd93a6a8d1e 100644
--- a/src/backend/access/Makefile
+++ b/src/backend/access/Makefile
@@ -8,6 +8,7 @@ subdir = src/backend/access
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist transam
+SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \
+ tablesample transam
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 1a8d2f2d0b5..f0c2394e600 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,9 @@ bool synchronize_seqscans = true;
static HeapScanDesc heap_beginscan_internal(Relation relation,
Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap);
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan,
+ bool temp_snap);
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -294,9 +295,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
/*
* Currently, we don't have a stats counter for bitmap heap scans (but the
- * underlying bitmap index scans will be counted).
+ * underlying bitmap index scans will be counted) or sample scans (we only
+ * update stats for tuple fetches there)
*/
- if (!scan->rs_bitmapscan)
+ if (!scan->rs_bitmapscan && !scan->rs_samplescan)
pgstat_count_heap_scan(scan->rs_rd);
}
@@ -315,7 +317,7 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
* In page-at-a-time mode it performs additional work, namely determining
* which tuples on the page are visible.
*/
-static void
+void
heapgetpage(HeapScanDesc scan, BlockNumber page)
{
Buffer buffer;
@@ -1310,6 +1312,9 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
* HeapScanDesc for a bitmap heap scan. Although that scan technology is
* really quite unlike a standard seqscan, there is just enough commonality
* to make it worth using the same data structure.
+ *
+ * heap_beginscan_samplingscan is alternate entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.
* ----------------
*/
HeapScanDesc
@@ -1317,7 +1322,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, false);
+ true, true, true, false, false, false);
}
HeapScanDesc
@@ -1327,7 +1332,7 @@ heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, true);
+ true, true, true, false, false, true);
}
HeapScanDesc
@@ -1336,7 +1341,8 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot,
bool allow_strat, bool allow_sync)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- allow_strat, allow_sync, false, false);
+ allow_strat, allow_sync, true,
+ false, false, false);
}
HeapScanDesc
@@ -1344,14 +1350,24 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- false, false, true, false);
+ false, false, true, true, false, false);
+}
+
+HeapScanDesc
+heap_beginscan_sampling(Relation relation, Snapshot snapshot,
+ int nkeys, ScanKey key,
+ bool allow_strat, bool allow_pagemode)
+{
+ return heap_beginscan_internal(relation, snapshot, nkeys, key,
+ allow_strat, false, allow_pagemode,
+ false, true, false);
}
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap)
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan, bool temp_snap)
{
HeapScanDesc scan;
@@ -1373,6 +1389,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
scan->rs_snapshot = snapshot;
scan->rs_nkeys = nkeys;
scan->rs_bitmapscan = is_bitmapscan;
+ scan->rs_samplescan = is_samplescan;
scan->rs_strategy = NULL; /* set in initscan */
scan->rs_allow_strat = allow_strat;
scan->rs_allow_sync = allow_sync;
@@ -1381,7 +1398,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
/*
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
*/
- scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
+ scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
/*
* For a seqscan in a serializable transaction, acquire a predicate lock
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
new file mode 100644
index 00000000000..46eeb59f9c4
--- /dev/null
+++ b/src/backend/access/tablesample/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for utils/tablesample
+#
+# IDENTIFICATION
+# src/backend/utils/tablesample/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/tablesample
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tablesample.o system.o bernoulli.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
new file mode 100644
index 00000000000..c91f3f593e5
--- /dev/null
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -0,0 +1,235 @@
+/*-------------------------------------------------------------------------
+ *
+ * bernoulli.c
+ * interface routines for BERNOULLI tablesample method
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/bernoulli.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/* tsdesc */
+typedef struct
+{
+ uint32 seed; /* random seed */
+ BlockNumber startblock; /* starting block, we use ths for syncscan support */
+ BlockNumber nblocks; /* number of blocks */
+ BlockNumber blockno; /* current block */
+ float4 probability; /* probabilty that tuple will be returned (0.0-1.0) */
+ OffsetNumber lt; /* last tuple returned from current block */
+ SamplerRandomState randstate; /* random generator tsdesc */
+} BernoulliSamplerData;
+
+/*
+ * Initialize the state.
+ */
+Datum
+tsm_bernoulli_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ BernoulliSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(BernoulliSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->startblock = scan->rs_startblock;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->blockno = InvalidBlockNumber;
+ sampler->probability = percent / 100;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number to read or InvalidBlockNumber if we are at the
+ * end of the relation.
+ */
+Datum
+tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ /*
+ * Bernoulli sampling scans all blocks on the table and supports
+ * syncscan so loop from startblock to startblock instead of
+ * from 0 to nblocks.
+ */
+ if (sampler->blockno == InvalidBlockNumber)
+ sampler->blockno = sampler->startblock;
+ else
+ {
+ sampler->blockno++;
+
+ if (sampler->blockno >= sampler->nblocks)
+ sampler->blockno = 0;
+
+ if (sampler->blockno == sampler->startblock)
+ PG_RETURN_UINT32(InvalidBlockNumber);
+ }
+
+ PG_RETURN_UINT32(sampler->blockno);
+}
+
+/*
+ * Get next tuple from current block.
+ *
+ * This method implements the main logic in bernoulli sampling.
+ * The algorithm simply generates new random number (in 0.0-1.0 range) and if
+ * it falls within user specified probability (in the same range) return the
+ * tuple offset.
+ *
+ * It is ok here to return tuple offset without knowing if tuple is visible
+ * and not check it via examinetuple. The reason for that is that we do the
+ * coinflip (random number generation) for every tuple in the table. Since all
+ * tuples have same probability of being returned the visible and invisible
+ * tuples will be returned in same ratio as they have in the actual table.
+ * This means that there is no skew towards either visible or invisible tuples
+ * and the number returned visible tuples to from the executor node is the
+ * fraction of visible tuples which was specified in input.
+ *
+ * This is faster than doing the coinflip in the examinetuple because we don't
+ * have to do visibility checks on uninteresting tuples.
+ *
+ * If we reach end of the block return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+Datum
+tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+ float4 probability = sampler->probability;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /*
+ * Loop over tuple offsets until the random generator returns value that
+ * is within the probability of returning the tuple or until we reach
+ * end of the block.
+ *
+ * (This is our implementation of bernoulli trial)
+ */
+ while (sampler_random_fract(sampler->randstate) > probability)
+ {
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ break;
+ }
+
+ if (tupoffset > maxoffset)
+ /* Tell SampleScan that we want next block. */
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_bernoulli_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset tsdesc (called by ReScan).
+ */
+Datum
+tsm_bernoulli_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ sampler->blockno = InvalidBlockNumber;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_bernoulli_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ *pages = baserel->pages;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
new file mode 100644
index 00000000000..1412e511faf
--- /dev/null
+++ b/src/backend/access/tablesample/system.c
@@ -0,0 +1,186 @@
+/*-------------------------------------------------------------------------
+ *
+ * system.c
+ * interface routines for system tablesample method
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/system.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/*
+ * State
+ */
+typedef struct
+{
+ BlockSamplerData bs;
+ uint32 seed; /* random seed */
+ BlockNumber nblocks; /* number of block in relation */
+ int samplesize; /* number of blocks to return */
+ OffsetNumber lt; /* last tuple returned from current block */
+} SystemSamplerData;
+
+
+/*
+ * Initializes the state.
+ */
+Datum
+tsm_system_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ SystemSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(SystemSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
+ sampler->lt = InvalidOffsetNumber;
+
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number or InvalidBlockNumber when we're done.
+ *
+ * Uses the same logic as ANALYZE for picking the random blocks.
+ */
+Datum
+tsm_system_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ BlockNumber blockno;
+
+ if (!BlockSampler_HasMore(&sampler->bs))
+ PG_RETURN_UINT32(InvalidBlockNumber);
+
+ blockno = BlockSampler_Next(&sampler->bs);
+
+ PG_RETURN_UINT32(blockno);
+}
+
+/*
+ * Get next tuple offset in current block or InvalidOffsetNumber if we are done
+ * with this block.
+ */
+Datum
+tsm_system_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_system_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset state (called by ReScan).
+ */
+Datum
+tsm_system_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+
+ sampler->lt = InvalidOffsetNumber;
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_system_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *pages = baserel->pages * samplesize;
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
new file mode 100644
index 00000000000..ef55d062e75
--- /dev/null
+++ b/src/backend/access/tablesample/tablesample.c
@@ -0,0 +1,368 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.c
+ * TABLESAMPLE internal API
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/tablesample.c
+ *
+ * TABLESAMPLE is the SQL standard clause for sampling the relations.
+ *
+ * The API is interface between the Executor and the TABLESAMPLE Methods.
+ *
+ * TABLESAMPLE Methods are implementations of actual sampling algorithms which
+ * can be used for returning a sample of the source relation.
+ * Methods don't read the table directly but are asked for block number and
+ * tuple offset which they want to examine (or return) and the tablesample
+ * interface implemented here does the reading for them.
+ *
+ * We currently only support sampling of the physical relations, but in the
+ * future we might extend the API to support subqueries as well.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tablesample.h"
+
+#include "catalog/pg_tablesample_method.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+
+
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
+
+
+/*
+ * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
+ */
+TableSampleDesc *
+tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
+{
+ FunctionCallInfoData fcinfo;
+ int i;
+ List *args = tablesample->args;
+ ListCell *arg;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
+
+ /* Load functions */
+ fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
+ fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
+ fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
+ if (OidIsValid(tablesample->tsmexaminetuple))
+ fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
+ else
+ tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
+ fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
+ fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
+
+ InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
+ list_length(args) + 2,
+ InvalidOid, NULL, NULL);
+
+ tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
+ tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
+
+ /* First argument for init function is always TableSampleDesc */
+ fcinfo.arg[0] = PointerGetDatum(tsdesc);
+ fcinfo.argnull[0] = false;
+
+ /*
+ * Second arg for init function is always REPEATABLE
+ * When tablesample->repeatable is NULL then REPEATABLE clause was not
+ * specified.
+ * When specified, the expression cannot evaluate to NULL.
+ */
+ if (tablesample->repeatable)
+ {
+ ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
+ (PlanState *) scanstate);
+ fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[1], NULL);
+ if (fcinfo.argnull[1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value")));
+ }
+ else
+ {
+ fcinfo.arg[1] = UInt32GetDatum(random());
+ fcinfo.argnull[1] = false;
+ }
+
+ /* Rest of the arguments come from user. */
+ i = 2;
+ foreach(arg, args)
+ {
+ Expr *argexpr = (Expr *) lfirst(arg);
+ ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
+
+ if (argstate == NULL)
+ {
+ fcinfo.argnull[i] = true;
+ fcinfo.arg[i] = (Datum) 0;;
+ }
+
+ fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[i], NULL);
+ i++;
+ }
+ Assert(i == fcinfo.nargs);
+
+ (void) FunctionCallInvoke(&fcinfo);
+
+ return tsdesc;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE Method.
+ */
+HeapTuple
+tablesample_getnext(TableSampleDesc *desc)
+{
+ HeapScanDesc scan = desc->heapScan;
+ HeapTuple tuple = &(scan->rs_ctup);
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool page_all_visible;
+ ItemId itemid;
+ OffsetNumber tupoffset,
+ maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
+
+ /*
+ * When pagemode is disabled, the scan will do visibility checks for each
+ * tuple it finds so the buffer needs to be locked.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ UInt16GetDatum(maxoffset)));
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ bool visible;
+ bool found;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (page_all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /*
+ * Let the sampling method examine the actual tuple and decide if we
+ * should return it.
+ *
+ * Note that we let it examine even invisible tuples for
+ * statistical purposes, but not return them since user should
+ * never see invisible tuples.
+ */
+ if (OidIsValid(desc->tsmexaminetuple.fn_oid))
+ {
+ found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ PointerGetDatum(tuple),
+ BoolGetDatum(visible)));
+ /* Should not happen if sampling method is well written. */
+ if (found && !visible)
+ elog(ERROR, "Sampling method wanted to return invisible tuple");
+ }
+ else
+ found = visible;
+
+ /* Found visible tuple, return it. */
+ if (found)
+ {
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+
+ /*
+ * Report our new scan position for synchronization purposes. We
+ * don't do that when moving backwards, however. That would just
+ * mess up any other forward-moving scanners.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
+ blockno : scan->rs_startblock);
+
+ /*
+ * Reached end of scan.
+ */
+ if (!BlockNumberIsValid(blockno))
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
+
+/*
+ * Reset the sampling to starting state
+ */
+void
+tablesample_reset(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
+}
+
+/*
+ * Signal the sampling method that the scan has finished.
+ */
+void
+tablesample_end(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
+}
+
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ /*
+ * If this scan is reading whole pages at a time, there is already
+ * visibility info present in rs_vistuples so we can just search it
+ * for the tupoffset.
+ */
+ if (scan->rs_pageatatime)
+ {
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ /*
+ * Do the binary search over rs_vistuples, it's already sorted by
+ * OffsetNumber so we don't need to do any sorting ourselves here.
+ *
+ * We could use bsearch() here but it's slower for integers because
+ * of the function call overhead and because it needs boiler plate code
+ * it would not save us anything code-wise anyway.
+ */
+ while (start <= end)
+ {
+ int mid = start + (end - start) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (curoffset == tupoffset)
+ return true;
+ else if (curoffset > tupoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* No pagemode, we have to check the tuple itself. */
+ Snapshot snapshot = scan->rs_snapshot;
+ Buffer buffer = scan->rs_cbuf;
+
+ bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
+ snapshot);
+
+ return visible;
+ }
+}
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 37d05d1acc6..3d1139b5ba0 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,9 +40,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_ts_parser.h pg_ts_template.h pg_extension.h \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
- pg_default_acl.h pg_seclabel.h pg_shseclabel.h pg_collation.h pg_range.h \
- pg_transform.h \
- toasting.h indexing.h \
+ pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+ pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
)
# location of Catalog.pm
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 952cf204a0a..65e329eab07 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1150,7 +1150,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one
* old tuple at random
*/
- int k = (int) (targrows * sampler_random_fract());
+ int k = (int) (targrows * sampler_random_fract(rstate.randstate));
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index eeb8f19017e..478771c6ba1 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -731,6 +731,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+ case T_SampleScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
@@ -967,6 +968,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
else
pname = sname;
break;
+ case T_SampleScan:
+ {
+ /*
+ * Fetch the tablesample method name from RTE.
+ *
+ * It would be nice to also show parameters, but since we
+ * support arbitrary expressions as parameter it might get
+ * quite messy.
+ */
+ RangeTblEntry *rte;
+ rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
+ custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
+ pname = psprintf("Sample Scan (%s)", custom_name);
+ }
+ break;
case T_Material:
pname = sname = "Materialize";
break;
@@ -1089,6 +1105,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
+ case T_SampleScan:
+ ExplainScanTarget((Scan *) plan, es);
+ break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
@@ -1339,6 +1358,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_CteScan:
case T_WorkTableScan:
case T_SubqueryScan:
+ case T_SampleScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2238,6 +2258,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_TidScan:
case T_ForeignScan:
case T_CustomScan:
+ case T_SampleScan:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index bc5d373d68a..08cba6fa2b5 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -21,7 +21,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
nodeLimit.o nodeLockRows.o \
nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \
nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \
- nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
+ nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \
nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \
nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o spi.o
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 6ebad2f03f0..4948a265cb2 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -39,6 +39,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -155,6 +156,10 @@ ExecReScan(PlanState *node)
ExecReScanSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecReScanSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecReScanIndexScan((IndexScanState *) node);
break;
@@ -480,6 +485,9 @@ ExecSupportsBackwardScan(Plan *node)
}
return false;
+ case T_SampleScan:
+ return false;
+
case T_Material:
case T_Sort:
/* these don't evaluate tlist */
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index d87be963a95..bcd287f8742 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -261,6 +261,7 @@ search_plan_tree(PlanState *node, Oid table_oid)
* Relation scan nodes can all be treated alike
*/
case T_SeqScanState:
+ case T_SampleScanState:
case T_IndexScanState:
case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 9892499fb7a..03c2febc3e1 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -102,6 +102,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -190,6 +191,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_SampleScan:
+ result = (PlanState *) ExecInitSampleScan((SampleScan *) node,
+ estate, eflags);
+ break;
+
case T_IndexScan:
result = (PlanState *) ExecInitIndexScan((IndexScan *) node,
estate, eflags);
@@ -406,6 +412,10 @@ ExecProcNode(PlanState *node)
result = ExecSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ result = ExecSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
result = ExecIndexScan((IndexScanState *) node);
break;
@@ -644,6 +654,10 @@ ExecEndNode(PlanState *node)
ExecEndSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecEndSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecEndIndexScan((IndexScanState *) node);
break;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
new file mode 100644
index 00000000000..fc89d1dca03
--- /dev/null
+++ b/src/backend/executor/nodeSamplescan.c
@@ -0,0 +1,256 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSamplescan.c
+ * Support routines for sample scans of relations (table sampling).
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeSamplescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tablesample.h"
+#include "executor/executor.h"
+#include "executor/nodeSamplescan.h"
+#include "miscadmin.h"
+#include "parser/parsetree.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+
+static void InitScanRelation(SampleScanState *node, EState *estate,
+ int eflags, TableSampleClause *tablesample);
+static TupleTableSlot *SampleNext(SampleScanState *node);
+
+
+/* ----------------------------------------------------------------
+ * Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * SampleNext
+ *
+ * This is a workhorse for ExecSampleScan
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+SampleNext(SampleScanState *node)
+{
+ TupleTableSlot *slot;
+ TableSampleDesc *tsdesc;
+ HeapTuple tuple;
+
+ /*
+ * get information from the scan state
+ */
+ slot = node->ss.ss_ScanTupleSlot;
+ tsdesc = node->tsdesc;
+
+ tuple = tablesample_getnext(tsdesc);
+
+ if (tuple)
+ ExecStoreTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ tsdesc->heapScan->rs_cbuf, /* buffer associated with this tuple */
+ false); /* don't pfree this pointer */
+ else
+ ExecClearTuple(slot);
+
+ return slot;
+}
+
+/*
+ * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
+{
+ /* No need to recheck for SampleScan */
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecSampleScan(node)
+ *
+ * Scans the relation using the sampling method and returns
+ * the next qualifying tuple.
+ * We call the ExecScan() routine and pass it the appropriate
+ * access method functions.
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+ExecSampleScan(SampleScanState *node)
+{
+ return ExecScan((ScanState *) node,
+ (ExecScanAccessMtd) SampleNext,
+ (ExecScanRecheckMtd) SampleRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * InitScanRelation
+ *
+ * Set up to access the scan relation.
+ * ----------------------------------------------------------------
+ */
+static void
+InitScanRelation(SampleScanState *node, EState *estate, int eflags,
+ TableSampleClause *tablesample)
+{
+ Relation currentRelation;
+
+ /*
+ * get the relation object id from the relid'th entry in the range table,
+ * open that relation and acquire appropriate lock on it.
+ */
+ currentRelation = ExecOpenScanRelation(estate,
+ ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ eflags);
+
+ node->ss.ss_currentRelation = currentRelation;
+
+ /*
+ * Even though we aren't going to do a conventional seqscan, it is useful
+ * to create a HeapScanDesc --- many of the fields in it are usable.
+ */
+ node->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
+ tablesample->tsmseqscan,
+ tablesample->tsmpagemode);
+
+ /* and report the scan tuple slot's rowtype */
+ ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
+}
+
+
+/* ----------------------------------------------------------------
+ * ExecInitSampleScan
+ * ----------------------------------------------------------------
+ */
+SampleScanState *
+ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
+{
+ SampleScanState *scanstate;
+ RangeTblEntry *rte = rt_fetch(node->scanrelid,
+ estate->es_range_table);
+
+ Assert(outerPlan(node) == NULL);
+ Assert(innerPlan(node) == NULL);
+ Assert(rte->tablesample != NULL);
+
+ /*
+ * create state structure
+ */
+ scanstate = makeNode(SampleScanState);
+ scanstate->ss.ps.plan = (Plan *) node;
+ scanstate->ss.ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &scanstate->ss.ps);
+
+ /*
+ * initialize child expressions
+ */
+ scanstate->ss.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->plan.targetlist,
+ (PlanState *) scanstate);
+ scanstate->ss.ps.qual = (List *)
+ ExecInitExpr((Expr *) node->plan.qual,
+ (PlanState *) scanstate);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
+ ExecInitScanTupleSlot(estate, &scanstate->ss);
+
+ /*
+ * initialize scan relation
+ */
+ InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+
+ scanstate->ss.ps.ps_TupFromTlist = false;
+
+ /*
+ * Initialize result tuple type and projection info.
+ */
+ ExecAssignResultTypeFromTL(&scanstate->ss.ps);
+ ExecAssignScanProjectionInfo(&scanstate->ss);
+
+ scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+
+ return scanstate;
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndSampleScan
+ *
+ * frees any storage allocated through C routines.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSampleScan(SampleScanState *node)
+{
+ /*
+ * Tell sampling function that we finished the scan.
+ */
+ tablesample_end(node->tsdesc);
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ss.ps);
+
+ /*
+ * clean out the tuple table
+ */
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+
+ /*
+ * close heap scan
+ */
+ heap_endscan(node->ss.ss_currentScanDesc);
+
+ /*
+ * close the heap relation.
+ */
+ ExecCloseScanRelation(node->ss.ss_currentRelation);
+}
+
+/* ----------------------------------------------------------------
+ * Join Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * ExecReScanSampleScan
+ *
+ * Rescans the relation.
+ *
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanSampleScan(SampleScanState *node)
+{
+ heap_rescan(node->ss.ss_currentScanDesc, NULL);
+
+ /*
+ * Tell sampling function to reset its state for rescan.
+ */
+ tablesample_reset(node->tsdesc);
+
+ ExecScanReScan(&node->ss);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 25839eed945..bdc7e61935c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -642,6 +642,22 @@ _copyCustomScan(const CustomScan *from)
}
/*
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+ SampleScan *newnode = makeNode(SampleScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ return newnode;
+}
+
+/*
* CopyJoinFields
*
* This function copies the fields of the Join node. It is used by
@@ -2063,6 +2079,7 @@ _copyRangeTblEntry(const RangeTblEntry *from)
COPY_SCALAR_FIELD(rtekind);
COPY_SCALAR_FIELD(relid);
COPY_SCALAR_FIELD(relkind);
+ COPY_NODE_FIELD(tablesample);
COPY_NODE_FIELD(subquery);
COPY_SCALAR_FIELD(security_barrier);
COPY_SCALAR_FIELD(jointype);
@@ -2224,6 +2241,40 @@ _copyCommonTableExpr(const CommonTableExpr *from)
return newnode;
}
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+ RangeTableSample *newnode = makeNode(RangeTableSample);
+
+ COPY_NODE_FIELD(relation);
+ COPY_STRING_FIELD(method);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+ TableSampleClause *newnode = makeNode(TableSampleClause);
+
+ COPY_SCALAR_FIELD(tsmid);
+ COPY_SCALAR_FIELD(tsmseqscan);
+ COPY_SCALAR_FIELD(tsmpagemode);
+ COPY_SCALAR_FIELD(tsminit);
+ COPY_SCALAR_FIELD(tsmnextblock);
+ COPY_SCALAR_FIELD(tsmnexttuple);
+ COPY_SCALAR_FIELD(tsmexaminetuple);
+ COPY_SCALAR_FIELD(tsmend);
+ COPY_SCALAR_FIELD(tsmreset);
+ COPY_SCALAR_FIELD(tsmcost);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
static A_Expr *
_copyAExpr(const A_Expr *from)
{
@@ -4179,6 +4230,9 @@ copyObject(const void *from)
case T_CustomScan:
retval = _copyCustomScan(from);
break;
+ case T_SampleScan:
+ retval = _copySampleScan(from);
+ break;
case T_Join:
retval = _copyJoin(from);
break;
@@ -4842,6 +4896,12 @@ copyObject(const void *from)
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
+ case T_RangeTableSample:
+ retval = _copyRangeTableSample(from);
+ break;
+ case T_TableSampleClause:
+ retval = _copyTableSampleClause(from);
+ break;
case T_FuncWithArgs:
retval = _copyFuncWithArgs(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index c4b3615caf3..d483221fb7a 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2359,6 +2359,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b)
COMPARE_SCALAR_FIELD(rtekind);
COMPARE_SCALAR_FIELD(relid);
COMPARE_SCALAR_FIELD(relkind);
+ COMPARE_NODE_FIELD(tablesample);
COMPARE_NODE_FIELD(subquery);
COMPARE_SCALAR_FIELD(security_barrier);
COMPARE_SCALAR_FIELD(jointype);
@@ -2503,6 +2504,36 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
}
static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+ COMPARE_NODE_FIELD(relation);
+ COMPARE_STRING_FIELD(method);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+ COMPARE_SCALAR_FIELD(tsmid);
+ COMPARE_SCALAR_FIELD(tsmseqscan);
+ COMPARE_SCALAR_FIELD(tsmpagemode);
+ COMPARE_SCALAR_FIELD(tsminit);
+ COMPARE_SCALAR_FIELD(tsmnextblock);
+ COMPARE_SCALAR_FIELD(tsmnexttuple);
+ COMPARE_SCALAR_FIELD(tsmexaminetuple);
+ COMPARE_SCALAR_FIELD(tsmend);
+ COMPARE_SCALAR_FIELD(tsmreset);
+ COMPARE_SCALAR_FIELD(tsmcost);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
+static bool
_equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
{
COMPARE_SCALAR_FIELD(xmloption);
@@ -3236,6 +3267,12 @@ equal(const void *a, const void *b)
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
+ case T_RangeTableSample:
+ retval = _equalRangeTableSample(a, b);
+ break;
+ case T_TableSampleClause:
+ retval = _equalTableSampleClause(a, b);
+ break;
case T_FuncWithArgs:
retval = _equalFuncWithArgs(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index eac02159236..42d62d32d93 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2058,6 +2058,14 @@ range_table_walker(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ if (walker(rte->tablesample->args, context))
+ return true;
+ if (walker(rte->tablesample->repeatable, context))
+ return true;
+ }
+ break;
case RTE_CTE:
/* nothing to do */
break;
@@ -2813,6 +2821,14 @@ range_table_mutator(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ MUTATE(rte->tablesample->args, rte->tablesample->args,
+ List *);
+ MUTATE(rte->tablesample->repeatable,
+ rte->tablesample->repeatable, Node *);
+ }
+ break;
case RTE_CTE:
/* we don't bother to copy eref, aliases, etc; OK? */
break;
@@ -3309,6 +3325,18 @@ raw_expression_tree_walker(Node *node,
break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
+ case T_RangeTableSample:
+ {
+ RangeTableSample *rts = (RangeTableSample *) node;
+
+ if (walker(rts->relation, context))
+ return true;
+ if (walker(rts->repeatable, context))
+ return true;
+ if (walker(rts->args, context))
+ return true;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index fe868b889d9..7918553da0a 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -592,6 +592,14 @@ _outCustomScan(StringInfo str, const CustomScan *node)
}
static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+ WRITE_NODE_TYPE("SAMPLESCAN");
+
+ _outScanInfo(str, (const Scan *) node);
+}
+
+static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_TYPE("JOIN");
@@ -2445,6 +2453,36 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
}
static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+ WRITE_NODE_TYPE("RANGETABLESAMPLE");
+
+ WRITE_NODE_FIELD(relation);
+ WRITE_STRING_FIELD(method);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+ WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
+
+ WRITE_OID_FIELD(tsmid);
+ WRITE_BOOL_FIELD(tsmseqscan);
+ WRITE_BOOL_FIELD(tsmpagemode);
+ WRITE_OID_FIELD(tsminit);
+ WRITE_OID_FIELD(tsmnextblock);
+ WRITE_OID_FIELD(tsmnexttuple);
+ WRITE_OID_FIELD(tsmexaminetuple);
+ WRITE_OID_FIELD(tsmend);
+ WRITE_OID_FIELD(tsmreset);
+ WRITE_OID_FIELD(tsmcost);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
+static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_NODE_TYPE("SETOPERATIONSTMT");
@@ -2474,6 +2512,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
case RTE_RELATION:
WRITE_OID_FIELD(relid);
WRITE_CHAR_FIELD(relkind);
+ WRITE_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
WRITE_NODE_FIELD(subquery);
@@ -2973,6 +3012,9 @@ _outNode(StringInfo str, const void *obj)
case T_CustomScan:
_outCustomScan(str, obj);
break;
+ case T_SampleScan:
+ _outSampleScan(str, obj);
+ break;
case T_Join:
_outJoin(str, obj);
break;
@@ -3319,6 +3361,12 @@ _outNode(StringInfo str, const void *obj)
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
+ case T_RangeTableSample:
+ _outRangeTableSample(str, obj);
+ break;
+ case T_TableSampleClause:
+ _outTableSampleClause(str, obj);
+ break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 8136306e1e5..c8fb894a75a 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -352,6 +352,46 @@ _readCommonTableExpr(void)
}
/*
+ * _readRangeTableSample
+ */
+static RangeTableSample *
+_readRangeTableSample(void)
+{
+ READ_LOCALS(RangeTableSample);
+
+ READ_NODE_FIELD(relation);
+ READ_STRING_FIELD(method);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
+/*
+ * _readTableSampleClause
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+ READ_LOCALS(TableSampleClause);
+
+ READ_OID_FIELD(tsmid);
+ READ_BOOL_FIELD(tsmseqscan);
+ READ_BOOL_FIELD(tsmpagemode);
+ READ_OID_FIELD(tsminit);
+ READ_OID_FIELD(tsmnextblock);
+ READ_OID_FIELD(tsmnexttuple);
+ READ_OID_FIELD(tsmexaminetuple);
+ READ_OID_FIELD(tsmend);
+ READ_OID_FIELD(tsmreset);
+ READ_OID_FIELD(tsmcost);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
+/*
* _readSetOperationStmt
*/
static SetOperationStmt *
@@ -1255,6 +1295,7 @@ _readRangeTblEntry(void)
case RTE_RELATION:
READ_OID_FIELD(relid);
READ_CHAR_FIELD(relkind);
+ READ_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
READ_NODE_FIELD(subquery);
@@ -1351,6 +1392,10 @@ parseNodeString(void)
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
+ else if (MATCH("RANGETABLESAMPLE", 16))
+ return_value = _readRangeTableSample();
+ else if (MATCH("TABLESAMPLECLAUSE", 17))
+ return_value = _readTableSampleClause();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 9caca94f64b..4cd1bf65e74 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -71,6 +71,10 @@ static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -265,6 +269,11 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_size(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Sampled relation */
+ set_tablesample_rel_size(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -332,6 +341,11 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Build sample scan on relation */
+ set_tablesample_rel_pathlist(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -418,6 +432,41 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
}
/*
+ * set_tablesample_rel_size
+ * Set size estimates for a sampled relation.
+ */
+static void
+set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablesample_rel_pathlist
+ * Build access paths for a sampled relation
+ *
+ * There is only one possible path - sampling scan
+ */
+static void
+set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+ Path *path;
+
+ /*
+ * We don't support pushing join clauses into the quals of a seqscan, but
+ * it could still have required parameterization due to LATERAL refs in
+ * its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* We only do sample scan if it was requested */
+ path = create_samplescan_path(root, rel, required_outer);
+ rel->pathlist = list_make1(path);
+}
+
+/*
* set_foreign_size
* Set size estimates for a foreign table RTE
*/
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1a0d358c5f7..c2b2b7622a6 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -220,6 +220,73 @@ cost_seqscan(Path *path, PlannerInfo *root,
}
/*
+ * cost_samplescan
+ * Determines and returns the cost of scanning a relation using sampling.
+ *
+ * From planner/optimizer perspective, we don't care all that much about cost
+ * itself since there is always only one scan path to consider when sampling
+ * scan is present, but number of rows estimation is still important.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double spc_seq_page_cost,
+ spc_random_page_cost,
+ spc_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ BlockNumber pages;
+ double tuples;
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ TableSampleClause *tablesample = rte->tablesample;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (path->param_info)
+ path->rows = path->param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Call the sampling method's costing function. */
+ OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
+ PointerGetDatum(path), PointerGetDatum(baserel),
+ PointerGetDatum(tablesample->args),
+ PointerGetDatum(&pages), PointerGetDatum(&tuples));
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+
+ spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
+ spc_random_page_cost;
+
+ /*
+ * disk costs
+ */
+ run_cost += spc_page_cost * pages;
+
+ /* CPU costs */
+ get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_index
* Determines and returns the cost of scanning a relation using an index.
*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 783e34b4fbc..c6095167e80 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -59,6 +59,8 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path);
static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
+static SampleScan *create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses);
static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path,
List *tlist, List *scan_clauses, bool indexonly);
static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
@@ -101,6 +103,7 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_path_costsize(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
Oid indexid, List *indexqual, List *indexqualorig,
List *indexorderby, List *indexorderbyorig, Oid *indexorderbyops,
@@ -229,6 +232,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path)
switch (best_path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -344,6 +348,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
scan_clauses);
break;
+ case T_SampleScan:
+ plan = (Plan *) create_samplescan_plan(root,
+ best_path,
+ tlist,
+ scan_clauses);
+ break;
+
case T_IndexScan:
plan = (Plan *) create_indexscan_plan(root,
(IndexPath *) best_path,
@@ -547,6 +558,7 @@ disuse_physical_tlist(PlannerInfo *root, Plan *plan, Path *path)
switch (path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -1134,6 +1146,45 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
}
/*
+ * create_samplescan_plan
+ * Returns a samplecan plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static SampleScan *
+create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses)
+{
+ SampleScan *scan_plan;
+ Index scan_relid = best_path->parent->relid;
+
+ /* it should be a base rel with tablesample clause... */
+ Assert(scan_relid > 0);
+ Assert(best_path->parent->rtekind == RTE_RELATION);
+ Assert(best_path->pathtype == T_SampleScan);
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ /* Replace any outer-relation variables with nestloop params */
+ if (best_path->param_info)
+ {
+ scan_clauses = (List *)
+ replace_nestloop_params(root, (Node *) scan_clauses);
+ }
+
+ scan_plan = make_samplescan(tlist,
+ scan_clauses,
+ scan_relid);
+
+ copy_path_costsize(&scan_plan->plan, best_path);
+
+ return scan_plan;
+}
+
+/*
* create_indexscan_plan
* Returns an indexscan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
@@ -3378,6 +3429,24 @@ make_seqscan(List *qptlist,
return node;
}
+static SampleScan *
+make_samplescan(List *qptlist,
+ List *qpqual,
+ Index scanrelid)
+{
+ SampleScan *node = makeNode(SampleScan);
+ Plan *plan = &node->plan;
+
+ /* cost should be inserted by caller */
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scanrelid = scanrelid;
+
+ return node;
+}
+
static IndexScan *
make_indexscan(List *qptlist,
List *qpqual,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8de57c8e6bb..9ba10516bb2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -60,6 +60,7 @@ planner_hook_type planner_hook = NULL;
#define EXPRKIND_LIMIT 6
#define EXPRKIND_APPINFO 7
#define EXPRKIND_PHV 8
+#define EXPRKIND_TABLESAMPLE 9
/* Passthrough data for standard_qp_callback */
typedef struct
@@ -486,7 +487,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
int kind;
- if (rte->rtekind == RTE_SUBQUERY)
+ if (rte->rtekind == RTE_RELATION)
+ {
+ if (rte->tablesample)
+ {
+ rte->tablesample->args = (List *)
+ preprocess_expression(root, (Node *) rte->tablesample->args,
+ EXPRKIND_TABLESAMPLE);
+ rte->tablesample->repeatable = (Node *)
+ preprocess_expression(root, rte->tablesample->repeatable,
+ EXPRKIND_TABLESAMPLE);
+ }
+ }
+ else if (rte->rtekind == RTE_SUBQUERY)
{
/*
* We don't want to do all preprocessing yet on the subquery's
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 517409d28a0..73b19888369 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -451,6 +451,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
fix_scan_list(root, splan->plan.qual, rtoffset);
}
break;
+ case T_SampleScan:
+ {
+ SampleScan *splan = (SampleScan *) plan;
+
+ splan->scanrelid += rtoffset;
+ splan->plan.targetlist =
+ fix_scan_list(root, splan->plan.targetlist, rtoffset);
+ splan->plan.qual =
+ fix_scan_list(root, splan->plan.qual, rtoffset);
+ }
+ break;
case T_IndexScan:
{
IndexScan *splan = (IndexScan *) plan;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index afccee53acf..2f7f5c0df0e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2167,6 +2167,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_SeqScan:
+ case T_SampleScan:
context.paramids = bms_add_members(context.paramids, scan_params);
break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index faca30b322d..ea7a47bdf45 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -706,6 +706,26 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
}
/*
+ * create_samplescan_path
+ * Like seqscan but uses sampling function while scanning.
+ */
+Path *
+create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_SampleScan;
+ pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* samplescan has unordered result */
+
+ cost_samplescan(pathnode, root, rel);
+
+ return pathnode;
+}
+
+/*
* create_index_path
* Creates a path node for an index scan.
*
@@ -1778,6 +1798,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys,
required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
default:
break;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2dce87879e7..14397830686 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -454,6 +454,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <range> relation_expr
%type <range> relation_expr_opt_alias
%type <target> target_el single_set_clause set_target insert_column_item
+%type <node> relation_expr_tablesample tablesample_clause opt_repeatable_clause
%type <str> generic_option_name
%type <node> generic_option_arg
@@ -625,8 +626,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
SYMMETRIC SYSID SYSTEM_P
- TABLE TABLES TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP
- TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
+ TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN
+ TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
TRUNCATE TRUSTED TYPE_P TYPES_P
UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNLOGGED
@@ -10386,6 +10387,10 @@ table_ref: relation_expr opt_alias_clause
$1->alias = $2;
$$ = (Node *) $1;
}
+ | relation_expr_tablesample
+ {
+ $$ = (Node *) $1;
+ }
| func_table func_alias_clause
{
RangeFunction *n = (RangeFunction *) $1;
@@ -10711,6 +10716,32 @@ relation_expr_opt_alias: relation_expr %prec UMINUS
}
;
+
+relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
+ {
+ RangeTableSample *n = (RangeTableSample *) $3;
+ n->relation = $1;
+ n->relation->alias = $2;
+ $$ = (Node *) n;
+ }
+ ;
+
+tablesample_clause:
+ TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+ {
+ RangeTableSample *n = makeNode(RangeTableSample);
+ n->method = $2;
+ n->args = $4;
+ n->repeatable = $6;
+ $$ = (Node *) n;
+ }
+ ;
+
+opt_repeatable_clause:
+ REPEATABLE '(' a_expr ')' { $$ = (Node *) $3; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
/*
* func_table represents a function invocation in a FROM list. It can be
* a plain function call, like "foo(...)", or a ROWS FROM expression with
@@ -13804,6 +13835,7 @@ type_func_name_keyword:
| OVERLAPS
| RIGHT
| SIMILAR
+ | TABLESAMPLE
| VERBOSE
;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 73c505ed85b..6b1bbe57d0e 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -17,6 +17,7 @@
#include "access/heapam.h"
#include "catalog/catalog.h"
+#include "access/htup_details.h"
#include "catalog/heap.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
@@ -31,6 +32,7 @@
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
+#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
@@ -39,6 +41,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/syscache.h"
/* Convenience macro for the most common makeNamespaceItem() case */
@@ -419,6 +422,39 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
return result;
}
+static RangeTblEntry *
+transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
+{
+ RangeTblEntry *rte = NULL;
+ CommonTableExpr *cte = NULL;
+ TableSampleClause *tablesample = NULL;
+
+ /* if relation has an unqualified name, it might be a CTE reference */
+ if (!rv->relation->schemaname)
+ {
+ Index levelsup;
+ cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
+ }
+
+ /* We first need to build a range table entry */
+ if (!cte)
+ rte = transformTableEntry(pstate, rv->relation);
+
+ if (!rte ||
+ (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
+ parser_errposition(pstate, rv->relation->location)));
+
+ tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
+ rv->args, rv->relation->location);
+ rte->tablesample = tablesample;
+
+ return rte;
+}
+
/*
* transformTableEntry --- transform a RangeVar (simple relation reference)
*/
@@ -1127,6 +1163,26 @@ transformFromClauseItem(ParseState *pstate, Node *n,
return (Node *) j;
}
+ else if (IsA(n, RangeTableSample))
+ {
+ /* Tablesample reference */
+ RangeTableSample *rv = (RangeTableSample *) n;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte = NULL;
+ int rtindex;
+
+ rte = transformTableSampleEntry(pstate, rv);
+
+ /* assume new rte is at end */
+ rtindex = list_length(pstate->p_rtable);
+ Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
+ *top_rte = rte;
+ *top_rti = rtindex;
+ *namespace = list_make1(makeDefaultNSItem(rte));
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = rtindex;
+ return (Node *) rtr;
+ }
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
return NULL; /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index f7affebf84e..fa50f92d8dd 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,6 +18,7 @@
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
+#include "catalog/pg_tablesample_method.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
@@ -26,6 +27,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
+#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
@@ -767,6 +769,147 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
}
+/*
+ * ParseTableSample
+ *
+ * Parse TABLESAMPLE clause and process the arguments
+ */
+TableSampleClause *
+ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
+ List *sampleargs, int location)
+{
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ Form_pg_proc procform;
+ TableSampleClause *tablesample;
+ List *fargs;
+ ListCell *larg;
+ int nargs, initnargs;
+ Oid init_arg_types[FUNC_MAX_ARGS];
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablesample method \"%s\" does not exist",
+ samplemethod),
+ parser_errposition(pstate, location)));
+
+ tablesample = makeNode(TableSampleClause);
+ tablesample->tsmid = HeapTupleGetOid(tuple);
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+
+ tablesample->tsmseqscan = tsm->tsmseqscan;
+ tablesample->tsmpagemode = tsm->tsmpagemode;
+ tablesample->tsminit = tsm->tsminit;
+ tablesample->tsmnextblock = tsm->tsmnextblock;
+ tablesample->tsmnexttuple = tsm->tsmnexttuple;
+ tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
+ tablesample->tsmend = tsm->tsmend;
+ tablesample->tsmreset = tsm->tsmreset;
+ tablesample->tsmcost = tsm->tsmcost;
+
+ ReleaseSysCache(tuple);
+
+ /* Validate the parameters against init function definition. */
+ tuple = SearchSysCache1(PROCOID,
+ ObjectIdGetDatum(tablesample->tsminit));
+
+ if (!HeapTupleIsValid(tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for function %u",
+ tablesample->tsminit);
+
+ procform = (Form_pg_proc) GETSTRUCT(tuple);
+ initnargs = procform->pronargs;
+ Assert(initnargs >= 3);
+
+ /*
+ * First parameter is used to pass the SampleScanState, second is
+ * seed (REPEATABLE), skip the processing for them here, just assert
+ * that the types are correct.
+ */
+ Assert(procform->proargtypes.values[0] == INTERNALOID);
+ Assert(procform->proargtypes.values[1] == INT4OID);
+ initnargs -= 2;
+ memcpy(init_arg_types, procform->proargtypes.values + 2,
+ initnargs * sizeof(Oid));
+
+ /* Now we are done with the catalog */
+ ReleaseSysCache(tuple);
+
+ /* Process repeatable (seed) */
+ if (repeatable != NULL)
+ {
+ Node *arg = repeatable;
+
+ if (arg && IsA(arg, A_Const))
+ {
+ A_Const *con = (A_Const *) arg;
+
+ if (con->val.type == T_Null)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value"),
+ parser_errposition(pstate, con->location)));
+
+ }
+
+ arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
+ tablesample->repeatable = arg;
+ }
+ else
+ tablesample->repeatable = NULL;
+
+ /* Check user provided expected number of arguments. */
+ if (list_length(sampleargs) != initnargs)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
+ "tablesample method \"%s\" expects %d arguments got %d",
+ initnargs,
+ samplemethod,
+ initnargs, list_length(sampleargs)),
+ parser_errposition(pstate, location)));
+
+ /* Transform the arguments, typecasting them as needed. */
+ fargs = NIL;
+ nargs = 0;
+ foreach(larg, sampleargs)
+ {
+ Node *inarg = (Node *) lfirst(larg);
+ Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
+ Oid argtype = exprType(arg);
+
+ if (argtype != init_arg_types[nargs])
+ {
+ if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
+ COERCION_IMPLICIT))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong parameter %d for tablesample method \"%s\"",
+ nargs + 1, samplemethod),
+ errdetail("Expected type %s got %s.",
+ format_type_be(init_arg_types[nargs]),
+ format_type_be(argtype)),
+ parser_errposition(pstate, exprLocation(inarg))));
+
+ arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
+ COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
+ }
+
+ fargs = lappend(fargs, arg);
+ nargs++;
+ }
+
+ /* Pass the arguments down */
+ tablesample->args = fargs;
+
+ return tablesample;
+}
+
/* func_match_argtypes()
*
* Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 39302a410b8..e27afd1a3e0 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -2209,6 +2209,9 @@ view_query_is_auto_updatable(Query *viewquery, bool check_cols)
base_rte->relkind != RELKIND_VIEW))
return gettext_noop("Views that do not select from a single table or view are not automatically updatable.");
+ if (base_rte->tablesample)
+ return gettext_noop("Views containing TABLESAMPLE are not automatically updatable.");
+
/*
* Check that the view has at least one updatable column. This is required
* for INSERT/UPDATE but not for DELETE.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 903e80aea35..298eebf5e67 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,6 +32,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -345,6 +346,8 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags);
static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags, int wrapColumn);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
TupleDesc resultDesc,
int prettyFlags, int wrapColumn, int startIndent);
@@ -4220,6 +4223,50 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
heap_close(ev_relation, AccessShareLock);
}
+/* ----------
+ * get_tablesample_def - Convert TableSampleClause back to SQL
+ * ----------
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ char *tsmname;
+ int nargs;
+ ListCell *l;
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("cache lookup failed for tablesample method %u",
+ tablesample->tsmid)));
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ tsmname = NameStr(tsm->tsmname);
+ appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
+
+ ReleaseSysCache(tuple);
+
+ nargs = 0;
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, true);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr(tablesample->repeatable, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+}
/* ----------
* get_query_def - Parse back one query parsetree
@@ -8529,6 +8576,9 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
only_marker(rte),
generate_relation_name(rte->relid,
context->namespaces));
+
+ if (rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
break;
case RTE_SUBQUERY:
/* Subquery RTE */
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 1dc293297d9..f259751e157 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,6 +32,7 @@
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_transform.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -2996,3 +2997,29 @@ get_range_subtype(Oid rangeOid)
else
return InvalidOid;
}
+
+/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */
+
+/*
+ * get_tablesample_method_name - given a tablesample method OID,
+ * look up the name or NULL if not found
+ */
+char *
+get_tablesample_method_name(Oid tsmid)
+{
+ HeapTuple tuple;
+
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
+ if (HeapTupleIsValid(tuple))
+ {
+ Form_pg_tablesample_method tup =
+ (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ char *result;
+
+ result = pstrdup(NameStr(tup->tsmname));
+ ReleaseSysCache(tuple);
+ return result;
+ }
+ else
+ return NULL;
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index f58e1cebf2a..7def1be32ae 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,6 +56,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
@@ -666,6 +667,28 @@ static const struct cachedesc cacheinfo[] = {
},
128
},
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */
+ TableSampleMethodNameIndexId,
+ 1,
+ {
+ Anum_pg_tablesample_method_tsmname,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */
+ TableSampleMethodOidIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 1eeabaf1588..9becc63bf82 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -46,6 +46,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
bs->n = samplesize;
bs->t = 0; /* blocks scanned so far */
bs->m = 0; /* blocks selected so far */
+
+ sampler_random_init_state(randseed, bs->randstate);
}
bool
@@ -92,7 +94,7 @@ BlockSampler_Next(BlockSampler bs)
* less than k, which means that we cannot fail to select enough blocks.
*----------
*/
- V = sampler_random_fract();
+ V = sampler_random_fract(bs->randstate);
p = 1.0 - (double) k / (double) K;
while (V < p)
{
@@ -126,8 +128,14 @@ BlockSampler_Next(BlockSampler bs)
void
reservoir_init_selection_state(ReservoirState rs, int n)
{
+ /*
+ * Reservoir sampling is not used anywhere where it would need to return
+ * repeatable results so we can initialize it randomly.
+ */
+ sampler_random_init_state(random(), rs->randstate);
+
/* Initial value of W (for use when Algorithm Z is first applied) */
- *rs = exp(-log(sampler_random_fract()) / n);
+ rs->W = exp(-log(sampler_random_fract(rs->randstate)) / n);
}
double
@@ -142,7 +150,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
double V,
quot;
- V = sampler_random_fract(); /* Generate V */
+ V = sampler_random_fract(rs->randstate); /* Generate V */
S = 0;
t += 1;
/* Note: "num" in Vitter's code is always equal to t - n */
@@ -158,7 +166,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
else
{
/* Now apply Algorithm Z */
- double W = *rs;
+ double W = rs->W;
double term = t - (double) n + 1;
for (;;)
@@ -174,7 +182,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
tmp;
/* Generate U and X */
- U = sampler_random_fract();
+ U = sampler_random_fract(rs->randstate);
X = t * (W - 1.0);
S = floor(X); /* S is tentatively set to floor(X) */
/* Test if U <= h(S)/cg(X) in the manner of (6.3) */
@@ -203,11 +211,11 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
y *= numer / denom;
denom -= 1;
}
- W = exp(-log(sampler_random_fract()) / n); /* Generate W in advance */
+ W = exp(-log(sampler_random_fract(rs->randstate)) / n); /* Generate W in advance */
if (exp(log(y) / n) <= (t + X) / t)
break;
}
- *rs = W;
+ rs->W = W;
}
return S;
}
@@ -217,10 +225,17 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
* Random number generator used by sampling
*----------
*/
+void
+sampler_random_init_state(long seed, SamplerRandomState randstate)
+{
+ randstate[0] = RAND48_SEED_0;
+ randstate[1] = (unsigned short) seed;
+ randstate[2] = (unsigned short) (seed >> 16);
+}
/* Select a random value R uniformly distributed in (0 - 1) */
double
-sampler_random_fract()
+sampler_random_fract(SamplerRandomState randstate)
{
- return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+ return pg_erand48(randstate);
}