Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6530df6

Browse files
committed
Redesign the caching done by get_cached_rowtype().
Previously, get_cached_rowtype() cached a pointer to a reference-counted tuple descriptor from the typcache, relying on the ExprContextCallback mechanism to release the tupdesc refcount when the expression tree using the tupdesc was destroyed. This worked fine when it was designed, but the introduction of within-DO-block COMMITs broke it. The refcount is logged in a transaction-lifespan resource owner, but plpgsql won't destroy simple expressions made within the DO block (before its first commit) until the DO block is exited. That results in a warning about a leaked tupdesc refcount when the COMMIT destroys the original resource owner, and then an error about the active resource owner not holding a matching refcount when the expression is destroyed. To fix, get rid of the need to have a shutdown callback at all, by instead caching a pointer to the relevant typcache entry. Those survive for the life of the backend, so we needn't worry about the pointer becoming stale. (For registered RECORD types, we can still cache a pointer to the tupdesc, knowing that it won't change for the life of the backend.) This mechanism has been in use in plpgsql and expandedrecord.c since commit 4b93f57, and seems to work well. This change requires modifying the ExprEvalStep structs used by the relevant expression step types, which is slightly worrisome for back-patching. However, there seems no good reason for extensions to be familiar with the details of these particular sub-structs. Per report from Rohit Bhogate. Back-patch to v11 where within-DO-block COMMITs became a thing. Discussion: https://postgr.es/m/CAAV6ZkQRCVBh8qAY+SZiHnz+U+FqAGBBDaDTjF2yiKa2nJSLKg@mail.gmail.com
1 parent 5f12bc9 commit 6530df6

File tree

5 files changed

+177
-91
lines changed

5 files changed

+177
-91
lines changed

src/backend/executor/execExpr.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
11301130
scratch.opcode = EEOP_FIELDSELECT;
11311131
scratch.d.fieldselect.fieldnum = fselect->fieldnum;
11321132
scratch.d.fieldselect.resulttype = fselect->resulttype;
1133-
scratch.d.fieldselect.argdesc = NULL;
1133+
scratch.d.fieldselect.rowcache.cacheptr = NULL;
11341134

11351135
ExprEvalPushStep(state, &scratch);
11361136
break;
@@ -1140,7 +1140,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
11401140
{
11411141
FieldStore *fstore = (FieldStore *) node;
11421142
TupleDesc tupDesc;
1143-
TupleDesc *descp;
1143+
ExprEvalRowtypeCache *rowcachep;
11441144
Datum *values;
11451145
bool *nulls;
11461146
int ncolumns;
@@ -1156,17 +1156,17 @@ ExecInitExprRec(Expr *node, ExprState *state,
11561156
values = (Datum *) palloc(sizeof(Datum) * ncolumns);
11571157
nulls = (bool *) palloc(sizeof(bool) * ncolumns);
11581158

1159-
/* create workspace for runtime tupdesc cache */
1160-
descp = (TupleDesc *) palloc(sizeof(TupleDesc));
1161-
*descp = NULL;
1159+
/* create shared composite-type-lookup cache struct */
1160+
rowcachep = palloc(sizeof(ExprEvalRowtypeCache));
1161+
rowcachep->cacheptr = NULL;
11621162

11631163
/* emit code to evaluate the composite input value */
11641164
ExecInitExprRec(fstore->arg, state, resv, resnull);
11651165

11661166
/* next, deform the input tuple into our workspace */
11671167
scratch.opcode = EEOP_FIELDSTORE_DEFORM;
11681168
scratch.d.fieldstore.fstore = fstore;
1169-
scratch.d.fieldstore.argdesc = descp;
1169+
scratch.d.fieldstore.rowcache = rowcachep;
11701170
scratch.d.fieldstore.values = values;
11711171
scratch.d.fieldstore.nulls = nulls;
11721172
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1224,7 +1224,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
12241224
/* finally, form result tuple */
12251225
scratch.opcode = EEOP_FIELDSTORE_FORM;
12261226
scratch.d.fieldstore.fstore = fstore;
1227-
scratch.d.fieldstore.argdesc = descp;
1227+
scratch.d.fieldstore.rowcache = rowcachep;
12281228
scratch.d.fieldstore.values = values;
12291229
scratch.d.fieldstore.nulls = nulls;
12301230
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1370,17 +1370,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
13701370
case T_ConvertRowtypeExpr:
13711371
{
13721372
ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node;
1373+
ExprEvalRowtypeCache *rowcachep;
1374+
1375+
/* cache structs must be out-of-line for space reasons */
1376+
rowcachep = palloc(2 * sizeof(ExprEvalRowtypeCache));
1377+
rowcachep[0].cacheptr = NULL;
1378+
rowcachep[1].cacheptr = NULL;
13731379

13741380
/* evaluate argument into step's result area */
13751381
ExecInitExprRec(convert->arg, state, resv, resnull);
13761382

13771383
/* and push conversion step */
13781384
scratch.opcode = EEOP_CONVERT_ROWTYPE;
1379-
scratch.d.convert_rowtype.convert = convert;
1380-
scratch.d.convert_rowtype.indesc = NULL;
1381-
scratch.d.convert_rowtype.outdesc = NULL;
1385+
scratch.d.convert_rowtype.inputtype =
1386+
exprType((Node *) convert->arg);
1387+
scratch.d.convert_rowtype.outputtype = convert->resulttype;
1388+
scratch.d.convert_rowtype.incache = &rowcachep[0];
1389+
scratch.d.convert_rowtype.outcache = &rowcachep[1];
13821390
scratch.d.convert_rowtype.map = NULL;
1383-
scratch.d.convert_rowtype.initialized = false;
13841391

13851392
ExprEvalPushStep(state, &scratch);
13861393
break;
@@ -2005,7 +2012,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
20052012
(int) ntest->nulltesttype);
20062013
}
20072014
/* initialize cache in case it's a row test */
2008-
scratch.d.nulltest_row.argdesc = NULL;
2015+
scratch.d.nulltest_row.rowcache.cacheptr = NULL;
20092016

20102017
/* first evaluate argument into result variable */
20112018
ExecInitExprRec(ntest->arg, state,

src/backend/executor/execExprInterp.c

Lines changed: 89 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ static void ExecInitInterpreter(void);
145145
static void CheckVarSlotCompatibility(TupleTableSlot *slot, int attnum, Oid vartype);
146146
static void CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot);
147147
static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
148-
TupleDesc *cache_field, ExprContext *econtext);
149-
static void ShutdownTupleDescRef(Datum arg);
148+
ExprEvalRowtypeCache *rowcache,
149+
bool *changed);
150150
static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
151151
ExprContext *econtext, bool checkisnull);
152152

@@ -1919,56 +1919,78 @@ CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot)
19191919
* get_cached_rowtype: utility function to lookup a rowtype tupdesc
19201920
*
19211921
* type_id, typmod: identity of the rowtype
1922-
* cache_field: where to cache the TupleDesc pointer in expression state node
1923-
* (field must be initialized to NULL)
1924-
* econtext: expression context we are executing in
1922+
* rowcache: space for caching identity info
1923+
* (rowcache->cacheptr must be initialized to NULL)
1924+
* changed: if not NULL, *changed is set to true on any update
19251925
*
1926-
* NOTE: because the shutdown callback will be called during plan rescan,
1927-
* must be prepared to re-do this during any node execution; cannot call
1928-
* just once during expression initialization.
1926+
* The returned TupleDesc is not guaranteed pinned; caller must pin it
1927+
* to use it across any operation that might incur cache invalidation.
1928+
* (The TupleDesc is always refcounted, so just use IncrTupleDescRefCount.)
1929+
*
1930+
* NOTE: because composite types can change contents, we must be prepared
1931+
* to re-do this during any node execution; cannot call just once during
1932+
* expression initialization.
19291933
*/
19301934
static TupleDesc
19311935
get_cached_rowtype(Oid type_id, int32 typmod,
1932-
TupleDesc *cache_field, ExprContext *econtext)
1936+
ExprEvalRowtypeCache *rowcache,
1937+
bool *changed)
19331938
{
1934-
TupleDesc tupDesc = *cache_field;
1935-
1936-
/* Do lookup if no cached value or if requested type changed */
1937-
if (tupDesc == NULL ||
1938-
type_id != tupDesc->tdtypeid ||
1939-
typmod != tupDesc->tdtypmod)
1939+
if (type_id != RECORDOID)
19401940
{
1941-
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
1941+
/*
1942+
* It's a named composite type, so use the regular typcache. Do a
1943+
* lookup first time through, or if the composite type changed. Note:
1944+
* "tupdesc_id == 0" may look redundant, but it protects against the
1945+
* admittedly-theoretical possibility that type_id was RECORDOID the
1946+
* last time through, so that the cacheptr isn't TypeCacheEntry *.
1947+
*/
1948+
TypeCacheEntry *typentry = (TypeCacheEntry *) rowcache->cacheptr;
19421949

1943-
if (*cache_field)
1950+
if (unlikely(typentry == NULL ||
1951+
rowcache->tupdesc_id == 0 ||
1952+
typentry->tupDesc_identifier != rowcache->tupdesc_id))
19441953
{
1945-
/* Release old tupdesc; but callback is already registered */
1946-
ReleaseTupleDesc(*cache_field);
1954+
typentry = lookup_type_cache(type_id, TYPECACHE_TUPDESC);
1955+
if (typentry->tupDesc == NULL)
1956+
ereport(ERROR,
1957+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1958+
errmsg("type %s is not composite",
1959+
format_type_be(type_id))));
1960+
rowcache->cacheptr = (void *) typentry;
1961+
rowcache->tupdesc_id = typentry->tupDesc_identifier;
1962+
if (changed)
1963+
*changed = true;
19471964
}
1948-
else
1965+
return typentry->tupDesc;
1966+
}
1967+
else
1968+
{
1969+
/*
1970+
* A RECORD type, once registered, doesn't change for the life of the
1971+
* backend. So we don't need a typcache entry as such, which is good
1972+
* because there isn't one. It's possible that the caller is asking
1973+
* about a different type than before, though.
1974+
*/
1975+
TupleDesc tupDesc = (TupleDesc) rowcache->cacheptr;
1976+
1977+
if (unlikely(tupDesc == NULL ||
1978+
rowcache->tupdesc_id != 0 ||
1979+
type_id != tupDesc->tdtypeid ||
1980+
typmod != tupDesc->tdtypmod))
19491981
{
1950-
/* Need to register shutdown callback to release tupdesc */
1951-
RegisterExprContextCallback(econtext,
1952-
ShutdownTupleDescRef,
1953-
PointerGetDatum(cache_field));
1982+
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
1983+
/* Drop pin acquired by lookup_rowtype_tupdesc */
1984+
ReleaseTupleDesc(tupDesc);
1985+
rowcache->cacheptr = (void *) tupDesc;
1986+
rowcache->tupdesc_id = 0; /* not a valid value for non-RECORD */
1987+
if (changed)
1988+
*changed = true;
19541989
}
1955-
*cache_field = tupDesc;
1990+
return tupDesc;
19561991
}
1957-
return tupDesc;
19581992
}
19591993

1960-
/*
1961-
* Callback function to release a tupdesc refcount at econtext shutdown
1962-
*/
1963-
static void
1964-
ShutdownTupleDescRef(Datum arg)
1965-
{
1966-
TupleDesc *cache_field = (TupleDesc *) DatumGetPointer(arg);
1967-
1968-
if (*cache_field)
1969-
ReleaseTupleDesc(*cache_field);
1970-
*cache_field = NULL;
1971-
}
19721994

19731995
/*
19741996
* Fast-path functions, for very simple expressions
@@ -2501,8 +2523,7 @@ ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
25012523

25022524
/* Lookup tupdesc if first time through or if type changes */
25032525
tupDesc = get_cached_rowtype(tupType, tupTypmod,
2504-
&op->d.nulltest_row.argdesc,
2505-
econtext);
2526+
&op->d.nulltest_row.rowcache, NULL);
25062527

25072528
/*
25082529
* heap_attisnull needs a HeapTuple not a bare HeapTupleHeader.
@@ -2938,8 +2959,7 @@ ExecEvalFieldSelect(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
29382959

29392960
/* Lookup tupdesc if first time through or if type changes */
29402961
tupDesc = get_cached_rowtype(tupType, tupTypmod,
2941-
&op->d.fieldselect.argdesc,
2942-
econtext);
2962+
&op->d.fieldselect.rowcache, NULL);
29432963

29442964
/*
29452965
* Find field's attr record. Note we don't support system columns
@@ -2997,9 +3017,9 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
29973017
{
29983018
TupleDesc tupDesc;
29993019

3000-
/* Lookup tupdesc if first time through or after rescan */
3020+
/* Lookup tupdesc if first time through or if type changes */
30013021
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
3002-
op->d.fieldstore.argdesc, econtext);
3022+
op->d.fieldstore.rowcache, NULL);
30033023

30043024
/* Check that current tupdesc doesn't have more fields than we allocated */
30053025
if (unlikely(tupDesc->natts > op->d.fieldstore.ncolumns))
@@ -3041,10 +3061,14 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
30413061
void
30423062
ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
30433063
{
3064+
TupleDesc tupDesc;
30443065
HeapTuple tuple;
30453066

3046-
/* argdesc should already be valid from the DeForm step */
3047-
tuple = heap_form_tuple(*op->d.fieldstore.argdesc,
3067+
/* Lookup tupdesc (should be valid already) */
3068+
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
3069+
op->d.fieldstore.rowcache, NULL);
3070+
3071+
tuple = heap_form_tuple(tupDesc,
30483072
op->d.fieldstore.values,
30493073
op->d.fieldstore.nulls);
30503074

@@ -3255,13 +3279,13 @@ ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op)
32553279
void
32563280
ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
32573281
{
3258-
ConvertRowtypeExpr *convert = op->d.convert_rowtype.convert;
32593282
HeapTuple result;
32603283
Datum tupDatum;
32613284
HeapTupleHeader tuple;
32623285
HeapTupleData tmptup;
32633286
TupleDesc indesc,
32643287
outdesc;
3288+
bool changed = false;
32653289

32663290
/* NULL in -> NULL out */
32673291
if (*op->resnull)
@@ -3270,24 +3294,19 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32703294
tupDatum = *op->resvalue;
32713295
tuple = DatumGetHeapTupleHeader(tupDatum);
32723296

3273-
/* Lookup tupdescs if first time through or after rescan */
3274-
if (op->d.convert_rowtype.indesc == NULL)
3275-
{
3276-
get_cached_rowtype(exprType((Node *) convert->arg), -1,
3277-
&op->d.convert_rowtype.indesc,
3278-
econtext);
3279-
op->d.convert_rowtype.initialized = false;
3280-
}
3281-
if (op->d.convert_rowtype.outdesc == NULL)
3282-
{
3283-
get_cached_rowtype(convert->resulttype, -1,
3284-
&op->d.convert_rowtype.outdesc,
3285-
econtext);
3286-
op->d.convert_rowtype.initialized = false;
3287-
}
3288-
3289-
indesc = op->d.convert_rowtype.indesc;
3290-
outdesc = op->d.convert_rowtype.outdesc;
3297+
/*
3298+
* Lookup tupdescs if first time through or if type changes. We'd better
3299+
* pin them since type conversion functions could do catalog lookups and
3300+
* hence cause cache invalidation.
3301+
*/
3302+
indesc = get_cached_rowtype(op->d.convert_rowtype.inputtype, -1,
3303+
op->d.convert_rowtype.incache,
3304+
&changed);
3305+
IncrTupleDescRefCount(indesc);
3306+
outdesc = get_cached_rowtype(op->d.convert_rowtype.outputtype, -1,
3307+
op->d.convert_rowtype.outcache,
3308+
&changed);
3309+
IncrTupleDescRefCount(outdesc);
32913310

32923311
/*
32933312
* We used to be able to assert that incoming tuples are marked with
@@ -3298,8 +3317,8 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32983317
Assert(HeapTupleHeaderGetTypeId(tuple) == indesc->tdtypeid ||
32993318
HeapTupleHeaderGetTypeId(tuple) == RECORDOID);
33003319

3301-
/* if first time through, initialize conversion map */
3302-
if (!op->d.convert_rowtype.initialized)
3320+
/* if first time through, or after change, initialize conversion map */
3321+
if (changed)
33033322
{
33043323
MemoryContext old_cxt;
33053324

@@ -3310,7 +3329,6 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
33103329
op->d.convert_rowtype.map =
33113330
convert_tuples_by_name(indesc, outdesc,
33123331
gettext_noop("could not convert row type"));
3313-
op->d.convert_rowtype.initialized = true;
33143332

33153333
MemoryContextSwitchTo(old_cxt);
33163334
}
@@ -3340,6 +3358,9 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
33403358
*/
33413359
*op->resvalue = heap_copy_tuple_as_datum(&tmptup, outdesc);
33423360
}
3361+
3362+
DecrTupleDescRefCount(indesc);
3363+
DecrTupleDescRefCount(outdesc);
33433364
}
33443365

33453366
/*

0 commit comments

Comments
 (0)