Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 089e4d4

Browse files
committed
Prevent memory leaks associated with relcache rd_partcheck structures.
The original coding of generate_partition_qual() just copied the list of predicate expressions into the global CacheMemoryContext, making it effectively impossible to clean up when the owning relcache entry is destroyed --- the relevant code in RelationDestroyRelation() only managed to free the topmost List header :-(. This resulted in a session-lifespan memory leak whenever a table partition's relcache entry is rebuilt. Fortunately, that's not normally a large data structure, and rebuilds shouldn't occur all that often in production situations; but this is still a bug worth fixing back to v10 where the code was introduced. To fix, put the cached expression tree into its own small memory context, as we do with other complicated substructures of relcache entries. Also, deal more honestly with the case that a partition has an empty partcheck list; while that probably isn't a case that's very interesting for production use, it's legal. In passing, clarify comments about how partitioning-related relcache data structures are managed, and add some Asserts that we're not leaking old copies when we overwrite these data fields. Amit Langote and Tom Lane Discussion: https://postgr.es/m/7961.1552498252@sss.pgh.pa.us
1 parent 7ef2b31 commit 089e4d4

File tree

3 files changed

+80
-30
lines changed

3 files changed

+80
-30
lines changed

src/backend/utils/cache/partcache.c

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
4747

4848
/*
4949
* RelationBuildPartitionKey
50-
* Build and attach to relcache partition key data of relation
50+
* Build partition key data of relation, and attach to relcache
5151
*
5252
* Partitioning key data is a complex structure; to avoid complicated logic to
5353
* free individual elements whenever the relcache entry is flushed, we give it
54-
* its own memory context, child of CacheMemoryContext, which can easily be
54+
* its own memory context, a child of CacheMemoryContext, which can easily be
5555
* deleted on its own. To avoid leaking memory in that context in case of an
5656
* error partway through this function, the context is initially created as a
5757
* child of CurTransactionContext and only re-parented to CacheMemoryContext
@@ -150,15 +150,14 @@ RelationBuildPartitionKey(Relation relation)
150150
MemoryContextSwitchTo(oldcxt);
151151
}
152152

153+
/* Allocate assorted arrays in the partkeycxt, which we'll fill below */
153154
oldcxt = MemoryContextSwitchTo(partkeycxt);
154155
key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
155156
key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
156157
key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
157158
key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
158159

159160
key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
160-
161-
/* Gather type and collation info as well */
162161
key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
163162
key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
164163
key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
@@ -241,6 +240,10 @@ RelationBuildPartitionKey(Relation relation)
241240

242241
ReleaseSysCache(tuple);
243242

243+
/* Assert that we're not leaking any old data during assignments below */
244+
Assert(relation->rd_partkeycxt == NULL);
245+
Assert(relation->rd_partkey == NULL);
246+
244247
/*
245248
* Success --- reparent our context and make the relcache point to the
246249
* newly constructed key
@@ -252,10 +255,13 @@ RelationBuildPartitionKey(Relation relation)
252255

253256
/*
254257
* RelationBuildPartitionDesc
255-
* Form rel's partition descriptor
258+
* Form rel's partition descriptor, and store in relcache entry
256259
*
257-
* Not flushed from the cache by RelationClearRelation() unless changed because
258-
* of addition or removal of partition.
260+
* Note: the descriptor won't be flushed from the cache by
261+
* RelationClearRelation() unless it's changed because of
262+
* addition or removal of a partition. Hence, code holding a lock
263+
* that's sufficient to prevent that can assume that rd_partdesc
264+
* won't change underneath it.
259265
*/
260266
void
261267
RelationBuildPartitionDesc(Relation rel)
@@ -565,11 +571,24 @@ RelationBuildPartitionDesc(Relation rel)
565571
(int) key->strategy);
566572
}
567573

568-
/* Now build the actual relcache partition descriptor */
574+
/* Assert we aren't about to leak any old data structure */
575+
Assert(rel->rd_pdcxt == NULL);
576+
Assert(rel->rd_partdesc == NULL);
577+
578+
/*
579+
* Now build the actual relcache partition descriptor. Note that the
580+
* order of operations here is fairly critical. If we fail partway
581+
* through this code, we won't have leaked memory because the rd_pdcxt is
582+
* attached to the relcache entry immediately, so it'll be freed whenever
583+
* the entry is rebuilt or destroyed. However, we don't assign to
584+
* rd_partdesc until the cached data structure is fully complete and
585+
* valid, so that no other code might try to use it.
586+
*/
569587
rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
570588
"partition descriptor",
571-
ALLOCSET_DEFAULT_SIZES);
572-
MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt, RelationGetRelationName(rel));
589+
ALLOCSET_SMALL_SIZES);
590+
MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt,
591+
RelationGetRelationName(rel));
573592

574593
oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
575594

@@ -839,11 +858,9 @@ get_partition_qual_relid(Oid relid)
839858
* Generate partition predicate from rel's partition bound expression. The
840859
* function returns a NIL list if there is no predicate.
841860
*
842-
* Result expression tree is stored CacheMemoryContext to ensure it survives
843-
* as long as the relcache entry. But we should be running in a less long-lived
844-
* working context. To avoid leaking cache memory if this routine fails partway
845-
* through, we build in working memory and then copy the completed structure
846-
* into cache memory.
861+
* We cache a copy of the result in the relcache entry, after constructing
862+
* it using the caller's context. This approach avoids leaking any data
863+
* into long-lived cache contexts, especially if we fail partway through.
847864
*/
848865
static List *
849866
generate_partition_qual(Relation rel)
@@ -860,8 +877,8 @@ generate_partition_qual(Relation rel)
860877
/* Guard against stack overflow due to overly deep partition tree */
861878
check_stack_depth();
862879

863-
/* Quick copy */
864-
if (rel->rd_partcheck != NIL)
880+
/* If we already cached the result, just return a copy */
881+
if (rel->rd_partcheckvalid)
865882
return copyObject(rel->rd_partcheck);
866883

867884
/* Grab at least an AccessShareLock on the parent table */
@@ -907,14 +924,37 @@ generate_partition_qual(Relation rel)
907924
if (found_whole_row)
908925
elog(ERROR, "unexpected whole-row reference found in partition key");
909926

910-
/* Save a copy in the relcache */
911-
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
912-
rel->rd_partcheck = copyObject(result);
913-
MemoryContextSwitchTo(oldcxt);
927+
/* Assert that we're not leaking any old data during assignments below */
928+
Assert(rel->rd_partcheckcxt == NULL);
929+
Assert(rel->rd_partcheck == NIL);
930+
931+
/*
932+
* Save a copy in the relcache. The order of these operations is fairly
933+
* critical to avoid memory leaks and ensure that we don't leave a corrupt
934+
* relcache entry if we fail partway through copyObject.
935+
*
936+
* If, as is definitely possible, the partcheck list is NIL, then we do
937+
* not need to make a context to hold it.
938+
*/
939+
if (result != NIL)
940+
{
941+
rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext,
942+
"partition constraint",
943+
ALLOCSET_SMALL_SIZES);
944+
MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt,
945+
RelationGetRelationName(rel));
946+
oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt);
947+
rel->rd_partcheck = copyObject(result);
948+
MemoryContextSwitchTo(oldcxt);
949+
}
950+
else
951+
rel->rd_partcheck = NIL;
952+
rel->rd_partcheckvalid = true;
914953

915954
/* Keep the parent locked until commit */
916955
relation_close(parent, NoLock);
917956

957+
/* Return the working copy to the caller */
918958
return result;
919959
}
920960

src/backend/utils/cache/relcache.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,11 +1191,15 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
11911191
}
11921192
else
11931193
{
1194-
relation->rd_partkeycxt = NULL;
11951194
relation->rd_partkey = NULL;
1195+
relation->rd_partkeycxt = NULL;
11961196
relation->rd_partdesc = NULL;
11971197
relation->rd_pdcxt = NULL;
11981198
}
1199+
/* ... but partcheck is not loaded till asked for */
1200+
relation->rd_partcheck = NIL;
1201+
relation->rd_partcheckvalid = false;
1202+
relation->rd_partcheckcxt = NULL;
11991203

12001204
/*
12011205
* if it's an index, initialize index-related information
@@ -2285,8 +2289,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
22852289
MemoryContextDelete(relation->rd_partkeycxt);
22862290
if (relation->rd_pdcxt)
22872291
MemoryContextDelete(relation->rd_pdcxt);
2288-
if (relation->rd_partcheck)
2289-
pfree(relation->rd_partcheck);
2292+
if (relation->rd_partcheckcxt)
2293+
MemoryContextDelete(relation->rd_partcheckcxt);
22902294
if (relation->rd_fdwroutine)
22912295
pfree(relation->rd_fdwroutine);
22922296
pfree(relation);
@@ -5617,18 +5621,20 @@ load_relcache_init_file(bool shared)
56175621
* format is complex and subject to change). They must be rebuilt if
56185622
* needed by RelationCacheInitializePhase3. This is not expected to
56195623
* be a big performance hit since few system catalogs have such. Ditto
5620-
* for RLS policy data, index expressions, predicates, exclusion info,
5621-
* and FDW info.
5624+
* for RLS policy data, partition info, index expressions, predicates,
5625+
* exclusion info, and FDW info.
56225626
*/
56235627
rel->rd_rules = NULL;
56245628
rel->rd_rulescxt = NULL;
56255629
rel->trigdesc = NULL;
56265630
rel->rd_rsdesc = NULL;
5627-
rel->rd_partkeycxt = NULL;
56285631
rel->rd_partkey = NULL;
5629-
rel->rd_pdcxt = NULL;
5632+
rel->rd_partkeycxt = NULL;
56305633
rel->rd_partdesc = NULL;
5634+
rel->rd_pdcxt = NULL;
56315635
rel->rd_partcheck = NIL;
5636+
rel->rd_partcheckvalid = false;
5637+
rel->rd_partcheckcxt = NULL;
56325638
rel->rd_indexprs = NIL;
56335639
rel->rd_indpred = NIL;
56345640
rel->rd_exclops = NULL;

src/include/utils/rel.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ typedef struct RelationData
9595
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
9696
bool rd_fkeyvalid; /* true if list has been computed */
9797

98-
MemoryContext rd_partkeycxt; /* private memory cxt for the below */
98+
MemoryContext rd_partkeycxt; /* private context for rd_partkey, if any */
9999
struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
100-
MemoryContext rd_pdcxt; /* private context for partdesc */
100+
MemoryContext rd_pdcxt; /* private context for rd_partdesc, if any */
101101
struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
102102
List *rd_partcheck; /* partition CHECK quals */
103103

@@ -188,6 +188,10 @@ typedef struct RelationData
188188

189189
/* use "struct" here to avoid needing to include pgstat.h: */
190190
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
191+
192+
/* placed here to avoid ABI break before v12: */
193+
bool rd_partcheckvalid; /* true if list has been computed */
194+
MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */
191195
} RelationData;
192196

193197

0 commit comments

Comments
 (0)