Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a2e923a

Browse files
committed
Fix dynahash.c to suppress hash bucket splits while a hash_seq_search() scan
is in progress on the same hashtable. This seems the least invasive way to fix the recently-recognized problem that a split could cause the scan to visit entries twice or (with much lower probability) miss them entirely. The only field-reported problem caused by this is the "failed to re-find shared lock object" PANIC in COMMIT PREPARED reported by Michel Dorochevsky, which was caused by multiply visited entries. However, it seems certain that mdsync() is vulnerable to missing required fsync's due to missed entries, and I am fearful that RelationCacheInitializePhase2() might be at risk as well. Because of that and the generalized hazard presented by this bug, back-patch all the supported branches. Along the way, fix pg_prepared_statement() and pg_cursor() to not assume that the hashtables they are examining will stay static between calls. This is risky regardless of the newly noted dynahash problem, because hash_seq_search() has never promised to cope with deletion of table entries other than the just-returned one. There may be no bug here because the only supported way to call these functions is via ExecMakeTableFunctionResult() which will cycle them to completion before doing anything very interesting, but it seems best to get rid of the assumption. This affects 8.2 and HEAD only, since those functions weren't there earlier.
1 parent 8e90c54 commit a2e923a

File tree

8 files changed

+392
-156
lines changed

8 files changed

+392
-156
lines changed

src/backend/access/transam/xact.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.239 2007/04/03 16:34:35 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.240 2007/04/26 23:24:44 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1631,6 +1631,7 @@ CommitTransaction(void)
16311631
/* smgrcommit already done */
16321632
AtEOXact_Files();
16331633
AtEOXact_ComboCid();
1634+
AtEOXact_HashTables(true);
16341635
pgstat_clear_snapshot();
16351636
pgstat_count_xact_commit();
16361637
pgstat_report_txn_timestamp(0);
@@ -1849,6 +1850,7 @@ PrepareTransaction(void)
18491850
/* smgrcommit already done */
18501851
AtEOXact_Files();
18511852
AtEOXact_ComboCid();
1853+
AtEOXact_HashTables(true);
18521854
pgstat_clear_snapshot();
18531855

18541856
CurrentResourceOwner = NULL;
@@ -2003,6 +2005,7 @@ AbortTransaction(void)
20032005
smgrabort();
20042006
AtEOXact_Files();
20052007
AtEOXact_ComboCid();
2008+
AtEOXact_HashTables(false);
20062009
pgstat_clear_snapshot();
20072010
pgstat_count_xact_rollback();
20082011
pgstat_report_txn_timestamp(0);
@@ -3716,6 +3719,7 @@ CommitSubTransaction(void)
37163719
s->parent->subTransactionId);
37173720
AtEOSubXact_Files(true, s->subTransactionId,
37183721
s->parent->subTransactionId);
3722+
AtEOSubXact_HashTables(true, s->nestingLevel);
37193723

37203724
/*
37213725
* We need to restore the upper transaction's read-only state, in case the
@@ -3827,6 +3831,7 @@ AbortSubTransaction(void)
38273831
s->parent->subTransactionId);
38283832
AtEOSubXact_Files(false, s->subTransactionId,
38293833
s->parent->subTransactionId);
3834+
AtEOSubXact_HashTables(false, s->nestingLevel);
38303835
}
38313836

38323837
/*

src/backend/commands/prepare.c

Lines changed: 80 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2002-2007, PostgreSQL Global Development Group
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.73 2007/04/16 18:21:07 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.74 2007/04/26 23:24:44 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -21,7 +21,7 @@
2121
#include "catalog/pg_type.h"
2222
#include "commands/explain.h"
2323
#include "commands/prepare.h"
24-
#include "funcapi.h"
24+
#include "miscadmin.h"
2525
#include "parser/analyze.h"
2626
#include "parser/parse_coerce.h"
2727
#include "parser/parse_expr.h"
@@ -743,92 +743,99 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, ExplainStmt *stmt,
743743
Datum
744744
pg_prepared_statement(PG_FUNCTION_ARGS)
745745
{
746-
FuncCallContext *funcctx;
747-
HASH_SEQ_STATUS *hash_seq;
748-
PreparedStatement *prep_stmt;
749-
750-
/* stuff done only on the first call of the function */
751-
if (SRF_IS_FIRSTCALL())
752-
{
753-
TupleDesc tupdesc;
754-
MemoryContext oldcontext;
755-
756-
/* create a function context for cross-call persistence */
757-
funcctx = SRF_FIRSTCALL_INIT();
746+
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
747+
TupleDesc tupdesc;
748+
Tuplestorestate *tupstore;
749+
MemoryContext per_query_ctx;
750+
MemoryContext oldcontext;
751+
752+
/* check to see if caller supports us returning a tuplestore */
753+
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
754+
ereport(ERROR,
755+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
756+
errmsg("set-valued function called in context that cannot accept a set")));
757+
if (!(rsinfo->allowedModes & SFRM_Materialize))
758+
ereport(ERROR,
759+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
760+
errmsg("materialize mode required, but it is not " \
761+
"allowed in this context")));
758762

759-
/*
760-
* switch to memory context appropriate for multiple function calls
761-
*/
762-
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
763+
/* need to build tuplestore in query context */
764+
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
765+
oldcontext = MemoryContextSwitchTo(per_query_ctx);
763766

764-
/* allocate memory for user context */
765-
if (prepared_queries)
766-
{
767-
hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
768-
hash_seq_init(hash_seq, prepared_queries);
769-
funcctx->user_fctx = (void *) hash_seq;
770-
}
771-
else
772-
funcctx->user_fctx = NULL;
767+
/*
768+
* build tupdesc for result tuples. This must match the definition of
769+
* the pg_prepared_statements view in system_views.sql
770+
*/
771+
tupdesc = CreateTemplateTupleDesc(5, false);
772+
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
773+
TEXTOID, -1, 0);
774+
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
775+
TEXTOID, -1, 0);
776+
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
777+
TIMESTAMPTZOID, -1, 0);
778+
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
779+
REGTYPEARRAYOID, -1, 0);
780+
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
781+
BOOLOID, -1, 0);
773782

774-
/*
775-
* build tupdesc for result tuples. This must match the definition of
776-
* the pg_prepared_statements view in system_views.sql
777-
*/
778-
tupdesc = CreateTemplateTupleDesc(5, false);
779-
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
780-
TEXTOID, -1, 0);
781-
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
782-
TEXTOID, -1, 0);
783-
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
784-
TIMESTAMPTZOID, -1, 0);
785-
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
786-
REGTYPEARRAYOID, -1, 0);
787-
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
788-
BOOLOID, -1, 0);
789-
790-
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
791-
MemoryContextSwitchTo(oldcontext);
792-
}
783+
/*
784+
* We put all the tuples into a tuplestore in one scan of the hashtable.
785+
* This avoids any issue of the hashtable possibly changing between calls.
786+
*/
787+
tupstore = tuplestore_begin_heap(true, false, work_mem);
793788

794-
/* stuff done on every call of the function */
795-
funcctx = SRF_PERCALL_SETUP();
796-
hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
789+
/* hash table might be uninitialized */
790+
if (prepared_queries)
791+
{
792+
HASH_SEQ_STATUS hash_seq;
793+
PreparedStatement *prep_stmt;
797794

798-
/* if the hash table is uninitialized, we're done */
799-
if (hash_seq == NULL)
800-
SRF_RETURN_DONE(funcctx);
795+
hash_seq_init(&hash_seq, prepared_queries);
796+
while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL)
797+
{
798+
HeapTuple tuple;
799+
Datum values[5];
800+
bool nulls[5];
801801

802-
prep_stmt = hash_seq_search(hash_seq);
803-
if (prep_stmt)
804-
{
805-
Datum result;
806-
HeapTuple tuple;
807-
Datum values[5];
808-
bool nulls[5];
802+
/* generate junk in short-term context */
803+
MemoryContextSwitchTo(oldcontext);
809804

810-
MemSet(nulls, 0, sizeof(nulls));
805+
MemSet(nulls, 0, sizeof(nulls));
811806

812-
values[0] = DirectFunctionCall1(textin,
807+
values[0] = DirectFunctionCall1(textin,
813808
CStringGetDatum(prep_stmt->stmt_name));
814809

815-
if (prep_stmt->plansource->query_string == NULL)
816-
nulls[1] = true;
817-
else
818-
values[1] = DirectFunctionCall1(textin,
810+
if (prep_stmt->plansource->query_string == NULL)
811+
nulls[1] = true;
812+
else
813+
values[1] = DirectFunctionCall1(textin,
819814
CStringGetDatum(prep_stmt->plansource->query_string));
820815

821-
values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
822-
values[3] = build_regtype_array(prep_stmt->plansource->param_types,
823-
prep_stmt->plansource->num_params);
824-
values[4] = BoolGetDatum(prep_stmt->from_sql);
816+
values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
817+
values[3] = build_regtype_array(prep_stmt->plansource->param_types,
818+
prep_stmt->plansource->num_params);
819+
values[4] = BoolGetDatum(prep_stmt->from_sql);
820+
821+
tuple = heap_form_tuple(tupdesc, values, nulls);
825822

826-
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
827-
result = HeapTupleGetDatum(tuple);
828-
SRF_RETURN_NEXT(funcctx, result);
823+
/* switch to appropriate context while storing the tuple */
824+
MemoryContextSwitchTo(per_query_ctx);
825+
tuplestore_puttuple(tupstore, tuple);
826+
}
829827
}
830828

831-
SRF_RETURN_DONE(funcctx);
829+
/* clean up and return the tuplestore */
830+
tuplestore_donestoring(tupstore);
831+
832+
MemoryContextSwitchTo(oldcontext);
833+
834+
rsinfo->returnMode = SFRM_Materialize;
835+
rsinfo->setResult = tupstore;
836+
rsinfo->setDesc = tupdesc;
837+
838+
return (Datum) 0;
832839
}
833840

834841
/*

src/backend/executor/nodeSubplan.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.87 2007/02/27 01:11:25 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.88 2007/04/26 23:24:44 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -569,16 +569,20 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
569569
TupleHashIterator hashiter;
570570
TupleHashEntry entry;
571571

572-
ResetTupleHashIterator(hashtable, &hashiter);
572+
InitTupleHashIterator(hashtable, &hashiter);
573573
while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
574574
{
575575
ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
576576
if (!execTuplesUnequal(slot, hashtable->tableslot,
577577
numCols, keyColIdx,
578578
hashtable->cur_eq_funcs,
579579
hashtable->tempcxt))
580+
{
581+
TermTupleHashIterator(&hashiter);
580582
return true;
583+
}
581584
}
585+
/* No TermTupleHashIterator call needed here */
582586
return false;
583587
}
584588

src/backend/nodes/tidbitmap.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
* Copyright (c) 2003-2007, PostgreSQL Global Development Group
2424
*
2525
* IDENTIFICATION
26-
* $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.11 2007/01/05 22:19:30 momjian Exp $
26+
* $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.12 2007/04/26 23:24:44 tgl Exp $
2727
*
2828
*-------------------------------------------------------------------------
2929
*/
@@ -907,7 +907,11 @@ tbm_lossify(TIDBitmap *tbm)
907907
tbm_mark_page_lossy(tbm, page->blockno);
908908

909909
if (tbm->nentries <= tbm->maxentries)
910-
return; /* we have done enough */
910+
{
911+
/* we have done enough */
912+
hash_seq_term(&status);
913+
break;
914+
}
911915

912916
/*
913917
* Note: tbm_mark_page_lossy may have inserted a lossy chunk into the

0 commit comments

Comments
 (0)