Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7fc1a81

Browse files
committed
postgres_fdw: Restructure connection retry logic.
Commit 32a9c0b introduced connection retry logic into postgres_fdw. Previously it used goto statement for retry. This commit gets rid of that goto from the logic to make the code simpler and easier-to-read. When getting out of PG_CATCH() for the retry, the error state should be cleaned up and the memory context should be reset. But commit 32a9c0b forgot to do that. This commit also fixes this bug. Previously only PQstatus()==CONNECTION_BAD was verified to detect connection failure. But this could cause false detection in the case where any error other than connection failure (e.g., out-of-memory) was thrown after a broken connection was detected in libpq and CONNECTION_BAD is set. To fix this issue, this commit changes the logic so that it also checks the error's sqlstate is ERRCODE_CONNECTION_FAILURE. Author: Fujii Masao Reviewed-by: Tom Lane Discussion: https://postgr.es/m/2943611.1602375376@sss.pgh.pa.us
1 parent fe2a16d commit 7fc1a81

File tree

1 file changed

+90
-46
lines changed

1 file changed

+90
-46
lines changed

contrib/postgres_fdw/connection.c

+90-46
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ static unsigned int prep_stmt_number = 0;
7474
static bool xact_got_connection = false;
7575

7676
/* prototypes of private functions */
77+
static void make_new_connection(ConnCacheEntry *entry, UserMapping *user);
7778
static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
7879
static void disconnect_pg_server(ConnCacheEntry *entry);
7980
static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
@@ -108,9 +109,10 @@ PGconn *
108109
GetConnection(UserMapping *user, bool will_prep_stmt)
109110
{
110111
bool found;
111-
volatile bool retry_conn = false;
112+
bool retry = false;
112113
ConnCacheEntry *entry;
113114
ConnCacheKey key;
115+
MemoryContext ccxt = CurrentMemoryContext;
114116

115117
/* First time through, initialize connection cache hashtable */
116118
if (ConnectionHash == NULL)
@@ -160,23 +162,14 @@ GetConnection(UserMapping *user, bool will_prep_stmt)
160162
/* Reject further use of connections which failed abort cleanup. */
161163
pgfdw_reject_incomplete_xact_state_change(entry);
162164

163-
retry:
164-
165165
/*
166166
* If the connection needs to be remade due to invalidation, disconnect as
167-
* soon as we're out of all transactions. Also, if previous attempt to
168-
* start new remote transaction failed on the cached connection,
169-
* disconnect it to retry a new connection.
167+
* soon as we're out of all transactions.
170168
*/
171-
if ((entry->conn != NULL && entry->invalidated &&
172-
entry->xact_depth == 0) || retry_conn)
169+
if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
173170
{
174-
if (retry_conn)
175-
elog(DEBUG3, "closing connection %p to reestablish a new one",
176-
entry->conn);
177-
else
178-
elog(DEBUG3, "closing connection %p for option changes to take effect",
179-
entry->conn);
171+
elog(DEBUG3, "closing connection %p for option changes to take effect",
172+
entry->conn);
180173
disconnect_pg_server(entry);
181174
}
182175

@@ -186,58 +179,78 @@ GetConnection(UserMapping *user, bool will_prep_stmt)
186179
* will remain in a valid empty state, ie conn == NULL.)
187180
*/
188181
if (entry->conn == NULL)
189-
{
190-
ForeignServer *server = GetForeignServer(user->serverid);
191-
192-
/* Reset all transient state fields, to be sure all are clean */
193-
entry->xact_depth = 0;
194-
entry->have_prep_stmt = false;
195-
entry->have_error = false;
196-
entry->changing_xact_state = false;
197-
entry->invalidated = false;
198-
entry->server_hashvalue =
199-
GetSysCacheHashValue1(FOREIGNSERVEROID,
200-
ObjectIdGetDatum(server->serverid));
201-
entry->mapping_hashvalue =
202-
GetSysCacheHashValue1(USERMAPPINGOID,
203-
ObjectIdGetDatum(user->umid));
204-
205-
/* Now try to make the connection */
206-
entry->conn = connect_pg_server(server, user);
207-
208-
elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
209-
entry->conn, server->servername, user->umid, user->userid);
210-
}
182+
make_new_connection(entry, user);
211183

212184
/*
213185
* We check the health of the cached connection here when starting a new
214-
* remote transaction. If a broken connection is detected in the first
215-
* attempt, we try to reestablish a new connection. If broken connection
216-
* is detected again here, we give up getting a connection.
186+
* remote transaction. If a broken connection is detected, we try to
187+
* reestablish a new connection later.
217188
*/
218189
PG_TRY();
219190
{
220191
/* Start a new transaction or subtransaction if needed. */
221192
begin_remote_xact(entry);
222-
retry_conn = false;
223193
}
224194
PG_CATCH();
225195
{
226-
if (PQstatus(entry->conn) != CONNECTION_BAD ||
227-
entry->xact_depth > 0 ||
228-
retry_conn)
196+
MemoryContext ecxt = MemoryContextSwitchTo(ccxt);
197+
ErrorData *errdata = CopyErrorData();
198+
199+
/*
200+
* If connection failure is reported when starting a new remote
201+
* transaction (not subtransaction), new connection will be
202+
* reestablished later.
203+
*
204+
* After a broken connection is detected in libpq, any error other
205+
* than connection failure (e.g., out-of-memory) can be thrown
206+
* somewhere between return from libpq and the expected ereport() call
207+
* in pgfdw_report_error(). In this case, since PQstatus() indicates
208+
* CONNECTION_BAD, checking only PQstatus() causes the false detection
209+
* of connection failure. To avoid this, we also verify that the
210+
* error's sqlstate is ERRCODE_CONNECTION_FAILURE. Note that also
211+
* checking only the sqlstate can cause another false detection
212+
* because pgfdw_report_error() may report ERRCODE_CONNECTION_FAILURE
213+
* for any libpq-originated error condition.
214+
*/
215+
if (errdata->sqlerrcode != ERRCODE_CONNECTION_FAILURE ||
216+
PQstatus(entry->conn) != CONNECTION_BAD ||
217+
entry->xact_depth > 0)
218+
{
219+
MemoryContextSwitchTo(ecxt);
229220
PG_RE_THROW();
230-
retry_conn = true;
221+
}
222+
223+
/* Clean up the error state */
224+
FlushErrorState();
225+
FreeErrorData(errdata);
226+
errdata = NULL;
227+
228+
retry = true;
231229
}
232230
PG_END_TRY();
233231

234-
if (retry_conn)
232+
/*
233+
* If a broken connection is detected, disconnect it, reestablish a new
234+
* connection and retry a new remote transaction. If connection failure is
235+
* reported again, we give up getting a connection.
236+
*/
237+
if (retry)
235238
{
239+
Assert(entry->xact_depth == 0);
240+
236241
ereport(DEBUG3,
237242
(errmsg_internal("could not start remote transaction on connection %p",
238243
entry->conn)),
239244
errdetail_internal("%s", pchomp(PQerrorMessage(entry->conn))));
240-
goto retry;
245+
246+
elog(DEBUG3, "closing connection %p to reestablish a new one",
247+
entry->conn);
248+
disconnect_pg_server(entry);
249+
250+
if (entry->conn == NULL)
251+
make_new_connection(entry, user);
252+
253+
begin_remote_xact(entry);
241254
}
242255

243256
/* Remember if caller will prepare statements */
@@ -246,6 +259,37 @@ GetConnection(UserMapping *user, bool will_prep_stmt)
246259
return entry->conn;
247260
}
248261

262+
/*
263+
* Reset all transient state fields in the cached connection entry and
264+
* establish new connection to the remote server.
265+
*/
266+
static void
267+
make_new_connection(ConnCacheEntry *entry, UserMapping *user)
268+
{
269+
ForeignServer *server = GetForeignServer(user->serverid);
270+
271+
Assert(entry->conn == NULL);
272+
273+
/* Reset all transient state fields, to be sure all are clean */
274+
entry->xact_depth = 0;
275+
entry->have_prep_stmt = false;
276+
entry->have_error = false;
277+
entry->changing_xact_state = false;
278+
entry->invalidated = false;
279+
entry->server_hashvalue =
280+
GetSysCacheHashValue1(FOREIGNSERVEROID,
281+
ObjectIdGetDatum(server->serverid));
282+
entry->mapping_hashvalue =
283+
GetSysCacheHashValue1(USERMAPPINGOID,
284+
ObjectIdGetDatum(user->umid));
285+
286+
/* Now try to make the connection */
287+
entry->conn = connect_pg_server(server, user);
288+
289+
elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
290+
entry->conn, server->servername, user->umid, user->userid);
291+
}
292+
249293
/*
250294
* Connect to remote server using specified server and user mapping properties.
251295
*/

0 commit comments

Comments
 (0)