Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c8dfc89

Browse files
committed
Make isolationtester more robust on locked commands
Noah Misch diagnosed the buildfarm problems in the isolation tests partly as failure to differentiate backends properly; the old code was using backend IDs, which is not good enough because a new backend might use an already used ID. Use PIDs instead. Also, the code was purposely careless about other concurrent activity, because it isn't expected; and in fact, it doesn't affect the vast majority of the time. However, it can be observed that autovacuum can block tables for long enough to cause sporadic failures. The new code accounts for that by ignoring locks held by processes not explicitly declared in our spec file. Author: Noah Misch
1 parent d6db0e4 commit c8dfc89

File tree

1 file changed

+93
-14
lines changed

1 file changed

+93
-14
lines changed

src/test/isolation/isolationtester.c

Lines changed: 93 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#endif
2222

2323
#include "libpq-fe.h"
24+
#include "pqexpbuffer.h"
2425

2526
#include "isolationtester.h"
2627

@@ -31,7 +32,7 @@
3132
* connections represent spec-defined sessions.
3233
*/
3334
static PGconn **conns = NULL;
34-
static const char **backend_ids = NULL;
35+
static const char **backend_pids = NULL;
3536
static int nconns = 0;
3637

3738
static void run_all_permutations(TestSpec * testspec);
@@ -67,6 +68,7 @@ main(int argc, char **argv)
6768
TestSpec *testspec;
6869
int i;
6970
PGresult *res;
71+
PQExpBufferData wait_query;
7072

7173
/*
7274
* If the user supplies a parameter on the command line, use it as the
@@ -89,7 +91,7 @@ main(int argc, char **argv)
8991
*/
9092
nconns = 1 + testspec->nsessions;
9193
conns = calloc(nconns, sizeof(PGconn *));
92-
backend_ids = calloc(nconns, sizeof(*backend_ids));
94+
backend_pids = calloc(nconns, sizeof(*backend_pids));
9395
for (i = 0; i < nconns; i++)
9496
{
9597
conns[i] = PQconnectdb(conninfo);
@@ -112,23 +114,22 @@ main(int argc, char **argv)
112114
}
113115
PQclear(res);
114116

115-
/* Get the backend ID for lock wait checking. */
116-
res = PQexec(conns[i], "SELECT i FROM pg_stat_get_backend_idset() t(i) "
117-
"WHERE pg_stat_get_backend_pid(i) = pg_backend_pid()");
117+
/* Get the backend pid for lock wait checking. */
118+
res = PQexec(conns[i], "SELECT pg_backend_pid()");
118119
if (PQresultStatus(res) == PGRES_TUPLES_OK)
119120
{
120121
if (PQntuples(res) == 1 && PQnfields(res) == 1)
121-
backend_ids[i] = strdup(PQgetvalue(res, 0, 0));
122+
backend_pids[i] = strdup(PQgetvalue(res, 0, 0));
122123
else
123124
{
124-
fprintf(stderr, "backend id query returned %d rows and %d columns, expected 1 row and 1 column",
125+
fprintf(stderr, "backend pid query returned %d rows and %d columns, expected 1 row and 1 column",
125126
PQntuples(res), PQnfields(res));
126127
exit_nicely();
127128
}
128129
}
129130
else
130131
{
131-
fprintf(stderr, "backend id query failed: %s",
132+
fprintf(stderr, "backend pid query failed: %s",
132133
PQerrorMessage(conns[i]));
133134
exit_nicely();
134135
}
@@ -145,15 +146,95 @@ main(int argc, char **argv)
145146
session->steps[stepindex]->session = i;
146147
}
147148

148-
res = PQprepare(conns[0], PREP_WAITING,
149-
"SELECT 1 WHERE pg_stat_get_backend_waiting($1)", 0, NULL);
149+
/*
150+
* Build the query we'll use to detect lock contention among sessions in
151+
* the test specification. Most of the time, we could get away with
152+
* simply checking whether a session is waiting for *any* lock: we don't
153+
* exactly expect concurrent use of test tables. However, autovacuum will
154+
* occasionally take AccessExclusiveLock to truncate a table, and we must
155+
* ignore that transient wait.
156+
*/
157+
initPQExpBuffer(&wait_query);
158+
appendPQExpBufferStr(&wait_query,
159+
"SELECT 1 FROM pg_locks holder, pg_locks waiter "
160+
"WHERE NOT waiter.granted AND waiter.pid = $1 "
161+
"AND holder.granted "
162+
"AND holder.pid <> $1 AND holder.pid IN (");
163+
/* The spec syntax requires at least one session; assume that here. */
164+
appendPQExpBuffer(&wait_query, "%s", backend_pids[1]);
165+
for (i = 2; i < nconns; i++)
166+
appendPQExpBuffer(&wait_query, ", %s", backend_pids[i]);
167+
appendPQExpBufferStr(&wait_query,
168+
") "
169+
170+
"AND holder.mode = ANY (CASE waiter.mode "
171+
"WHEN 'AccessShareLock' THEN ARRAY["
172+
"'AccessExclusiveLock'] "
173+
"WHEN 'RowShareLock' THEN ARRAY["
174+
"'ExclusiveLock',"
175+
"'AccessExclusiveLock'] "
176+
"WHEN 'RowExclusiveLock' THEN ARRAY["
177+
"'ShareLock',"
178+
"'ShareRowExclusiveLock',"
179+
"'ExclusiveLock',"
180+
"'AccessExclusiveLock'] "
181+
"WHEN 'ShareUpdateExclusiveLock' THEN ARRAY["
182+
"'ShareUpdateExclusiveLock',"
183+
"'ShareLock',"
184+
"'ShareRowExclusiveLock',"
185+
"'ExclusiveLock',"
186+
"'AccessExclusiveLock'] "
187+
"WHEN 'ShareLock' THEN ARRAY["
188+
"'RowExclusiveLock',"
189+
"'ShareUpdateExclusiveLock',"
190+
"'ShareRowExclusiveLock',"
191+
"'ExclusiveLock',"
192+
"'AccessExclusiveLock'] "
193+
"WHEN 'ShareRowExclusiveLock' THEN ARRAY["
194+
"'RowExclusiveLock',"
195+
"'ShareUpdateExclusiveLock',"
196+
"'ShareLock',"
197+
"'ShareRowExclusiveLock',"
198+
"'ExclusiveLock',"
199+
"'AccessExclusiveLock'] "
200+
"WHEN 'ExclusiveLock' THEN ARRAY["
201+
"'RowShareLock',"
202+
"'RowExclusiveLock',"
203+
"'ShareUpdateExclusiveLock',"
204+
"'ShareLock',"
205+
"'ShareRowExclusiveLock',"
206+
"'ExclusiveLock',"
207+
"'AccessExclusiveLock'] "
208+
"WHEN 'AccessExclusiveLock' THEN ARRAY["
209+
"'AccessShareLock',"
210+
"'RowShareLock',"
211+
"'RowExclusiveLock',"
212+
"'ShareUpdateExclusiveLock',"
213+
"'ShareLock',"
214+
"'ShareRowExclusiveLock',"
215+
"'ExclusiveLock',"
216+
"'AccessExclusiveLock'] END) "
217+
218+
"AND holder.locktype IS NOT DISTINCT FROM waiter.locktype "
219+
"AND holder.database IS NOT DISTINCT FROM waiter.database "
220+
"AND holder.relation IS NOT DISTINCT FROM waiter.relation "
221+
"AND holder.page IS NOT DISTINCT FROM waiter.page "
222+
"AND holder.tuple IS NOT DISTINCT FROM waiter.tuple "
223+
"AND holder.virtualxid IS NOT DISTINCT FROM waiter.virtualxid "
224+
"AND holder.transactionid IS NOT DISTINCT FROM waiter.transactionid "
225+
"AND holder.classid IS NOT DISTINCT FROM waiter.classid "
226+
"AND holder.objid IS NOT DISTINCT FROM waiter.objid "
227+
"AND holder.objsubid IS NOT DISTINCT FROM waiter.objsubid ");
228+
229+
res = PQprepare(conns[0], PREP_WAITING, wait_query.data, 0, NULL);
150230
if (PQresultStatus(res) != PGRES_COMMAND_OK)
151231
{
152232
fprintf(stderr, "prepare of lock wait query failed: %s",
153233
PQerrorMessage(conns[0]));
154234
exit_nicely();
155235
}
156236
PQclear(res);
237+
termPQExpBuffer(&wait_query);
157238

158239
/*
159240
* Run the permutations specified in the spec, or all if none were
@@ -411,9 +492,7 @@ run_permutation(TestSpec * testspec, int nsteps, Step ** steps)
411492
* Our caller already sent the query associated with this step. Wait for it
412493
* to either complete or (if given the STEP_NONBLOCK flag) to block while
413494
* waiting for a lock. We assume that any lock wait will persist until we
414-
* have executed additional steps in the permutation. This is not fully
415-
* robust -- a concurrent autovacuum could briefly take a lock with which we
416-
* conflict. The risk may be low enough to discount.
495+
* have executed additional steps in the permutation.
417496
*
418497
* When calling this function on behalf of a given step for a second or later
419498
* time, pass the STEP_RETRY flag. This only affects the messages printed.
@@ -450,7 +529,7 @@ try_complete_step(Step *step, int flags)
450529
int ntuples;
451530

452531
res = PQexecPrepared(conns[0], PREP_WAITING, 1,
453-
&backend_ids[step->session + 1],
532+
&backend_pids[step->session + 1],
454533
NULL, NULL, 0);
455534
if (PQresultStatus(res) != PGRES_TUPLES_OK)
456535
{

0 commit comments

Comments
 (0)