@@ -212,8 +212,7 @@ wait_for_relation_state_change(Oid relid, char expected_state)
212
212
*
213
213
* Used when transitioning from SYNCWAIT state to CATCHUP.
214
214
*
215
- * Returns false if the apply worker has disappeared or the table state has been
216
- * reset.
215
+ * Returns false if the apply worker has disappeared.
217
216
*/
218
217
static bool
219
218
wait_for_worker_state_change (char expected_state )
@@ -226,17 +225,30 @@ wait_for_worker_state_change(char expected_state)
226
225
227
226
CHECK_FOR_INTERRUPTS ();
228
227
229
- /* Bail if the apply has died. */
228
+ /*
229
+ * Done if already in correct state. (We assume this fetch is atomic
230
+ * enough to not give a misleading answer if we do it with no lock.)
231
+ */
232
+ if (MyLogicalRepWorker -> relstate == expected_state )
233
+ return true;
234
+
235
+ /*
236
+ * Bail out if the apply worker has died, else signal it that we're
237
+ * waiting.
238
+ */
230
239
LWLockAcquire (LogicalRepWorkerLock , LW_SHARED );
231
240
worker = logicalrep_worker_find (MyLogicalRepWorker -> subid ,
232
241
InvalidOid , false);
242
+ if (worker && worker -> proc )
243
+ logicalrep_worker_wakeup_ptr (worker );
233
244
LWLockRelease (LogicalRepWorkerLock );
234
245
if (!worker )
235
- return false;
236
-
237
- if (MyLogicalRepWorker -> relstate == expected_state )
238
- return true;
246
+ break ;
239
247
248
+ /*
249
+ * Wait. We expect to get a latch signal back from the apply worker,
250
+ * but use a timeout in case it dies without sending one.
251
+ */
240
252
rc = WaitLatch (MyLatch ,
241
253
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH ,
242
254
1000L , WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE );
@@ -245,7 +257,8 @@ wait_for_worker_state_change(char expected_state)
245
257
if (rc & WL_POSTMASTER_DEATH )
246
258
proc_exit (1 );
247
259
248
- ResetLatch (MyLatch );
260
+ if (rc & WL_LATCH_SET )
261
+ ResetLatch (MyLatch );
249
262
}
250
263
251
264
return false;
@@ -422,83 +435,96 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
422
435
else
423
436
{
424
437
LogicalRepWorker * syncworker ;
425
- int nsyncworkers = 0 ;
426
438
439
+ /*
440
+ * Look for a sync worker for this relation.
441
+ */
427
442
LWLockAcquire (LogicalRepWorkerLock , LW_SHARED );
443
+
428
444
syncworker = logicalrep_worker_find (MyLogicalRepWorker -> subid ,
429
445
rstate -> relid , false);
446
+
430
447
if (syncworker )
431
448
{
449
+ /* Found one, update our copy of its state */
432
450
SpinLockAcquire (& syncworker -> relmutex );
433
451
rstate -> state = syncworker -> relstate ;
434
452
rstate -> lsn = syncworker -> relstate_lsn ;
453
+ if (rstate -> state == SUBREL_STATE_SYNCWAIT )
454
+ {
455
+ /*
456
+ * Sync worker is waiting for apply. Tell sync worker it
457
+ * can catch up now.
458
+ */
459
+ syncworker -> relstate = SUBREL_STATE_CATCHUP ;
460
+ syncworker -> relstate_lsn =
461
+ Max (syncworker -> relstate_lsn , current_lsn );
462
+ }
435
463
SpinLockRelease (& syncworker -> relmutex );
464
+
465
+ /* If we told worker to catch up, wait for it. */
466
+ if (rstate -> state == SUBREL_STATE_SYNCWAIT )
467
+ {
468
+ /* Signal the sync worker, as it may be waiting for us. */
469
+ if (syncworker -> proc )
470
+ logicalrep_worker_wakeup_ptr (syncworker );
471
+
472
+ /* Now safe to release the LWLock */
473
+ LWLockRelease (LogicalRepWorkerLock );
474
+
475
+ /*
476
+ * Enter busy loop and wait for synchronization worker to
477
+ * reach expected state (or die trying).
478
+ */
479
+ if (!started_tx )
480
+ {
481
+ StartTransactionCommand ();
482
+ started_tx = true;
483
+ }
484
+
485
+ wait_for_relation_state_change (rstate -> relid ,
486
+ SUBREL_STATE_SYNCDONE );
487
+ }
488
+ else
489
+ LWLockRelease (LogicalRepWorkerLock );
436
490
}
437
491
else
438
-
492
+ {
439
493
/*
440
494
* If there is no sync worker for this table yet, count
441
495
* running sync workers for this subscription, while we have
442
- * the lock, for later .
496
+ * the lock.
443
497
*/
444
- nsyncworkers = logicalrep_sync_worker_count (MyLogicalRepWorker -> subid );
445
- LWLockRelease (LogicalRepWorkerLock );
446
-
447
- /*
448
- * There is a worker synchronizing the relation and waiting for
449
- * apply to do something.
450
- */
451
- if (syncworker && rstate -> state == SUBREL_STATE_SYNCWAIT )
452
- {
453
- /*
454
- * Tell sync worker it can catchup now. We'll wait for it so
455
- * it does not get lost.
456
- */
457
- SpinLockAcquire (& syncworker -> relmutex );
458
- syncworker -> relstate = SUBREL_STATE_CATCHUP ;
459
- syncworker -> relstate_lsn =
460
- Max (syncworker -> relstate_lsn , current_lsn );
461
- SpinLockRelease (& syncworker -> relmutex );
498
+ int nsyncworkers =
499
+ logicalrep_sync_worker_count (MyLogicalRepWorker -> subid );
462
500
463
- /* Signal the sync worker, as it may be waiting for us. */
464
- logicalrep_worker_wakeup_ptr ( syncworker );
501
+ /* Now safe to release the LWLock */
502
+ LWLockRelease ( LogicalRepWorkerLock );
465
503
466
504
/*
467
- * Enter busy loop and wait for synchronization worker to
468
- * reach expected state (or die trying) .
505
+ * If there are free sync worker slot(s), start a new sync
506
+ * worker for the table .
469
507
*/
470
- if (!started_tx )
471
- {
472
- StartTransactionCommand ();
473
- started_tx = true;
474
- }
475
- wait_for_relation_state_change (rstate -> relid ,
476
- SUBREL_STATE_SYNCDONE );
477
- }
478
-
479
- /*
480
- * If there is no sync worker registered for the table and there
481
- * is some free sync worker slot, start a new sync worker for the
482
- * table.
483
- */
484
- else if (!syncworker && nsyncworkers < max_sync_workers_per_subscription )
485
- {
486
- TimestampTz now = GetCurrentTimestamp ();
487
- struct tablesync_start_time_mapping * hentry ;
488
- bool found ;
489
-
490
- hentry = hash_search (last_start_times , & rstate -> relid , HASH_ENTER , & found );
491
-
492
- if (!found ||
493
- TimestampDifferenceExceeds (hentry -> last_start_time , now ,
494
- wal_retrieve_retry_interval ))
508
+ if (nsyncworkers < max_sync_workers_per_subscription )
495
509
{
496
- logicalrep_worker_launch (MyLogicalRepWorker -> dbid ,
497
- MySubscription -> oid ,
498
- MySubscription -> name ,
499
- MyLogicalRepWorker -> userid ,
500
- rstate -> relid );
501
- hentry -> last_start_time = now ;
510
+ TimestampTz now = GetCurrentTimestamp ();
511
+ struct tablesync_start_time_mapping * hentry ;
512
+ bool found ;
513
+
514
+ hentry = hash_search (last_start_times , & rstate -> relid ,
515
+ HASH_ENTER , & found );
516
+
517
+ if (!found ||
518
+ TimestampDifferenceExceeds (hentry -> last_start_time , now ,
519
+ wal_retrieve_retry_interval ))
520
+ {
521
+ logicalrep_worker_launch (MyLogicalRepWorker -> dbid ,
522
+ MySubscription -> oid ,
523
+ MySubscription -> name ,
524
+ MyLogicalRepWorker -> userid ,
525
+ rstate -> relid );
526
+ hentry -> last_start_time = now ;
527
+ }
502
528
}
503
529
}
504
530
}
@@ -512,7 +538,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
512
538
}
513
539
514
540
/*
515
- * Process state possible change(s) of tables that are being synchronized.
541
+ * Process possible state change(s) of tables that are being synchronized.
516
542
*/
517
543
void
518
544
process_syncing_tables (XLogRecPtr current_lsn )
0 commit comments