Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b58c250

Browse files
committed
Fix leakage of cost_limit when multiple autovacuum workers are active.
When using the default autovacuum_vac_cost_limit, autovac_balance_cost relied on VacuumCostLimit to contain the correct global value. However, after the first time through the loop in a particular worker process it no longer did, because we had overwritten it in previous iterations. Depending on the state of other autovac workers, this could result in a steady reduction of the effective cost_limit setting as a particular worker processed more and more tables, causing it to go slower and slower. Spotted by Simon Poole (bug #5759). Fix by saving and restoring the GUC variables in the loop in do_autovacuum. In passing, improve a few comments. Back-patch to 8.3, since the cost rebalancing code has been buggy ever since it was put in.
1 parent 4fc115b commit b58c250

File tree

1 file changed

+45
-18
lines changed

1 file changed

+45
-18
lines changed

src/backend/postmaster/autovacuum.c

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ typedef struct autovac_table
190190
*
191191
* wi_links entry into free list or running list
192192
* wi_dboid OID of the database this worker is supposed to work on
193-
* wi_tableoid OID of the table currently being vacuumed
193+
* wi_tableoid OID of the table currently being vacuumed, if any
194194
* wi_proc pointer to PGPROC of the running worker, NULL if not started
195195
* wi_launchtime Time at which this worker was launched
196196
* wi_cost_* Vacuum cost-based delay parameters current in this worker
@@ -1629,7 +1629,7 @@ FreeWorkerInfo(int code, Datum arg)
16291629
* limit setting of the remaining workers.
16301630
*
16311631
* We somewhat ignore the risk that the launcher changes its PID
1632-
* between we reading it and the actual kill; we expect ProcKill to be
1632+
* between us reading it and the actual kill; we expect ProcKill to be
16331633
* called shortly after us, and we assume that PIDs are not reused too
16341634
* quickly after a process exits.
16351635
*/
@@ -1673,16 +1673,18 @@ AutoVacuumUpdateDelay(void)
16731673

16741674
/*
16751675
* autovac_balance_cost
1676-
* Recalculate the cost limit setting for each active workers.
1676+
* Recalculate the cost limit setting for each active worker.
16771677
*
16781678
* Caller must hold the AutovacuumLock in exclusive mode.
16791679
*/
16801680
static void
16811681
autovac_balance_cost(void)
16821682
{
1683-
WorkerInfo worker;
1684-
16851683
/*
1684+
* The idea here is that we ration out I/O equally. The amount of I/O
1685+
* that a worker can consume is determined by cost_limit/cost_delay, so
1686+
* we try to equalize those ratios rather than the raw limit settings.
1687+
*
16861688
* note: in cost_limit, zero also means use value from elsewhere, because
16871689
* zero is not a valid value.
16881690
*/
@@ -1692,6 +1694,7 @@ autovac_balance_cost(void)
16921694
autovacuum_vac_cost_delay : VacuumCostDelay);
16931695
double cost_total;
16941696
double cost_avail;
1697+
WorkerInfo worker;
16951698

16961699
/* not set? nothing to do */
16971700
if (vac_cost_limit <= 0 || vac_cost_delay <= 0)
@@ -1718,7 +1721,7 @@ autovac_balance_cost(void)
17181721
return;
17191722

17201723
/*
1721-
* Adjust each cost limit of active workers to balance the total of cost
1724+
* Adjust cost limit of each active worker to balance the total of cost
17221725
* limit to autovacuum_vacuum_cost_limit.
17231726
*/
17241727
cost_avail = (double) vac_cost_limit / vac_cost_delay;
@@ -1734,14 +1737,19 @@ autovac_balance_cost(void)
17341737
(cost_avail * worker->wi_cost_limit_base / cost_total);
17351738

17361739
/*
1737-
* We put a lower bound of 1 to the cost_limit, to avoid division-
1738-
* by-zero in the vacuum code.
1740+
* We put a lower bound of 1 on the cost_limit, to avoid division-
1741+
* by-zero in the vacuum code. Also, in case of roundoff trouble
1742+
* in these calculations, let's be sure we don't ever set
1743+
* cost_limit to more than the base value.
17391744
*/
1740-
worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1);
1741-
1742-
elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
1743-
worker->wi_proc->pid, worker->wi_dboid,
1744-
worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay);
1745+
worker->wi_cost_limit = Max(Min(limit,
1746+
worker->wi_cost_limit_base),
1747+
1);
1748+
1749+
elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_limit_base=%d, cost_delay=%d)",
1750+
worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid,
1751+
worker->wi_cost_limit, worker->wi_cost_limit_base,
1752+
worker->wi_cost_delay);
17451753
}
17461754

17471755
worker = (WorkerInfo) SHMQueueNext(&AutoVacuumShmem->av_runningWorkers,
@@ -2125,6 +2133,8 @@ do_autovacuum(void)
21252133
autovac_table *tab;
21262134
WorkerInfo worker;
21272135
bool skipit;
2136+
int stdVacuumCostDelay;
2137+
int stdVacuumCostLimit;
21282138

21292139
CHECK_FOR_INTERRUPTS();
21302140

@@ -2198,11 +2208,15 @@ do_autovacuum(void)
21982208
MyWorkerInfo->wi_tableoid = relid;
21992209
LWLockRelease(AutovacuumScheduleLock);
22002210

2201-
/* Set the initial vacuum cost parameters for this table */
2202-
VacuumCostDelay = tab->at_vacuum_cost_delay;
2203-
VacuumCostLimit = tab->at_vacuum_cost_limit;
2211+
/*
2212+
* Remember the prevailing values of the vacuum cost GUCs. We have
2213+
* to restore these at the bottom of the loop, else we'll compute
2214+
* wrong values in the next iteration of autovac_balance_cost().
2215+
*/
2216+
stdVacuumCostDelay = VacuumCostDelay;
2217+
stdVacuumCostLimit = VacuumCostLimit;
22042218

2205-
/* Last fixups before actually starting to work */
2219+
/* Must hold AutovacuumLock while mucking with cost balance info */
22062220
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
22072221

22082222
/* advertise my cost delay parameters for the balancing algorithm */
@@ -2213,6 +2227,9 @@ do_autovacuum(void)
22132227
/* do a balance */
22142228
autovac_balance_cost();
22152229

2230+
/* set the active cost parameters from the result of that */
2231+
AutoVacuumUpdateDelay();
2232+
22162233
/* done */
22172234
LWLockRelease(AutovacuumLock);
22182235

@@ -2290,10 +2307,20 @@ do_autovacuum(void)
22902307
pfree(tab->at_relname);
22912308
pfree(tab);
22922309

2293-
/* remove my info from shared memory */
2310+
/*
2311+
* Remove my info from shared memory. We could, but intentionally
2312+
* don't, clear wi_cost_limit and friends --- this is on the
2313+
* assumption that we probably have more to do with similar cost
2314+
* settings, so we don't want to give up our share of I/O for a very
2315+
* short interval and thereby thrash the global balance.
2316+
*/
22942317
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
22952318
MyWorkerInfo->wi_tableoid = InvalidOid;
22962319
LWLockRelease(AutovacuumLock);
2320+
2321+
/* restore vacuum cost GUCs for the next iteration */
2322+
VacuumCostDelay = stdVacuumCostDelay;
2323+
VacuumCostLimit = stdVacuumCostLimit;
22972324
}
22982325

22992326
/*

0 commit comments

Comments
 (0)