Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8aaa04b

Browse files
committed
Track shared buffer hits in pg_stat_io
Among other things, this should make it easier to calculate a useful cache hit ratio by excluding buffer reads via buffer access strategies. As buffer access strategies reuse buffers (and thus evict the prior buffer contents), it is normal to see reads on repeated scans of the same data.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
1 parent 6c3b697 commit 8aaa04b

File tree

12 files changed

+109
-47
lines changed

12 files changed

+109
-47
lines changed

doc/src/sgml/monitoring.sgml

+11
Original file line numberDiff line numberDiff line change
@@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
38553855
</entry>
38563856
</row>
38573857

3858+
<row>
3859+
<entry role="catalog_table_entry">
3860+
<para role="column_definition">
3861+
<structfield>hits</structfield> <type>bigint</type>
3862+
</para>
3863+
<para>
3864+
The number of times a desired block was found in a shared buffer.
3865+
</para>
3866+
</entry>
3867+
</row>
3868+
38583869
<row>
38593870
<entry role="catalog_table_entry">
38603871
<para role="column_definition">

src/backend/catalog/system_views.sql

+1
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,7 @@ SELECT
11281128
b.writes,
11291129
b.extends,
11301130
b.op_bytes,
1131+
b.hits,
11311132
b.evictions,
11321133
b.reuses,
11331134
b.fsyncs,

src/backend/storage/buffer/bufmgr.c

+14-24
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
472472
ForkNumber forkNum,
473473
BlockNumber blockNum,
474474
BufferAccessStrategy strategy,
475-
bool *foundPtr, IOContext *io_context);
475+
bool *foundPtr, IOContext io_context);
476476
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
477477
IOObject io_object, IOContext io_context);
478478
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
850850
if (isLocalBuf)
851851
{
852852
/*
853-
* LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
854-
* do not use a BufferAccessStrategy for I/O of temporary tables.
853+
* We do not use a BufferAccessStrategy for I/O of temporary tables.
855854
* However, in some cases, the "strategy" may not be NULL, so we can't
856855
* rely on IOContextForStrategy() to set the right IOContext for us.
857856
* This may happen in cases like CREATE TEMPORARY TABLE AS...
858857
*/
859-
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
858+
io_context = IOCONTEXT_NORMAL;
859+
io_object = IOOBJECT_TEMP_RELATION;
860+
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
860861
if (found)
861862
pgBufferUsage.local_blks_hit++;
862863
else if (isExtend)
@@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
871872
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
872873
* not currently in memory.
873874
*/
875+
io_context = IOContextForStrategy(strategy);
876+
io_object = IOOBJECT_RELATION;
874877
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
875-
strategy, &found, &io_context);
878+
strategy, &found, io_context);
876879
if (found)
877880
pgBufferUsage.shared_blks_hit++;
878881
else if (isExtend)
@@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
892895
/* Just need to update stats before we exit */
893896
*hit = true;
894897
VacuumPageHit++;
898+
pgstat_count_io_op(io_object, io_context, IOOP_HIT);
895899

896900
if (VacuumCostActive)
897901
VacuumCostBalance += VacuumCostPageHit;
@@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
987991
*/
988992
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
989993

990-
if (isLocalBuf)
991-
{
992-
bufBlock = LocalBufHdrGetBlock(bufHdr);
993-
io_object = IOOBJECT_TEMP_RELATION;
994-
}
995-
else
996-
{
997-
bufBlock = BufHdrGetBlock(bufHdr);
998-
io_object = IOOBJECT_RELATION;
999-
}
994+
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
1000995

1001996
if (isExtend)
1002997
{
@@ -1139,7 +1134,7 @@ static BufferDesc *
11391134
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11401135
BlockNumber blockNum,
11411136
BufferAccessStrategy strategy,
1142-
bool *foundPtr, IOContext *io_context)
1137+
bool *foundPtr, IOContext io_context)
11431138
{
11441139
bool from_ring;
11451140
BufferTag newTag; /* identity of requested block */
@@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11931188
{
11941189
/*
11951190
* If we get here, previous attempts to read the buffer must
1196-
* have failed ... but we shall bravely try again. Set
1197-
* io_context since we will in fact need to count an IO
1198-
* Operation.
1191+
* have failed ... but we shall bravely try again.
11991192
*/
1200-
*io_context = IOContextForStrategy(strategy);
12011193
*foundPtr = false;
12021194
}
12031195
}
@@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12111203
*/
12121204
LWLockRelease(newPartitionLock);
12131205

1214-
*io_context = IOContextForStrategy(strategy);
1215-
12161206
/* Loop here in case we have to try another victim buffer */
12171207
for (;;)
12181208
{
@@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12951285
smgr->smgr_rlocator.locator.dbOid,
12961286
smgr->smgr_rlocator.locator.relNumber);
12971287

1298-
FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
1288+
FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
12991289
LWLockRelease(BufferDescriptorGetContentLock(buf));
13001290

13011291
ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
14941484
* we may have been forced to release the buffer due to concurrent
14951485
* pinners or erroring out.
14961486
*/
1497-
pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
1487+
pgstat_count_io_op(IOOBJECT_RELATION, io_context,
14981488
from_ring ? IOOP_REUSE : IOOP_EVICT);
14991489
}
15001490

src/backend/storage/buffer/localbuf.c

+2-9
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
108108
*/
109109
BufferDesc *
110110
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
111-
bool *foundPtr, IOContext *io_context)
111+
bool *foundPtr)
112112
{
113113
BufferTag newTag; /* identity of requested block */
114114
LocalBufferLookupEnt *hresult;
@@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
128128
hresult = (LocalBufferLookupEnt *)
129129
hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);
130130

131-
/*
132-
* IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
133-
* io_context here (instead of after a buffer hit would have returned) for
134-
* convenience since we don't have to worry about the overhead of calling
135-
* IOContextForStrategy().
136-
*/
137-
*io_context = IOCONTEXT_NORMAL;
138-
139131
if (hresult)
140132
{
141133
b = hresult->id;
@@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
239231
buf_state &= ~BM_DIRTY;
240232
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
241233

234+
/* Temporary table I/O does not use Buffer Access Strategies */
242235
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
243236
pgBufferUsage.local_blks_written++;
244237
}

src/backend/utils/activity/pgstat_io.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
344344
* Some BackendTypes will not do certain IOOps.
345345
*/
346346
if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
347-
(io_op == IOOP_READ || io_op == IOOP_EVICT))
347+
(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
348348
return false;
349349

350350
if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||

src/backend/utils/adt/pgstatfuncs.c

+7-4
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ typedef enum io_stat_col
12591259
IO_COL_WRITES,
12601260
IO_COL_EXTENDS,
12611261
IO_COL_CONVERSION,
1262+
IO_COL_HITS,
12621263
IO_COL_EVICTIONS,
12631264
IO_COL_REUSES,
12641265
IO_COL_FSYNCS,
@@ -1277,16 +1278,18 @@ pgstat_get_io_op_index(IOOp io_op)
12771278
{
12781279
case IOOP_EVICT:
12791280
return IO_COL_EVICTIONS;
1281+
case IOOP_EXTEND:
1282+
return IO_COL_EXTENDS;
1283+
case IOOP_FSYNC:
1284+
return IO_COL_FSYNCS;
1285+
case IOOP_HIT:
1286+
return IO_COL_HITS;
12801287
case IOOP_READ:
12811288
return IO_COL_READS;
12821289
case IOOP_REUSE:
12831290
return IO_COL_REUSES;
12841291
case IOOP_WRITE:
12851292
return IO_COL_WRITES;
1286-
case IOOP_EXTEND:
1287-
return IO_COL_EXTENDS;
1288-
case IOOP_FSYNC:
1289-
return IO_COL_FSYNCS;
12901293
}
12911294

12921295
elog(ERROR, "unrecognized IOOp value: %d", io_op);

src/include/catalog/pg_proc.dat

+3-3
Original file line numberDiff line numberDiff line change
@@ -5749,9 +5749,9 @@
57495749
proname => 'pg_stat_get_io', provolatile => 'v',
57505750
prorows => '30', proretset => 't',
57515751
proparallel => 'r', prorettype => 'record', proargtypes => '',
5752-
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
5753-
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
5754-
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
5752+
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
5753+
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
5754+
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
57555755
prosrc => 'pg_stat_get_io' },
57565756

57575757
{ oid => '1136', descr => 'statistics: information about WAL activity',

src/include/pgstat.h

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ typedef enum IOOp
296296
IOOP_EVICT,
297297
IOOP_EXTEND,
298298
IOOP_FSYNC,
299+
IOOP_HIT,
299300
IOOP_READ,
300301
IOOP_REUSE,
301302
IOOP_WRITE,

src/include/storage/buf_internals.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
419419
ForkNumber forkNum,
420420
BlockNumber blockNum);
421421
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
422-
BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
422+
BlockNumber blockNum, bool *foundPtr);
423423
extern void MarkLocalBufferDirty(Buffer buffer);
424424
extern void DropRelationLocalBuffers(RelFileLocator rlocator,
425425
ForkNumber forkNum,

src/test/regress/expected/rules.out

+2-1
Original file line numberDiff line numberDiff line change
@@ -1884,11 +1884,12 @@ pg_stat_io| SELECT backend_type,
18841884
writes,
18851885
extends,
18861886
op_bytes,
1887+
hits,
18871888
evictions,
18881889
reuses,
18891890
fsyncs,
18901891
stats_reset
1891-
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
1892+
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
18921893
pg_stat_progress_analyze| SELECT s.pid,
18931894
s.datid,
18941895
d.datname,

src/test/regress/expected/stats.out

+44-2
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
11311131
-- - writes of shared buffers to permanent storage
11321132
-- - extends of relations using shared buffers
11331133
-- - fsyncs done to ensure the durability of data dirtying shared buffers
1134+
-- - shared buffer hits
11341135
-- There is no test for blocks evicted from shared buffers, because we cannot
11351136
-- be sure of the state of shared buffers at the point the test is run.
11361137
-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@@ -1208,6 +1209,47 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
12081209
t
12091210
(1 row)
12101211

1212+
SELECT sum(hits) AS io_sum_shared_before_hits
1213+
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
1214+
-- Select from the table again to count hits.
1215+
-- Ensure we generate hits by forcing a nested loop self-join with no
1216+
-- materialize node. The outer side's buffer will stay pinned, preventing its
1217+
-- eviction, while we loop through the inner side and generate hits.
1218+
BEGIN;
1219+
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
1220+
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
1221+
-- ensure plan stays as we expect it to
1222+
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
1223+
QUERY PLAN
1224+
-------------------------------------------
1225+
Aggregate
1226+
-> Nested Loop
1227+
Join Filter: (t1.a = t2.a)
1228+
-> Seq Scan on test_io_shared t1
1229+
-> Seq Scan on test_io_shared t2
1230+
(5 rows)
1231+
1232+
SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
1233+
count
1234+
-------
1235+
100
1236+
(1 row)
1237+
1238+
COMMIT;
1239+
SELECT pg_stat_force_next_flush();
1240+
pg_stat_force_next_flush
1241+
--------------------------
1242+
1243+
(1 row)
1244+
1245+
SELECT sum(hits) AS io_sum_shared_after_hits
1246+
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
1247+
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
1248+
?column?
1249+
----------
1250+
t
1251+
(1 row)
1252+
12111253
DROP TABLE test_io_shared;
12121254
-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
12131255
-- - eviction of local buffers in order to reuse them
@@ -1342,15 +1384,15 @@ SELECT pg_stat_have_stats('io', 0, 0);
13421384
t
13431385
(1 row)
13441386

1345-
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
1387+
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
13461388
FROM pg_stat_io \gset
13471389
SELECT pg_stat_reset_shared('io');
13481390
pg_stat_reset_shared
13491391
----------------------
13501392

13511393
(1 row)
13521394

1353-
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
1395+
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
13541396
FROM pg_stat_io \gset
13551397
SELECT :io_stats_post_reset < :io_stats_pre_reset;
13561398
?column?

src/test/regress/sql/stats.sql

+22-2
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
541541
-- - writes of shared buffers to permanent storage
542542
-- - extends of relations using shared buffers
543543
-- - fsyncs done to ensure the durability of data dirtying shared buffers
544+
-- - shared buffer hits
544545

545546
-- There is no test for blocks evicted from shared buffers, because we cannot
546547
-- be sure of the state of shared buffers at the point the test is run.
@@ -587,6 +588,25 @@ SELECT pg_stat_force_next_flush();
587588
SELECT sum(reads) AS io_sum_shared_after_reads
588589
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
589590
SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
591+
592+
SELECT sum(hits) AS io_sum_shared_before_hits
593+
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
594+
-- Select from the table again to count hits.
595+
-- Ensure we generate hits by forcing a nested loop self-join with no
596+
-- materialize node. The outer side's buffer will stay pinned, preventing its
597+
-- eviction, while we loop through the inner side and generate hits.
598+
BEGIN;
599+
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
600+
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
601+
-- ensure plan stays as we expect it to
602+
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
603+
SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
604+
COMMIT;
605+
SELECT pg_stat_force_next_flush();
606+
SELECT sum(hits) AS io_sum_shared_after_hits
607+
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
608+
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
609+
590610
DROP TABLE test_io_shared;
591611

592612
-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
@@ -674,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
674694

675695
-- Test IO stats reset
676696
SELECT pg_stat_have_stats('io', 0, 0);
677-
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
697+
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
678698
FROM pg_stat_io \gset
679699
SELECT pg_stat_reset_shared('io');
680-
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
700+
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
681701
FROM pg_stat_io \gset
682702
SELECT :io_stats_post_reset < :io_stats_pre_reset;
683703

0 commit comments

Comments
 (0)