Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3febabb

Browse files
bdrouvotAWSCommitfest Bot
authored and
Commitfest Bot
committed
Add os_page_num to pg_buffercache
Commit ba2a3c2 added a way to check whether a buffer is spread across multiple OS memory pages. Add the same information to pg_buffercache, so that NUMA support does not need to be enabled to obtain it.
1 parent e050af2 commit 3febabb

File tree

5 files changed

+135
-28
lines changed

5 files changed

+135
-28
lines changed

contrib/pg_buffercache/expected/pg_buffercache.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
CREATE EXTENSION pg_buffercache;
2-
select count(*) = (select setting::bigint
2+
select count(*) >= (select setting::bigint
33
from pg_settings
44
where name = 'shared_buffers')
55
from pg_buffercache;

contrib/pg_buffercache/pg_buffercache--1.5--1.6.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,10 @@ CREATE FUNCTION pg_buffercache_evict_all(
4444
OUT buffers_skipped int4)
4545
AS 'MODULE_PATHNAME', 'pg_buffercache_evict_all'
4646
LANGUAGE C PARALLEL SAFE VOLATILE;
47+
48+
-- Upgrade view to 1.6 format
49+
CREATE OR REPLACE VIEW pg_buffercache AS
50+
SELECT P.* FROM pg_buffercache_pages() AS P
51+
(bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid,
52+
relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2,
53+
pinning_backends int4, os_page_num bigint);

contrib/pg_buffercache/pg_buffercache_pages.c

Lines changed: 92 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020

2121
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
22-
#define NUM_BUFFERCACHE_PAGES_ELEM 9
22+
#define NUM_BUFFERCACHE_PAGES_ELEM 10
2323
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
2424
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
2525
#define NUM_BUFFERCACHE_EVICT_ELEM 2
@@ -54,6 +54,7 @@ typedef struct
5454
* because of bufmgr.c's PrivateRefCount infrastructure.
5555
*/
5656
int32 pinning_backends;
57+
int64 page_num;
5758
} BufferCachePagesRec;
5859

5960

@@ -63,6 +64,9 @@ typedef struct
6364
typedef struct
6465
{
6566
TupleDesc tupdesc;
67+
int buffers_per_page;
68+
int pages_per_buffer;
69+
int os_page_size;
6670
BufferCachePagesRec *record;
6771
} BufferCachePagesContext;
6872

@@ -119,8 +123,25 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
119123

120124
if (SRF_IS_FIRSTCALL())
121125
{
122-
int i;
126+
int i,
127+
idx;
128+
Size os_page_size;
129+
char *startptr;
130+
int pages_per_buffer;
131+
int max_entries;
123132

133+
/*
134+
* Different database block sizes (4kB, 8kB, ..., 32kB) can be used, while
135+
* the OS may have different memory page sizes.
136+
*
137+
* To correctly map between them, we need to: 1. Determine the OS memory
138+
* page size 2. Calculate how many OS pages are used by all buffer blocks
139+
* 3. Calculate how many OS pages are contained within each database
140+
* block.
141+
*/
142+
os_page_size = pg_get_shmem_pagesize();
143+
144+
/* Initialize the multi-call context, load entries about buffers */
124145
funcctx = SRF_FIRSTCALL_INIT();
125146

126147
/* Switch context when allocating stuff to be used in later calls */
@@ -163,24 +184,36 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
163184
TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
164185
INT2OID, -1, 0);
165186

166-
if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
187+
if (expected_tupledesc->natts >= (NUM_BUFFERCACHE_PAGES_ELEM - 1))
167188
TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
168189
INT4OID, -1, 0);
169190

191+
if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
192+
TupleDescInitEntry(tupledesc, (AttrNumber) 10, "os_page_num",
193+
INT8OID, -1, 0);
194+
170195
fctx->tupdesc = BlessTupleDesc(tupledesc);
171196

172-
/* Allocate NBuffers worth of BufferCachePagesRec records. */
197+
/*
198+
* Each buffer needs at least one entry, but it might be offset in
199+
* some way, and use one extra entry. So we allocate space for the
200+
* maximum number of entries we might need, and then count the exact
201+
* number as we're walking buffers. That way we can do it in one pass,
202+
* without reallocating memory.
203+
*/
204+
pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
205+
max_entries = NBuffers * pages_per_buffer;
206+
207+
/* Allocate entries for BufferCachePagesRec records. */
173208
fctx->record = (BufferCachePagesRec *)
174209
MemoryContextAllocHuge(CurrentMemoryContext,
175-
sizeof(BufferCachePagesRec) * NBuffers);
176-
177-
/* Set max calls and remember the user function context. */
178-
funcctx->max_calls = NBuffers;
179-
funcctx->user_fctx = fctx;
210+
sizeof(BufferCachePagesRec) * max_entries);
180211

181212
/* Return to original context when allocating transient memory */
182213
MemoryContextSwitchTo(oldcontext);
183214

215+
startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
216+
idx = 0;
184217
/*
185218
* Scan through all the buffers, saving the relevant fields in the
186219
* fctx->record structure.
@@ -191,35 +224,65 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
191224
*/
192225
for (i = 0; i < NBuffers; i++)
193226
{
227+
char *buffptr = (char *) BufferGetBlock(i + 1);
194228
BufferDesc *bufHdr;
195229
uint32 buf_state;
230+
int32 page_num;
231+
char *startptr_buff,
232+
*endptr_buff;
196233

197234
bufHdr = GetBufferDescriptor(i);
198235
/* Lock each buffer header before inspecting. */
199236
buf_state = LockBufHdr(bufHdr);
200237

201-
fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
202-
fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
203-
fctx->record[i].reltablespace = bufHdr->tag.spcOid;
204-
fctx->record[i].reldatabase = bufHdr->tag.dbOid;
205-
fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
206-
fctx->record[i].blocknum = bufHdr->tag.blockNum;
207-
fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
208-
fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
238+
/* start of the first page of this buffer */
239+
startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
209240

210-
if (buf_state & BM_DIRTY)
211-
fctx->record[i].isdirty = true;
212-
else
213-
fctx->record[i].isdirty = false;
241+
/* end of the buffer (no need to align to memory page) */
242+
endptr_buff = buffptr + BLCKSZ;
214243

215-
/* Note if the buffer is valid, and has storage created */
216-
if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
217-
fctx->record[i].isvalid = true;
218-
else
219-
fctx->record[i].isvalid = false;
244+
Assert(startptr_buff < endptr_buff);
245+
246+
/* calculate ID of the first page for this buffer */
247+
page_num = (startptr_buff - startptr) / os_page_size;
248+
249+
/* Add an entry for each OS page overlapping with this buffer. */
250+
for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
251+
{
252+
fctx->record[idx].bufferid = BufferDescriptorGetBuffer(bufHdr);
253+
fctx->record[idx].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
254+
fctx->record[idx].reltablespace = bufHdr->tag.spcOid;
255+
fctx->record[idx].reldatabase = bufHdr->tag.dbOid;
256+
fctx->record[idx].forknum = BufTagGetForkNum(&bufHdr->tag);
257+
fctx->record[idx].blocknum = bufHdr->tag.blockNum;
258+
fctx->record[idx].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
259+
fctx->record[idx].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
260+
261+
if (buf_state & BM_DIRTY)
262+
fctx->record[idx].isdirty = true;
263+
else
264+
fctx->record[idx].isdirty = false;
265+
266+
/* Note if the buffer is valid, and has storage created */
267+
if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
268+
fctx->record[idx].isvalid = true;
269+
else
270+
fctx->record[idx].isvalid = false;
271+
272+
fctx->record[idx].page_num = page_num;
273+
/* advance to the next entry/page */
274+
++idx;
275+
++page_num;
276+
}
220277

221278
UnlockBufHdr(bufHdr, buf_state);
222279
}
280+
281+
Assert(idx <= max_entries);
282+
283+
/* Set max calls and remember the user function context. */
284+
funcctx->max_calls = idx;
285+
funcctx->user_fctx = fctx;
223286
}
224287

225288
funcctx = SRF_PERCALL_SETUP();
@@ -252,6 +315,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
252315
nulls[7] = true;
253316
/* unused for v1.0 callers, but the array is always long enough */
254317
nulls[8] = true;
318+
nulls[9] = true;
255319
}
256320
else
257321
{
@@ -272,6 +336,8 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
272336
/* unused for v1.0 callers, but the array is always long enough */
273337
values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
274338
nulls[8] = false;
339+
values[9] = Int64GetDatum(fctx->record[i].page_num);
340+
nulls[9] = false;
275341
}
276342

277343
/* Build and return the tuple. */

contrib/pg_buffercache/sql/pg_buffercache.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CREATE EXTENSION pg_buffercache;
22

3-
select count(*) = (select setting::bigint
3+
select count(*) >= (select setting::bigint
44
from pg_settings
55
where name = 'shared_buffers')
66
from pg_buffercache;

doc/src/sgml/pgbuffercache.sgml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,15 @@
205205
Number of backends pinning this buffer
206206
</para></entry>
207207
</row>
208+
209+
<row>
210+
<entry role="catalog_table_entry"><para role="column_definition">
211+
<structfield>os_page_num</structfield> <type>bigint</type>
212+
</para>
213+
<para>
214+
OS memory page number for this buffer. A buffer that overlaps more than one OS memory page is reported once per page.
215+
</para></entry>
216+
</row>
208217
</tbody>
209218
</tgroup>
210219
</table>
@@ -550,6 +559,31 @@ regression=# SELECT n.nspname, c.relname, count(*) AS buffers
550559
public | spgist_text_tbl | 182
551560
(10 rows)
552561

562+
regression=# SELECT n.nspname, c.relname, count(*) AS buffers_on_multiple_pages
563+
FROM pg_buffercache b JOIN pg_class c
564+
ON b.relfilenode = pg_relation_filenode(c.oid) AND
565+
b.reldatabase IN (0, (SELECT oid FROM pg_database
566+
WHERE datname = current_database()))
567+
JOIN pg_namespace n ON n.oid = c.relnamespace
568+
JOIN (SELECT bufferid FROM pg_buffercache
569+
GROUP BY bufferid HAVING count(*) > 1) m on m.bufferid = b.bufferid
570+
GROUP BY n.nspname, c.relname
571+
ORDER BY 3 DESC
572+
LIMIT 10;
573+
574+
nspname | relname | buffers_on_multiple_pages
575+
------------+---------------------------------+---------------------------
576+
public | gin_test_tbl | 4
577+
public | delete_test_table | 4
578+
public | tenk1 | 4
579+
pg_catalog | pg_attribute_relid_attnum_index | 4
580+
pg_catalog | pg_class | 2
581+
pg_catalog | pg_depend_depender_index | 2
582+
pg_catalog | pg_attribute | 2
583+
pg_catalog | pg_opfamily | 2
584+
pg_catalog | pg_opclass_oid_index | 2
585+
pg_catalog | pg_description | 2
586+
(10 rows)
553587

554588
regression=# SELECT * FROM pg_buffercache_summary();
555589
buffers_used | buffers_unused | buffers_dirty | buffers_pinned | usagecount_avg

0 commit comments

Comments
 (0)