@@ -590,13 +590,11 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
590
590
max_nodes ;
591
591
Size * nodes ;
592
592
593
- InitMaterializedSRF (fcinfo , 0 );
594
-
595
593
if (pg_numa_init () == -1 )
596
- {
597
594
elog (ERROR , "libnuma initialization failed or NUMA is not supported on this platform" );
598
- return (Datum ) 0 ;
599
- }
595
+
596
+ InitMaterializedSRF (fcinfo , 0 );
597
+
600
598
max_nodes = pg_numa_get_max_node ();
601
599
nodes = palloc (sizeof (Size ) * (max_nodes + 1 ));
602
600
@@ -619,6 +617,9 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
619
617
* memory size. This simplified approach allocates enough space for all
620
618
* pages in shared memory rather than calculating the exact requirements
621
619
* for each segment.
620
+ *
621
+ * XXX Isn't this wasteful? But there probably is one large segment of
622
+ * shared memory, much larger than the rest anyway.
622
623
*/
623
624
shm_total_page_count = ShmemSegHdr -> totalsize / os_page_size ;
624
625
page_ptrs = palloc0 (sizeof (void * ) * shm_total_page_count );
@@ -637,25 +638,33 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
637
638
{
638
639
int i ;
639
640
640
- /* Get number of OS aliged pages */
641
- shm_ent_page_count = TYPEALIGN (os_page_size , ent -> allocated_size ) / os_page_size ;
641
+ /* XXX I assume we use TYPEALIGN as a way to round to whole pages.
642
+ * It's a bit misleading to call that "aligned", no? */
643
+
644
+ /* Get number of OS aligned pages */
645
+ shm_ent_page_count
646
+ = TYPEALIGN (os_page_size , ent -> allocated_size ) / os_page_size ;
642
647
643
648
/*
644
649
* If we ever get 0xff back from kernel inquiry, then we probably have a
645
650
* bug in our buffers to OS page mapping code here.
646
651
*/
647
652
memset (pages_status , 0xff , sizeof (int ) * shm_ent_page_count );
648
653
654
+ /*
655
+ * Setup page_ptrs[] with pointers to all OS pages for this segment,
656
+ * and get the NUMA status using pg_numa_query_pages.
657
+ *
658
+ * In order to get reliable results we also need to touch memory
659
+ * pages, so that inquiry about NUMA memory node doesn't return -2
660
+ * (ENOENT, which indicates unmapped/unallocated pages).
661
+ */
649
662
for (i = 0 ; i < shm_ent_page_count ; i ++ )
650
663
{
651
- /*
652
- * In order to get reliable results we also need to touch memory
653
- * pages, so that inquiry about NUMA memory node doesn't return -2
654
- * (which indicates unmapped/unallocated pages).
655
- */
656
664
volatile uint64 touch pg_attribute_unused ();
657
665
658
666
page_ptrs [i ] = (char * ) ent -> location + (i * os_page_size );
667
+
659
668
if (firstNumaTouch )
660
669
pg_numa_touch_mem_if_required (touch , page_ptrs [i ]);
661
670
@@ -665,19 +674,27 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
665
674
if (pg_numa_query_pages (0 , shm_ent_page_count , page_ptrs , pages_status ) == -1 )
666
675
elog (ERROR , "failed NUMA pages inquiry status: %m" );
667
676
668
- memset (nodes , 0 , sizeof (Size ) * (max_nodes + 1 ));
669
677
/* Count number of NUMA nodes used for this shared memory entry */
678
+ memset (nodes , 0 , sizeof (Size ) * (max_nodes + 1 ));
679
+
670
680
for (i = 0 ; i < shm_ent_page_count ; i ++ )
671
681
{
672
682
int s = pages_status [i ];
673
683
674
684
/* Ensure we only use a valid index into the array */
675
- if (s >= 0 && s <= max_nodes )
676
- nodes [s ]++ ;
677
- else
678
- elog (ERROR , "invalid NUMA node id outside of allowed range [0, " UINT64_FORMAT "]: %d" , max_nodes , s );
685
+ if (s < 0 || s > max_nodes )
686
+ {
687
+ elog (ERROR , "invalid NUMA node id outside of allowed range "
688
+ "[0, " UINT64_FORMAT "]: %d" , max_nodes , s );
689
+ }
690
+
691
+ nodes [s ]++ ;
679
692
}
680
693
694
+ /*
695
+ * Add one entry for each NUMA node, including those without allocated
696
+ * memory for this segment.
697
+ */
681
698
for (i = 0 ; i <= max_nodes ; i ++ )
682
699
{
683
700
values [0 ] = CStringGetTextDatum (ent -> key );
@@ -693,6 +710,9 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
693
710
* We are ignoring the following memory regions (as compared to
694
711
* pg_get_shmem_allocations()): 1. output shared memory allocated but not
695
712
* counted via the shmem index 2. output as-of-yet unused shared memory.
713
+ *
714
+ * XXX Not quite sure why this is at the end, and what "output memory"
715
+ * refers to.
696
716
*/
697
717
698
718
LWLockRelease (ShmemIndexLock );
0 commit comments