From 4beac2c351d0cccb56e287dc18a61c4ed7e58d4e Mon Sep 17 00:00:00 2001
From: Commitfest Bot
Date: Thu, 22 May 2025 12:44:24 +0000
Subject: [PATCH] Discount metapage in genericcostestimate

---
 contrib/bloom/blcost.c                |  3 +++
 src/backend/utils/adt/selfuncs.c      | 32 +++++++++++++++++++++++----
 src/include/utils/selfuncs.h          |  7 ++++++
 src/test/regress/expected/join.out    |  8 ++++---
 src/test/regress/expected/memoize.out |  2 ++
 src/test/regress/expected/select.out  |  2 +-
 src/test/regress/sql/join.sql         | 10 +++++----
 src/test/regress/sql/memoize.sql      |  2 ++
 src/test/regress/sql/select.sql       |  2 +-
 9 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c
index a38fcf3c5791..4359b81d1961 100644
--- a/contrib/bloom/blcost.c
+++ b/contrib/bloom/blcost.c
@@ -30,6 +30,9 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	/* We have to visit all index tuples anyway */
 	costs.numIndexTuples = index->tuples;
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	/* Use generic estimate */
 	genericcostestimate(root, path, loop_count, &costs);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a96b1b9c0bc6..3449f82c71bd 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6931,6 +6931,11 @@ index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
 	return qual_arg_cost;
 }
 
+/*
+ * Compute generic index access cost estimates.
+ *
+ * See struct GenericCosts in selfuncs.h for more info.
+ */
 void
 genericcostestimate(PlannerInfo *root,
 					IndexPath *path,
@@ -7026,16 +7031,18 @@ genericcostestimate(PlannerInfo *root,
 	 * Estimate the number of index pages that will be retrieved.
 	 *
 	 * We use the simplistic method of taking a pro-rata fraction of the total
-	 * number of index pages.  In effect, this counts only leaf pages and not
-	 * any overhead such as index metapage or upper tree levels.
+	 * number of index leaf pages.  We disregard any overhead such as index
+	 * metapages or upper tree levels.
 	 *
 	 * In practice access to upper index levels is often nearly free because
 	 * those tend to stay in cache under load; moreover, the cost involved is
 	 * highly dependent on index type.  We therefore ignore such costs here
 	 * and leave it to the caller to add a suitable charge if needed.
 	 */
-	if (index->pages > 1 && index->tuples > 1)
-		numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
+	if (index->pages > costs->numNonLeafPages && index->tuples > 1)
+		numIndexPages =
+			ceil(numIndexTuples * (index->pages - costs->numNonLeafPages)
+				 / index->tuples);
 	else
 		numIndexPages = 1.0;
 
@@ -7626,9 +7633,18 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 
 	/*
 	 * Now do generic index cost estimation.
+	 *
+	 * While we expended effort to make realistic estimates of numIndexTuples
+	 * and num_sa_scans, we are content to count only the btree metapage as
+	 * non-leaf.  btree fanout is typically high enough that upper pages are
+	 * few relative to leaf pages, so accounting for them would move the
+	 * estimates at most a percent or two.  Given the uncertainty in just how
+	 * many upper pages exist in a particular index, we'll skip trying to
+	 * handle that.
 	 */
 	costs.numIndexTuples = numIndexTuples;
 	costs.num_sa_scans = num_sa_scans;
+	costs.numNonLeafPages = 1;
 
 	genericcostestimate(root, path, loop_count, &costs);
 
@@ -7693,6 +7709,9 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 {
 	GenericCosts costs = {0};
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
@@ -7737,6 +7756,8 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	GenericCosts costs = {0};
 	Cost		descentCost;
 
+	/* GiST has no metapage, so we treat all pages as leaf pages */
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
@@ -7792,6 +7813,9 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	GenericCosts costs = {0};
 	Cost		descentCost;
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 013049b3098a..a34a737edf83 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -121,6 +121,12 @@ typedef struct VariableStatData
  * Similarly, they can set num_sa_scans to some value >= 1 for an index AM
  * that doesn't necessarily perform exactly one primitive index scan per
  * distinct combination of ScalarArrayOp array elements.
+ * Similarly, they can set numNonLeafPages to some value >= 1 if they know
+ * how many index pages are not leaf pages.  (It's always good to count
+ * totally non-data-bearing pages such as metapages here, since accounting
+ * for the metapage can move cost estimates for a small index significantly.
+ * But upper pages in large indexes may be few enough relative to leaf pages
+ * that it's not worth trying to count them.)
  */
 typedef struct
 {
@@ -135,6 +141,7 @@ typedef struct
 	double		numIndexTuples; /* number of leaf tuples visited */
 	double		spc_random_page_cost;	/* relevant random_page_cost value */
 	double		num_sa_scans;	/* # indexscans from ScalarArrayOpExprs */
+	BlockNumber numNonLeafPages;	/* # of index pages that are not leaves */
 } GenericCosts;
 
 /* Hooks for plugins to get control when we ask for stats */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f35a0b18c37c..aa793aafd0f3 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -9136,12 +9136,14 @@ drop index j1_id2_idx;
 set enable_nestloop to 0;
 set enable_hashjoin to 0;
 set enable_sort to 0;
+-- we need additional data to get the partial indexes to be preferred
+insert into j1 select 2, i from generate_series(1, 100) i;
+insert into j2 select 1, i from generate_series(2, 100) i;
+analyze j1;
+analyze j2;
 -- create indexes that will be preferred over the PKs to perform the join
 create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
 create index j2_id1_idx on j2 (id1) where id1 % 1000 = 1;
--- need an additional row in j2, if we want j2_id1_idx to be preferred
-insert into j2 values(1,2);
-analyze j2;
 explain (costs off) select * from j1 inner join j2 on j1.id1 = j2.id1 and
 j1.id2 = j2.id2 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 38dfaf021c91..a8deabc9b846 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -261,6 +261,7 @@ CREATE INDEX flt_f_idx ON flt (f);
 INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
 ANALYZE flt;
 SET enable_seqscan TO off;
+SET enable_material TO off;
 -- Ensure memoize operates in logical mode
 SELECT explain_memoize('
 SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
@@ -454,6 +455,7 @@ WHERE unique1 < 3
 (1 row)
 
 RESET enable_seqscan;
+RESET enable_material;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET hash_mem_multiplier;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index bab0cc93ff5e..698d08ddd72e 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -861,7 +861,6 @@ select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
      11
 (1 row)
 
-RESET enable_indexscan;
 -- check multi-index cases too
 explain (costs off)
 select unique1, unique2 from onek2
@@ -908,6 +907,7 @@ select unique1, unique2 from onek2
       0 |     998
 (2 rows)
 
+RESET enable_indexscan;
 --
 -- Test some corner cases that have been known to confuse the planner
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index cc5128add4df..a5d762cb2f4c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -3450,14 +3450,16 @@ set enable_nestloop to 0;
 set enable_hashjoin to 0;
 set enable_sort to 0;
 
+-- we need additional data to get the partial indexes to be preferred
+insert into j1 select 2, i from generate_series(1, 100) i;
+insert into j2 select 1, i from generate_series(2, 100) i;
+analyze j1;
+analyze j2;
+
 -- create indexes that will be preferred over the PKs to perform the join
 create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
 create index j2_id1_idx on j2 (id1) where id1 % 1000 = 1;
 
--- need an additional row in j2, if we want j2_id1_idx to be preferred
-insert into j2 values(1,2);
-analyze j2;
-
 explain (costs off) select * from j1 inner join j2 on j1.id1 = j2.id1 and
 j1.id2 = j2.id2 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
 
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql
index c0d47fa875ad..179a9107b493 100644
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -138,6 +138,7 @@ INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
 ANALYZE flt;
 
 SET enable_seqscan TO off;
+SET enable_material TO off;
 
 -- Ensure memoize operates in logical mode
 SELECT explain_memoize('
@@ -217,6 +218,7 @@ WHERE unique1 < 3
 WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
 
 RESET enable_seqscan;
+RESET enable_material;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET hash_mem_multiplier;
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index 1d1bf2b9310a..771b9869a202 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -221,7 +221,6 @@ SET enable_indexscan TO off;
 explain (costs off)
 select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
 select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
-RESET enable_indexscan;
 -- check multi-index cases too
 explain (costs off)
 select unique1, unique2 from onek2
@@ -233,6 +232,7 @@ select unique1, unique2 from onek2
   where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
 select unique1, unique2 from onek2
   where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
+RESET enable_indexscan;
 --
 -- Test some corner cases that have been known to confuse the planner
 --
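
For readers following the cost arithmetic, the standalone sketch below (not part of the patch, with invented example numbers) shows why discounting numNonLeafPages matters most for small indexes. Only the two ceil() expressions mirror the old and new code in genericcostestimate(); everything else is scaffolding for the example.

/*
 * Hypothetical worked example, not part of the patch: compare the
 * pro-rata page estimate before and after discounting non-leaf pages.
 * Build with: cc example.c -lm
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double		indexPages = 5.0;	/* small index: metapage + 4 leaf pages */
	double		indexTuples = 1000.0;	/* total tuples in the index */
	double		numIndexTuples = 250.0; /* tuples the scan is expected to visit */
	double		numNonLeafPages = 1.0;	/* the metapage, as btree/hash/SP-GiST/bloom now report */

	/* Old formula: pro-rata fraction over all pages, metapage included */
	double		oldPages = ceil(numIndexTuples * indexPages / indexTuples);

	/* New formula: pro-rata fraction over leaf pages only */
	double		newPages = ceil(numIndexTuples * (indexPages - numNonLeafPages)
								/ indexTuples);

	/* prints: old estimate: 2 pages, new estimate: 1 pages */
	printf("old estimate: %g pages, new estimate: %g pages\n",
		   oldPages, newPages);
	return 0;
}

With these inputs the old formula charges two page fetches where the metapage-discounted one charges one; on a five-page index that is a large relative change, which is presumably why the join, select, and memoize regression tests above needed extra data or relocated SET/RESET commands to keep their intended plan shapes.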