Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans

author David Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:21:21 +0000 (13:21 +1200)
committer David Rowley <drowley@postgresql.org>
Wed, 1 May 2024 01:21:21 +0000 (13:21 +1200)
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c

index fcf6d1d932184a36661590a4961600f5aba084e1..b49194c0167afd2901bae933f6d163a7adc33bd5 100644 (file)
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -35,18 +35,20 @@
  #include "access/tableam.h"
  #include "access/tupdesc.h"
  #include "access/visibilitymap.h"
+#include "catalog/pg_type.h"
  #include "executor/executor.h"
  #include "executor/nodeIndexonlyscan.h"
  #include "executor/nodeIndexscan.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
  #include "storage/predicate.h"
+#include "utils/builtins.h"
  #include "utils/rel.h"
  
  
  static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
-static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
-                           TupleDesc itupdesc);
+static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                           IndexTuple itup, TupleDesc itupdesc);
  
  
  /* ----------------------------------------------------------------
@@ -205,7 +207,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
             ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
         }
         else if (scandesc->xs_itup)
-           StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
+           StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
         else
             elog(ERROR, "no data returned for index-only scan");
  
@@ -263,7 +265,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
   * right now we don't need it elsewhere.
   */
  static void
-StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
+StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+               IndexTuple itup, TupleDesc itupdesc)
  {
     /*
      * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -276,6 +279,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
  
     ExecClearTuple(slot);
     index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
+
+   /*
+    * Copy all name columns stored as cstrings back into a NAMEDATALEN-byte
+    * allocation.  We mark this branch as unlikely because "name" is
+    * generally used only in the system catalogs, so reaching it requires a
+    * user query on those catalogs or on a user table with an index on a
+    * name column.
+    */
+   if (unlikely(node->ioss_NameCStringAttNums != NULL))
+   {
+       int         attcount = node->ioss_NameCStringCount;
+
+       for (int idx = 0; idx < attcount; idx++)
+       {
+           int         attnum = node->ioss_NameCStringAttNums[idx];
+           Name        name;
+
+           /* skip null Datums */
+           if (slot->tts_isnull[attnum])
+               continue;
+
+           /* allocate NAMEDATALEN bytes and copy the datum into that memory */
+           name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
+                                            NAMEDATALEN);
+
+           /* use namestrcpy to zero-pad all trailing bytes */
+           namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
+           slot->tts_values[attnum] = NameGetDatum(name);
+       }
+   }
+
     ExecStoreVirtualTuple(slot);
  }
  
@@ -473,8 +507,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
  {
     IndexOnlyScanState *indexstate;
     Relation    currentRelation;
+   Relation    indexRelation;
     LOCKMODE    lockmode;
     TupleDesc   tupDesc;
+   int         indnkeyatts;
+   int         namecount;
  
     /*
      * create state structure
@@ -547,7 +584,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
  
     /* Open the index relation. */
     lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
-   indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
+   indexRelation = index_open(node->indexid, lockmode);
+   indexstate->ioss_RelationDesc = indexRelation;
  
     /*
      * Initialize index-specific scan state
@@ -560,7 +598,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
      * build the index scan keys from the index qualification
      */
     ExecIndexBuildScanKeys((PlanState *) indexstate,
-                          indexstate->ioss_RelationDesc,
+                          indexRelation,
                            node->indexqual,
                            false,
                            &indexstate->ioss_ScanKeys,
@@ -574,7 +612,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
      * any ORDER BY exprs have to be turned into scankeys in the same way
      */
     ExecIndexBuildScanKeys((PlanState *) indexstate,
-                          indexstate->ioss_RelationDesc,
+                          indexRelation,
                            node->indexorderby,
                            true,
                            &indexstate->ioss_OrderByKeys,
@@ -603,6 +641,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
         indexstate->ioss_RuntimeContext = NULL;
     }
  
+   indexstate->ioss_NameCStringAttNums = NULL;
+   indnkeyatts = indexRelation->rd_index->indnkeyatts;
+   namecount = 0;
+
+   /*
+    * The "name" type for btree uses text_ops which results in storing
+    * cstrings in the indexed keys rather than names.  Here we detect that in
+    * a generic way in case other index AMs want to do the same optimization.
+    * Check for opclasses with an opcintype of NAMEOID and an index tuple
+    * descriptor with CSTRINGOID.  If any of these are found, create an array
+    * marking the index attribute number of each of them.  StoreIndexTuple()
+    * handles copying the name Datums into a NAMEDATALEN-byte allocation.
+    */
+
+   /* First, count the number of such index keys */
+   for (int attnum = 0; attnum < indnkeyatts; attnum++)
+   {
+       if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+           indexRelation->rd_opcintype[attnum] == NAMEOID)
+           namecount++;
+   }
+
+   if (namecount > 0)
+   {
+       int         idx = 0;
+
+       /*
+        * Now create an array to mark the attribute numbers of the keys that
+        * need to be converted from cstring to name.
+        */
+       indexstate->ioss_NameCStringAttNums = (AttrNumber *)
+           palloc(sizeof(AttrNumber) * namecount);
+
+       for (int attnum = 0; attnum < indnkeyatts; attnum++)
+       {
+           if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+               indexRelation->rd_opcintype[attnum] == NAMEOID)
+               indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
+       }
+   }
+
+   indexstate->ioss_NameCStringCount = namecount;
+
     /*
      * all done.
      */
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat

index 6c30770fe7c8651f1595c97f448f3603c5eeff1f..f503c652ebca64a4db641bf58b991ff873d5aee1 100644 (file)
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -91,8 +91,11 @@
  # Here's an ugly little hack to save space in the system catalog indexes.
  # btree doesn't ordinarily allow a storage type different from input type;
  # but cstring and name are the same thing except for trailing padding,
-# and we can safely omit that within an index entry.  So we declare the
-# btree opclass for name as using cstring storage type.
+# so we choose to omit that within an index entry.  Here we declare the
+# btree opclass for name as using cstring storage type.  This does require
+# that we pad the cstring out with the full NAMEDATALEN bytes when performing
+# index-only scans.  See corresponding hacks in ExecInitIndexOnlyScan() and
+# StoreIndexTuple().
  { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',
    opcintype => 'name', opckeytype => 'cstring' },
  
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index d927ac44a82bca1d46bb8877c8c422ce71d28d9b..8bc421e7c0508b0374ae04ea6e2004408e2dd727 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1690,6 +1690,8 @@ typedef struct IndexScanState
   *     TableSlot          slot for holding tuples fetched from the table
   *     VMBuffer           buffer in use for visibility map testing, if any
   *     PscanLen           size of parallel index-only scan descriptor
+ *     NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN
+ *     NameCStringCount   number of elements in the NameCStringAttNums array
   * ----------------
   */
  typedef struct IndexOnlyScanState
@@ -1709,6 +1711,8 @@ typedef struct IndexOnlyScanState
     TupleTableSlot *ioss_TableSlot;
     Buffer      ioss_VMBuffer;
     Size        ioss_PscanLen;
+   AttrNumber *ioss_NameCStringAttNums;
+   int         ioss_NameCStringCount;
  } IndexOnlyScanState;
  
  /* ----------------
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out

index 86510687c743f54fd49f508d09176f10a8c52cd3..ea8b2454bf8cd45951d53a05bfef3612099e398d 100644 (file)
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -398,3 +398,28 @@ Indexes:
      "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)
  
  DROP TABLE tbl;
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using nametbl_c1_c2_idx on nametbl
+   Index Cond: ((c2 = 'two'::name) AND (c1 = 1))
+(2 rows)
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+ c2  | c1 | c3 
+-----+----+----
+ two |  1 |  3
+(1 row)
+
+RESET enable_seqscan;
+DROP TABLE nametbl;
diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql

index 44b340053b75b2b2bdc500bd0a9472d368d1ab7b..ad9cbdd02807bc580f9b6584eda6d674f8fc93ef 100644 (file)
--- a/src/test/regress/sql/index_including.sql
+++ b/src/test/regress/sql/index_including.sql
@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;
  ALTER TABLE tbl ALTER c3 TYPE bigint;
  \d tbl
  DROP TABLE tbl;
+
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+RESET enable_seqscan;
+
+DROP TABLE nametbl;
+\ No newline at end of file
author	David Rowley <drowley@postgresql.org>
	Wed, 1 May 2024 01:21:21 +0000 (13:21 +1200)
committer	David Rowley <drowley@postgresql.org>
	Wed, 1 May 2024 01:21:21 +0000 (13:21 +1200)
src/backend/executor/nodeIndexonlyscan.c		patch \| blob \| blame \| history
src/include/catalog/pg_opclass.dat		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/test/regress/expected/index_including.out		patch \| blob \| blame \| history
src/test/regress/sql/index_including.sql		patch \| blob \| blame \| history