Support subscripting of arbitrary types, not only arrays.

This patch generalizes the subscripting infrastructure so that any data type can be subscripted, if it provides a handler function to define what that means. Traditional variable-length (varlena) arrays all use array_subscript_handler(), while the existing fixed-length types that support subscripting use raw_array_subscript_handler(). It's expected that other types that want to use subscripting notation will define their own handlers. (This patch provides no such new features, though; it only lays the foundation for them.) To do this, move the parser's semantic processing of subscripts (including coercion to whatever data type is required) into a method callback supplied by the handler. On the execution side, replace the ExecEvalSubscriptingRef* layer of functions with direct calls to callback-supplied execution routines. (Thus, essentially no new run-time overhead should be caused by this patch. Indeed, there is room to remove some overhead by supplying specialized execution routines. This patch does a little bit in that line, but more could be done.) Additional work is required here and there to remove formerly hard-wired assumptions about the result type, collation, etc of a SubscriptingRef expression node; and to remove assumptions that the subscript values must be integers. One useful side-effect of this is that we now have a less squishy mechanism for identifying whether a data type is a "true" array: instead of wiring in weird rules about typlen, we can look to see if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this to be bulletproof, we have to forbid user-defined types from using that handler directly; but there seems no good reason for them to do so. This patch also removes assumptions that the number of subscripts is limited to MAXDIM (6), or indeed has any hard-wired limit. That limit still applies to types handled by array_subscript_handler or raw_array_subscript_handler, but to discourage other dependencies on this constant, I've moved it from c.h to utils/array.h. Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov, Peter Eisentraut, Pavel Stehule Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
author: Tom Lane 2020-12-09 17:40:37 +0000
committer: Tom Lane 2020-12-09 17:40:37 +0000
commit: c7aba7c14efdbd9fc1bb44b4cb83bedee0c6a6fc (patch)
tree: d6980ca2951d353475957a56b58866cd4fafcdd3 /src/include
parent: 8b069ef5dca97cd737a5fd64c420df3cd61ec1c9 (diff)
12 files changed, 291 insertions, 79 deletions
diff --git a/src/include/c.h b/src/include/c.h
index b21e4074dd6..7bc4b8a001b 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -591,14 +591,6 @@ typedef uint32 CommandId;
 #define FirstCommandId	((CommandId) 0)
 #define InvalidCommandId	(~(CommandId)0)
 
-/*
- * Array indexing support
- */
-#define MAXDIM 6
-typedef struct
-{
-	int			indx[MAXDIM];
-}			IntArray;
 
 /* ----------------
  *		Variable-length datatypes all share the 'struct varlena' header.
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 54661ce753c..7ca030d4600 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202012081
+#define CATALOG_VERSION_NO	202012091
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b8436..e6c7b070f64 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10936,6 +10936,14 @@
   proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}',
   prosrc => 'pg_control_init' },
 
+# subscripting support for built-in types
+{ oid => '9255', descr => 'standard array subscripting support',
+  proname => 'array_subscript_handler', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'array_subscript_handler' },
+{ oid => '9256', descr => 'raw array subscripting support',
+  proname => 'raw_array_subscript_handler', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'raw_array_subscript_handler' },
+
 # collation management functions
 { oid => '3445', descr => 'import collations from operating system',
   proname => 'pg_import_system_collations', procost => '100',
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index 21a467a7a7a..28240bdce39 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -48,9 +48,10 @@
 { oid => '19', array_type_oid => '1003',
   descr => '63-byte type for storing system identifiers',
   typname => 'name', typlen => 'NAMEDATALEN', typbyval => 'f',
-  typcategory => 'S', typelem => 'char', typinput => 'namein',
-  typoutput => 'nameout', typreceive => 'namerecv', typsend => 'namesend',
-  typalign => 'c', typcollation => 'C' },
+  typcategory => 'S', typsubscript => 'raw_array_subscript_handler',
+  typelem => 'char', typinput => 'namein', typoutput => 'nameout',
+  typreceive => 'namerecv', typsend => 'namesend', typalign => 'c',
+  typcollation => 'C' },
 { oid => '20', array_type_oid => '1016',
   descr => '~18 digit integer, 8-byte storage',
   typname => 'int8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL',
@@ -64,7 +65,8 @@
 { oid => '22', array_type_oid => '1006',
   descr => 'array of int2, used in system tables',
   typname => 'int2vector', typlen => '-1', typbyval => 'f', typcategory => 'A',
-  typelem => 'int2', typinput => 'int2vectorin', typoutput => 'int2vectorout',
+  typsubscript => 'array_subscript_handler', typelem => 'int2',
+  typinput => 'int2vectorin', typoutput => 'int2vectorout',
   typreceive => 'int2vectorrecv', typsend => 'int2vectorsend',
   typalign => 'i' },
 { oid => '23', array_type_oid => '1007',
@@ -104,7 +106,8 @@
 { oid => '30', array_type_oid => '1013',
   descr => 'array of oids, used in system tables',
   typname => 'oidvector', typlen => '-1', typbyval => 'f', typcategory => 'A',
-  typelem => 'oid', typinput => 'oidvectorin', typoutput => 'oidvectorout',
+  typsubscript => 'array_subscript_handler', typelem => 'oid',
+  typinput => 'oidvectorin', typoutput => 'oidvectorout',
   typreceive => 'oidvectorrecv', typsend => 'oidvectorsend', typalign => 'i' },
 
 # hand-built rowtype entries for bootstrapped catalogs
@@ -178,13 +181,15 @@
 { oid => '600', array_type_oid => '1017',
   descr => 'geometric point \'(x, y)\'',
   typname => 'point', typlen => '16', typbyval => 'f', typcategory => 'G',
-  typelem => 'float8', typinput => 'point_in', typoutput => 'point_out',
-  typreceive => 'point_recv', typsend => 'point_send', typalign => 'd' },
+  typsubscript => 'raw_array_subscript_handler', typelem => 'float8',
+  typinput => 'point_in', typoutput => 'point_out', typreceive => 'point_recv',
+  typsend => 'point_send', typalign => 'd' },
 { oid => '601', array_type_oid => '1018',
   descr => 'geometric line segment \'(pt1,pt2)\'',
   typname => 'lseg', typlen => '32', typbyval => 'f', typcategory => 'G',
-  typelem => 'point', typinput => 'lseg_in', typoutput => 'lseg_out',
-  typreceive => 'lseg_recv', typsend => 'lseg_send', typalign => 'd' },
+  typsubscript => 'raw_array_subscript_handler', typelem => 'point',
+  typinput => 'lseg_in', typoutput => 'lseg_out', typreceive => 'lseg_recv',
+  typsend => 'lseg_send', typalign => 'd' },
 { oid => '602', array_type_oid => '1019',
   descr => 'geometric path \'(pt1,...)\'',
   typname => 'path', typlen => '-1', typbyval => 'f', typcategory => 'G',
@@ -193,9 +198,9 @@
 { oid => '603', array_type_oid => '1020',
   descr => 'geometric box \'(lower left,upper right)\'',
   typname => 'box', typlen => '32', typbyval => 'f', typcategory => 'G',
-  typdelim => ';', typelem => 'point', typinput => 'box_in',
-  typoutput => 'box_out', typreceive => 'box_recv', typsend => 'box_send',
-  typalign => 'd' },
+  typdelim => ';', typsubscript => 'raw_array_subscript_handler',
+  typelem => 'point', typinput => 'box_in', typoutput => 'box_out',
+  typreceive => 'box_recv', typsend => 'box_send', typalign => 'd' },
 { oid => '604', array_type_oid => '1027',
   descr => 'geometric polygon \'(pt1,...)\'',
   typname => 'polygon', typlen => '-1', typbyval => 'f', typcategory => 'G',
@@ -203,8 +208,9 @@
   typsend => 'poly_send', typalign => 'd', typstorage => 'x' },
 { oid => '628', array_type_oid => '629', descr => 'geometric line',
   typname => 'line', typlen => '24', typbyval => 'f', typcategory => 'G',
-  typelem => 'float8', typinput => 'line_in', typoutput => 'line_out',
-  typreceive => 'line_recv', typsend => 'line_send', typalign => 'd' },
+  typsubscript => 'raw_array_subscript_handler', typelem => 'float8',
+  typinput => 'line_in', typoutput => 'line_out', typreceive => 'line_recv',
+  typsend => 'line_send', typalign => 'd' },
 
 # OIDS 700 - 799
 
@@ -507,8 +513,9 @@
 # Arrays of records have typcategory P, so they can't be autogenerated.
 { oid => '2287',
   typname => '_record', typlen => '-1', typbyval => 'f', typtype => 'p',
-  typcategory => 'P', typelem => 'record', typinput => 'array_in',
-  typoutput => 'array_out', typreceive => 'array_recv', typsend => 'array_send',
+  typcategory => 'P', typsubscript => 'array_subscript_handler',
+  typelem => 'record', typinput => 'array_in', typoutput => 'array_out',
+  typreceive => 'array_recv', typsend => 'array_send',
   typanalyze => 'array_typanalyze', typalign => 'd', typstorage => 'x' },
 { oid => '2275', array_type_oid => '1263', descr => 'C-style string',
   typname => 'cstring', typlen => '-2', typbyval => 'f', typtype => 'p',
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index 6099e5f57ca..70563a6408b 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -101,15 +101,21 @@ CATALOG(pg_type,1247,TypeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71,TypeRelati
 	Oid			typrelid BKI_DEFAULT(0) BKI_ARRAY_DEFAULT(0) BKI_LOOKUP(pg_class);
 
 	/*
-	 * If typelem is not 0 then it identifies another row in pg_type. The
-	 * current type can then be subscripted like an array yielding values of
-	 * type typelem. A non-zero typelem does not guarantee this type to be a
-	 * "real" array type; some ordinary fixed-length types can also be
-	 * subscripted (e.g., name, point). Variable-length types can *not* be
-	 * turned into pseudo-arrays like that. Hence, the way to determine
-	 * whether a type is a "true" array type is if:
-	 *
-	 * typelem != 0 and typlen == -1.
+	 * Type-specific subscripting handler.  If typsubscript is 0, it means
+	 * that this type doesn't support subscripting.  Note that various parts
+	 * of the system deem types to be "true" array types only if their
+	 * typsubscript is array_subscript_handler.
+	 */
+	regproc		typsubscript BKI_DEFAULT(-) BKI_ARRAY_DEFAULT(array_subscript_handler) BKI_LOOKUP(pg_proc);
+
+	/*
+	 * If typelem is not 0 then it identifies another row in pg_type, defining
+	 * the type yielded by subscripting.  This should be 0 if typsubscript is
+	 * 0.  However, it can be 0 when typsubscript isn't 0, if the handler
+	 * doesn't need typelem to determine the subscripting result type.  Note
+	 * that a typelem dependency is considered to imply physical containment
+	 * of the element type in this type; so DDL changes on the element type
+	 * might be restricted by the presence of this type.
 	 */
 	Oid			typelem BKI_DEFAULT(0) BKI_LOOKUP(pg_type);
 
@@ -319,6 +325,11 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, on pg_type using btree(typ
 	 (typid) == ANYCOMPATIBLENONARRAYOID || \
 	 (typid) == ANYCOMPATIBLERANGEOID)
 
+/* Is this a "true" array type?  (Requires fmgroids.h) */
+#define IsTrueArrayType(typeForm)  \
+	(OidIsValid((typeForm)->typelem) && \
+	 (typeForm)->typsubscript == F_ARRAY_SUBSCRIPT_HANDLER)
+
 /*
  * Backwards compatibility for ancient random spellings of pg_type OID macros.
  * Don't use these names in new code.
@@ -351,6 +362,7 @@ extern ObjectAddress TypeCreate(Oid newTypeOid,
 								Oid typmodinProcedure,
 								Oid typmodoutProcedure,
 								Oid analyzeProcedure,
+								Oid subscriptProcedure,
 								Oid elementType,
 								bool isImplicitArray,
 								Oid arrayType,
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index abb489e2062..b4e0a9b7d3d 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -32,6 +32,11 @@ typedef void (*ExecEvalSubroutine) (ExprState *state,
 									struct ExprEvalStep *op,
 									ExprContext *econtext);
 
+/* API for out-of-line evaluation subroutines returning bool */
+typedef bool (*ExecEvalBoolSubroutine) (ExprState *state,
+										struct ExprEvalStep *op,
+										ExprContext *econtext);
+
 /*
  * Discriminator for ExprEvalSteps.
  *
@@ -185,8 +190,8 @@ typedef enum ExprEvalOp
 	 */
 	EEOP_FIELDSTORE_FORM,
 
-	/* Process a container subscript; short-circuit expression to NULL if NULL */
-	EEOP_SBSREF_SUBSCRIPT,
+	/* Process container subscripts; possibly short-circuit result to NULL */
+	EEOP_SBSREF_SUBSCRIPTS,
 
 	/*
 	 * Compute old container element/slice when a SubscriptingRef assignment
@@ -494,19 +499,19 @@ typedef struct ExprEvalStep
 			int			ncolumns;
 		}			fieldstore;
 
-		/* for EEOP_SBSREF_SUBSCRIPT */
+		/* for EEOP_SBSREF_SUBSCRIPTS */
 		struct
 		{
+			ExecEvalBoolSubroutine subscriptfunc;	/* evaluation subroutine */
 			/* too big to have inline */
 			struct SubscriptingRefState *state;
-			int			off;	/* 0-based index of this subscript */
-			bool		isupper;	/* is it upper or lower subscript? */
 			int			jumpdone;	/* jump here on null */
 		}			sbsref_subscript;
 
 		/* for EEOP_SBSREF_OLD / ASSIGN / FETCH */
 		struct
 		{
+			ExecEvalSubroutine subscriptfunc;	/* evaluation subroutine */
 			/* too big to have inline */
 			struct SubscriptingRefState *state;
 		}			sbsref;
@@ -640,36 +645,41 @@ typedef struct SubscriptingRefState
 {
 	bool		isassignment;	/* is it assignment, or just fetch? */
 
-	Oid			refelemtype;	/* OID of the container element type */
-	int16		refattrlength;	/* typlen of container type */
-	int16		refelemlength;	/* typlen of the container element type */
-	bool		refelembyval;	/* is the element type pass-by-value? */
-	char		refelemalign;	/* typalign of the element type */
+	/* workspace for type-specific subscripting code */
+	void	   *workspace;
 
-	/* numupper and upperprovided[] are filled at compile time */
-	/* at runtime, extracted subscript datums get stored in upperindex[] */
+	/* numupper and upperprovided[] are filled at expression compile time */
+	/* at runtime, subscripts are computed in upperindex[]/upperindexnull[] */
 	int			numupper;
-	bool		upperprovided[MAXDIM];
-	int			upperindex[MAXDIM];
+	bool	   *upperprovided;	/* indicates if this position is supplied */
+	Datum	   *upperindex;
+	bool	   *upperindexnull;
 
 	/* similarly for lower indexes, if any */
 	int			numlower;
-	bool		lowerprovided[MAXDIM];
-	int			lowerindex[MAXDIM];
-
-	/* subscript expressions get evaluated into here */
-	Datum		subscriptvalue;
-	bool		subscriptnull;
+	bool	   *lowerprovided;
+	Datum	   *lowerindex;
+	bool	   *lowerindexnull;
 
 	/* for assignment, new value to assign is evaluated into here */
 	Datum		replacevalue;
 	bool		replacenull;
 
-	/* if we have a nested assignment, SBSREF_OLD puts old value here */
+	/* if we have a nested assignment, sbs_fetch_old puts old value here */
 	Datum		prevvalue;
 	bool		prevnull;
 } SubscriptingRefState;
 
+/* Execution step methods used for SubscriptingRef */
+typedef struct SubscriptExecSteps
+{
+	/* See nodes/subscripting.h for more detail about these */
+	ExecEvalBoolSubroutine sbs_check_subscripts;	/* process subscripts */
+	ExecEvalSubroutine sbs_fetch;	/* fetch an element */
+	ExecEvalSubroutine sbs_assign;	/* assign to an element */
+	ExecEvalSubroutine sbs_fetch_old;	/* fetch old value for assignment */
+} SubscriptExecSteps;
+
 
 /* functions in execExpr.c */
 extern void ExprEvalPushStep(ExprState *es, const ExprEvalStep *s);
@@ -712,10 +722,6 @@ extern void ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op,
 									 ExprContext *econtext);
 extern void ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
-extern bool ExecEvalSubscriptingRef(ExprState *state, ExprEvalStep *op);
-extern void ExecEvalSubscriptingRefFetch(ExprState *state, ExprEvalStep *op);
-extern void ExecEvalSubscriptingRefOld(ExprState *state, ExprEvalStep *op);
-extern void ExecEvalSubscriptingRefAssign(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
 extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op);
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index cdbe781c734..dd85908fe2f 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -390,14 +390,14 @@ typedef struct WindowFunc
 	int			location;		/* token location, or -1 if unknown */
 } WindowFunc;
 
-/* ----------------
- *	SubscriptingRef: describes a subscripting operation over a container
- *			(array, etc).
+/*
+ * SubscriptingRef: describes a subscripting operation over a container
+ * (array, etc).
  *
  * A SubscriptingRef can describe fetching a single element from a container,
- * fetching a part of container (e.g. array slice), storing a single element into
- * a container, or storing a slice.  The "store" cases work with an
- * initial container value and a source value that is inserted into the
+ * fetching a part of a container (e.g. an array slice), storing a single
+ * element into a container, or storing a slice.  The "store" cases work with
+ * an initial container value and a source value that is inserted into the
  * appropriate part of the container; the result of the operation is an
  * entire new modified container value.
  *
@@ -410,23 +410,32 @@ typedef struct WindowFunc
  *
  * In the slice case, individual expressions in the subscript lists can be
  * NULL, meaning "substitute the array's current lower or upper bound".
- *
- * Note: the result datatype is the element type when fetching a single
- * element; but it is the array type when doing subarray fetch or either
- * type of store.
+ * (Non-array containers may or may not support this.)
+ *
+ * refcontainertype is the actual container type that determines the
+ * subscripting semantics.  (This will generally be either the exposed type of
+ * refexpr, or the base type if that is a domain.)  refelemtype is the type of
+ * the container's elements; this is saved for the use of the subscripting
+ * functions, but is not used by the core code.  refrestype, reftypmod, and
+ * refcollid describe the type of the SubscriptingRef's result.  In a store
+ * expression, refrestype will always match refcontainertype; in a fetch,
+ * it could be refelemtype for an element fetch, or refcontainertype for a
+ * slice fetch, or possibly something else as determined by type-specific
+ * subscripting logic.  Likewise, reftypmod and refcollid will match the
+ * container's properties in a store, but could be different in a fetch.
  *
  * Note: for the cases where a container is returned, if refexpr yields a R/W
- * expanded container, then the implementation is allowed to modify that object
- * in-place and return the same object.)
- * ----------------
+ * expanded container, then the implementation is allowed to modify that
+ * object in-place and return the same object.
  */
 typedef struct SubscriptingRef
 {
 	Expr		xpr;
 	Oid			refcontainertype;	/* type of the container proper */
-	Oid			refelemtype;	/* type of the container elements */
-	int32		reftypmod;		/* typmod of the container (and elements too) */
-	Oid			refcollid;		/* OID of collation, or InvalidOid if none */
+	Oid			refelemtype;	/* the container type's pg_type.typelem */
+	Oid			refrestype;		/* type of the SubscriptingRef's result */
+	int32		reftypmod;		/* typmod of the result */
+	Oid			refcollid;		/* collation of result, or InvalidOid if none */
 	List	   *refupperindexpr;	/* expressions that evaluate to upper
 									 * container indexes */
 	List	   *reflowerindexpr;	/* expressions that evaluate to lower
@@ -434,7 +443,6 @@ typedef struct SubscriptingRef
 									 * container element */
 	Expr	   *refexpr;		/* the expression that evaluates to a
 								 * container value */
-
 	Expr	   *refassgnexpr;	/* expression for the source value, or NULL if
 								 * fetch */
 } SubscriptingRef;
diff --git a/src/include/nodes/subscripting.h b/src/include/nodes/subscripting.h
new file mode 100644
index 00000000000..3b0a60773de
--- /dev/null
+++ b/src/include/nodes/subscripting.h
@@ -0,0 +1,167 @@
+/*-------------------------------------------------------------------------
+ *
+ * subscripting.h
+ *		API for generic type subscripting
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/nodes/subscripting.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SUBSCRIPTING_H
+#define SUBSCRIPTING_H
+
+#include "nodes/primnodes.h"
+
+/* Forward declarations, to avoid including other headers */
+struct ParseState;
+struct SubscriptingRefState;
+struct SubscriptExecSteps;
+
+/*
+ * The SQL-visible function that defines a subscripting method is declared
+ *		subscripting_function(internal) returns internal
+ * but it actually is not passed any parameter.  It must return a pointer
+ * to a "struct SubscriptRoutines" that provides pointers to the individual
+ * subscript parsing and execution methods.  Typically the pointer will point
+ * to a "static const" variable, but at need it can point to palloc'd space.
+ * The type (after domain-flattening) of the head variable or expression
+ * of a subscripting construct determines which subscripting function is
+ * called for that construct.
+ *
+ * In addition to the method pointers, struct SubscriptRoutines includes
+ * several bool flags that specify properties of the subscripting actions
+ * this data type can perform:
+ *
+ * fetch_strict indicates that a fetch SubscriptRef is strict, i.e., returns
+ * NULL if any input (either the container or any subscript) is NULL.
+ *
+ * fetch_leakproof indicates that a fetch SubscriptRef is leakproof, i.e.,
+ * will not throw any data-value-dependent errors.  Typically this requires
+ * silently returning NULL for invalid subscripts.
+ *
+ * store_leakproof similarly indicates whether an assignment SubscriptRef is
+ * leakproof.  (It is common to prefer throwing errors for invalid subscripts
+ * in assignments; that's fine, but it makes the operation not leakproof.
+ * In current usage there is no advantage in making assignments leakproof.)
+ *
+ * There is no store_strict flag.  Such behavior would generally be
+ * undesirable, since for example a null subscript in an assignment would
+ * cause the entire container to become NULL.
+ *
+ * Regardless of these flags, all SubscriptRefs are expected to be immutable,
+ * that is they must always give the same results for the same inputs.
+ * They are expected to always be parallel-safe, as well.
+ */
+
+/*
+ * The transform method is called during parse analysis of a subscripting
+ * construct.  The SubscriptingRef node has been constructed, but some of
+ * its fields still need to be filled in, and the subscript expression(s)
+ * are still in raw form.  The transform method is responsible for doing
+ * parse analysis of each subscript expression (using transformExpr),
+ * coercing the subscripts to whatever type it needs, and building the
+ * refupperindexpr and reflowerindexpr lists from those results.  The
+ * reflowerindexpr list must be empty for an element operation, or the
+ * same length as refupperindexpr for a slice operation.  Insert NULLs
+ * (that is, an empty parse tree, not a null Const node) for any omitted
+ * subscripts in a slice operation.  (Of course, if the transform method
+ * does not care to support slicing, it can just throw an error if isSlice.)
+ * See array_subscript_transform() for sample code.
+ *
+ * The transform method is also responsible for identifying the result type
+ * of the subscripting operation.  At call, refcontainertype and reftypmod
+ * describe the container type (this will be a base type not a domain), and
+ * refelemtype is set to the container type's pg_type.typelem value.  The
+ * transform method must set refrestype and reftypmod to describe the result
+ * of subscripting.  For arrays, refrestype is set to refelemtype for an
+ * element operation or refcontainertype for a slice, while reftypmod stays
+ * the same in either case; but other types might use other rules.  The
+ * transform method should ignore refcollid, as that's determined later on
+ * during parsing.
+ *
+ * At call, refassgnexpr has not been filled in, so the SubscriptingRef node
+ * always looks like a fetch; refrestype should be set as though for a
+ * fetch, too.  (The isAssignment parameter is typically only useful if the
+ * transform method wishes to throw an error for not supporting assignment.)
+ * To complete processing of an assignment, the core parser will coerce the
+ * element/slice source expression to the returned refrestype and reftypmod
+ * before putting it into refassgnexpr.  It will then set refrestype and
+ * reftypmod to again describe the container type, since that's what an
+ * assignment must return.
+ */
+typedef void (*SubscriptTransform) (SubscriptingRef *sbsref,
+									List *indirection,
+									struct ParseState *pstate,
+									bool isSlice,
+									bool isAssignment);
+
+/*
+ * The exec_setup method is called during executor-startup compilation of a
+ * SubscriptingRef node in an expression.  It must fill *methods with pointers
+ * to functions that can be called for execution of the node.  Optionally,
+ * exec_setup can initialize sbsrefstate->workspace to point to some palloc'd
+ * workspace for execution.  (Typically, such workspace is used to hold
+ * looked-up catalog data and/or provide space for the check_subscripts step
+ * to pass data forward to the other step functions.)  See executor/execExpr.h
+ * for the definitions of these structs and other ones used in expression
+ * execution.
+ *
+ * The methods to be provided are:
+ *
+ * sbs_check_subscripts: examine the just-computed subscript values available
+ * in sbsrefstate's arrays, and possibly convert them into another form
+ * (stored in sbsrefstate->workspace).  Return TRUE to continue with
+ * evaluation of the subscripting construct, or FALSE to skip it and return an
+ * overall NULL result.  If this is a fetch and the data type's fetch_strict
+ * flag is true, then sbs_check_subscripts must return FALSE if there are any
+ * NULL subscripts.  Otherwise it can choose to throw an error, or return
+ * FALSE, or let sbs_fetch or sbs_assign deal with the null subscripts.
+ *
+ * sbs_fetch: perform a subscripting fetch, using the container value in
+ * *op->resvalue and the subscripts from sbs_check_subscripts.  If
+ * fetch_strict is true then all these inputs can be assumed non-NULL,
+ * otherwise sbs_fetch must check for null inputs.  Place the result in
+ * *op->resvalue / *op->resnull.
+ *
+ * sbs_assign: perform a subscripting assignment, using the original
+ * container value in *op->resvalue / *op->resnull, the subscripts from
+ * sbs_check_subscripts, and the new element/slice value in
+ * sbsrefstate->replacevalue/replacenull.  Any of these inputs might be NULL
+ * (unless sbs_check_subscripts rejected null subscripts).  Place the result
+ * (an entire new container value) in *op->resvalue / *op->resnull.
+ *
+ * sbs_fetch_old: this is only used in cases where an element or slice
+ * assignment involves an assignment to a sub-field or sub-element
+ * (i.e., nested containers are involved).  It must fetch the existing
+ * value of the target element or slice.  This is exactly the same as
+ * sbs_fetch except that (a) it must cope with a NULL container, and
+ * with NULL subscripts if sbs_check_subscripts allows them (typically,
+ * returning NULL is good enough); and (b) the result must be placed in
+ * sbsrefstate->prevvalue/prevnull, without overwriting *op->resvalue.
+ *
+ * Subscripting implementations that do not support assignment need not
+ * provide sbs_assign or sbs_fetch_old methods.  It might be reasonable
+ * to also omit sbs_check_subscripts, in which case the sbs_fetch method must
+ * combine the functionality of sbs_check_subscripts and sbs_fetch.  (The
+ * main reason to have a separate sbs_check_subscripts method is so that
+ * sbs_fetch_old and sbs_assign need not duplicate subscript processing.)
+ * Set the relevant pointers to NULL for any omitted methods.
+ */
+typedef void (*SubscriptExecSetup) (const SubscriptingRef *sbsref,
+									struct SubscriptingRefState *sbsrefstate,
+									struct SubscriptExecSteps *methods);
+
+/* Struct returned by the SQL-visible subscript handler function */
+typedef struct SubscriptRoutines
+{
+	SubscriptTransform transform;	/* parse analysis function */
+	SubscriptExecSetup exec_setup;	/* expression compilation function */
+	bool		fetch_strict;	/* is fetch SubscriptRef strict? */
+	bool		fetch_leakproof;	/* is fetch SubscriptRef leakproof? */
+	bool		store_leakproof;	/* is assignment SubscriptRef leakproof? */
+} SubscriptRoutines;
+
+#endif							/* SUBSCRIPTING_H */
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index d25819aa28b..beb56fec87a 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -313,15 +313,15 @@ extern void setup_parser_errposition_callback(ParseCallbackState *pcbstate,
 											  ParseState *pstate, int location);
 extern void cancel_parser_errposition_callback(ParseCallbackState *pcbstate);
 
-extern Oid	transformContainerType(Oid *containerType, int32 *containerTypmod);
+extern void transformContainerType(Oid *containerType, int32 *containerTypmod);
 
 extern SubscriptingRef *transformContainerSubscripts(ParseState *pstate,
 													 Node *containerBase,
 													 Oid containerType,
-													 Oid elementType,
 													 int32 containerTypMod,
 													 List *indirection,
-													 Node *assignFrom);
+													 bool isAssignment);
+
 extern Const *make_const(ParseState *pstate, Value *value, int location);
 
 #endif							/* PARSE_NODE_H */
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 2809dfee939..16925880a1e 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -70,6 +70,11 @@ struct ExprContext;
 
 
 /*
+ * Maximum number of array subscripts (arbitrary limit)
+ */
+#define MAXDIM 6
+
+/*
  * Arrays are varlena objects, so must meet the varlena convention that
  * the first int32 of the object contains the total object size in bytes.
  * Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index c97e12dde8a..a990d11ea86 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -17,6 +17,9 @@
 #include "access/htup.h"
 #include "nodes/pg_list.h"
 
+/* avoid including subscripting.h here */
+struct SubscriptRoutines;
+
 /* Result list element for get_op_btree_interpretation */
 typedef struct OpBtreeInterpretation
 {
@@ -173,6 +176,9 @@ extern void getTypeBinaryOutputInfo(Oid type, Oid *typSend, bool *typIsVarlena);
 extern Oid	get_typmodin(Oid typid);
 extern Oid	get_typcollation(Oid typid);
 extern bool type_is_collatable(Oid typid);
+extern RegProcedure get_typsubscript(Oid typid, Oid *typelemp);
+extern const struct SubscriptRoutines *getSubscriptingRoutines(Oid typid,
+															   Oid *typelemp);
 extern Oid	getBaseType(Oid typid);
 extern Oid	getBaseTypeAndTypmod(Oid typid, int32 *typmod);
 extern int32 get_typavgwidth(Oid typid, int32 typmod);
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index cdd20e56d70..38c8fe01929 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -42,6 +42,7 @@ typedef struct TypeCacheEntry
 	char		typstorage;
 	char		typtype;
 	Oid			typrelid;
+	Oid			typsubscript;
 	Oid			typelem;
 	Oid			typcollation;
author	Tom Lane	2020-12-09 17:40:37 +0000
committer	Tom Lane	2020-12-09 17:40:37 +0000
commit	c7aba7c14efdbd9fc1bb44b4cb83bedee0c6a6fc (patch)
tree	d6980ca2951d353475957a56b58866cd4fafcdd3 /src/include
parent	8b069ef5dca97cd737a5fd64c420df3cd61ec1c9 (diff)