Implement the width_bucket() function, per SQL2003. This commit only adds

Neil Conway · Neil Conway · commit 0079547bcbed · 2004-05-14T21:42:30.000Z
a variant of the function for the 'numeric' datatype; it would be possible
to add additional variants for other datatypes, but I haven't done so yet.

This commit includes regression tests and minimal documentation; if we
want developers to actually use this function in applications, we'll
probably need to document what it does more fully.
diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.4 2004/05/14 18:04:02 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.5 2004/05/14 21:42:27 neilc Exp $ -->
 
 <appendix id="errcodes-appendix">
  <title><productname>PostgreSQL</productname> Error Codes</title>
@@ -310,6 +310,11 @@
 <entry>INTERVAL FIELD OVERFLOW</entry>
 </row>
 
+<row>
+<entry><literal>2201G</literal></entry>
+<entry>INVALID ARGUMENT FOR WIDTH BUCKET FUNCTION</entry>
+</row>
+
 <row>
 <entry><literal>22018</literal></entry>
 <entry>INVALID CHARACTER VALUE FOR CAST</entry>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.201 2004/05/10 22:44:42 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.202 2004/05/14 21:42:27 neilc Exp $
 PostgreSQL documentation
 -->
 
@@ -751,6 +751,16 @@ PostgreSQL documentation
        <entry><literal>42.43</literal></entry>
       </row>
 
+      <row>
+       <entry><literal><function>width_bucket</function>(<parameter>op</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>integer</type>)</literal></entry>
+       <entry><type>integer</type></entry>
+       <entry>return the bucket to which <parameter>operand</> would
+       be assigned in an equidepth histogram with <parameter>count</>
+       buckets, an upper bound of <parameter>b1</>, and a lower bound
+       of <parameter>b2</></entry>
+       <entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
+       <entry><literal>3</literal></entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.83 2004/05/14 21:42:27 neilc Exp $
 -->
 
  <sect1 id="xfunc">
@@ -56,20 +56,20 @@ $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $
   </para>
 
   <para>
-   It's easiest to define <acronym>SQL</acronym> 
+   It's easiest to define <acronym>SQL</acronym>
    functions, so we'll start by discussing those.
    Most of the concepts presented for <acronym>SQL</acronym> functions
    will carry over to the other types of functions.
   </para>
 
   <para>
    Throughout this chapter, it can be useful to look at the reference
-   page of the <command>CREATE FUNCTION</command> command to
-   understand the examples better.
-   Some examples from this chapter
-   can be found in <filename>funcs.sql</filename> 
-   and <filename>funcs.c</filename> in the <filename>src/tutorial</>
-   directory in the <productname>PostgreSQL</productname> source distribution.
+   page of the <xref linkend="sql-createfunction"> command to
+   understand the examples better.  Some examples from this chapter
+   can be found in <filename>funcs.sql</filename> and
+   <filename>funcs.c</filename> in the <filename>src/tutorial</>
+   directory in the <productname>PostgreSQL</productname> source
+   distribution.
   </para>
   </sect1>
 
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
@@ -14,7 +14,7 @@
  * Copyright (c) 1998-2003, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.73 2004/05/07 00:24:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.74 2004/05/14 21:42:28 neilc Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -252,6 +252,7 @@ static Numeric make_result(NumericVar *var);
 
 static void apply_typmod(NumericVar *var, int32 typmod);
 
+static int32 numericvar_to_int4(NumericVar *var);
 static bool numericvar_to_int8(NumericVar *var, int64 *result);
 static void int8_to_numericvar(int64 val, NumericVar *var);
 static double numeric_to_double_no_overflow(Numeric num);
@@ -285,6 +286,8 @@ static void sub_abs(NumericVar *var1, NumericVar *var2, NumericVar *result);
 static void round_var(NumericVar *var, int rscale);
 static void trunc_var(NumericVar *var, int rscale);
 static void strip_var(NumericVar *var);
+static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
+						   NumericVar *count_var, NumericVar *result_var);
 
 
 /* ----------------------------------------------------------------------
@@ -803,6 +806,125 @@ numeric_floor(PG_FUNCTION_ARGS)
 	PG_RETURN_NUMERIC(res);
 }
 
+/*
+ * width_bucket_numeric() -
+ *
+ * 'bound1' and 'bound2' are the lower and upper bounds of the
+ * histogram's range, respectively. 'count' is the number of buckets
+ * in the histogram. width_bucket() returns an integer indicating the
+ * bucket number that 'operand' belongs in for an equiwidth histogram
+ * with the specified characteristics. An operand smaller than the
+ * lower bound is assigned to bucket 0. An operand greater than the
+ * upper bound is assigned to an additional bucket (with number
+ * count+1).
+ */
+Datum
+width_bucket_numeric(PG_FUNCTION_ARGS)
+{
+	Numeric		operand = PG_GETARG_NUMERIC(0);
+	Numeric		bound1 = PG_GETARG_NUMERIC(1);
+	Numeric		bound2 = PG_GETARG_NUMERIC(2);
+	int32		count = PG_GETARG_INT32(3);
+	NumericVar	count_var;
+	NumericVar	result_var;
+	int32		result;
+
+	if (count <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+				 errmsg("count must be greater than zero")));
+
+	init_var(&result_var);
+	init_var(&count_var);
+
+	/* Convert 'count' to a numeric, for ease of use later */
+	int8_to_numericvar((int64) count, &count_var);
+
+	switch (cmp_numerics(bound1, bound2))
+	{
+		case 0:
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+					 errmsg("lower bound cannot equal upper bound")));
+
+		/* bound1 < bound2 */
+		case -1:
+			if (cmp_numerics(operand, bound1) < 0)
+				set_var_from_var(&const_zero, &result_var);
+			else if (cmp_numerics(operand, bound2) >= 0)
+				add_var(&count_var, &const_one, &result_var);
+			else
+				compute_bucket(operand, bound1, bound2,
+							   &count_var, &result_var);
+			break;
+
+		/* bound1 > bound2 */
+		case 1:
+			if (cmp_numerics(operand, bound1) > 0)
+				set_var_from_var(&const_zero, &result_var);
+			else if (cmp_numerics(operand, bound2) <= 0)
+				add_var(&count_var, &const_one, &result_var);
+			else
+				compute_bucket(operand, bound1, bound2,
+							   &count_var, &result_var);
+			break;
+	}
+
+	result = numericvar_to_int4(&result_var);
+
+	free_var(&count_var);
+	free_var(&result_var);
+
+	PG_RETURN_INT32(result);
+}
+
+/*
+ * compute_bucket() -
+ *
+ * If 'operand' is not outside the bucket range, determine the correct
+ * bucket for it to go. The calculations performed by this function
+ * are derived directly from the SQL2003 spec.
+ */
+static void
+compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
+			   NumericVar *count_var, NumericVar *result_var)
+{
+	NumericVar bound1_var;
+	NumericVar bound2_var;
+	NumericVar operand_var;
+
+	init_var(&bound1_var);
+	init_var(&bound2_var);
+	init_var(&operand_var);
+
+	set_var_from_num(bound1, &bound1_var);
+	set_var_from_num(bound2, &bound2_var);
+	set_var_from_num(operand, &operand_var);
+
+	if (cmp_var(&bound1_var, &bound2_var) < 0)
+	{
+		sub_var(&operand_var, &bound1_var, &operand_var);
+		sub_var(&bound2_var, &bound1_var, &bound2_var);
+		div_var(&operand_var, &bound2_var, result_var,
+				select_div_scale(&operand_var, &bound2_var));
+	}
+	else
+	{
+		sub_var(&bound1_var, &operand_var, &operand_var);
+		sub_var(&bound1_var, &bound2_var, &bound1_var);
+		div_var(&operand_var, &bound1_var, result_var,
+				select_div_scale(&operand_var, &bound1_var));
+	}
+
+	mul_var(result_var, count_var, result_var,
+			result_var->dscale + count_var->dscale);
+	add_var(result_var, &const_one, result_var);
+	floor_var(result_var, result_var);
+
+	free_var(&bound1_var);
+	free_var(&bound2_var);
+	free_var(&operand_var);
+}	
 
 /* ----------------------------------------------------------------------
  *
@@ -1612,7 +1734,6 @@ numeric_int4(PG_FUNCTION_ARGS)
 {
 	Numeric		num = PG_GETARG_NUMERIC(0);
 	NumericVar	x;
-	int64		val;
 	int32		result;
 
 	/* XXX would it be better to return NULL? */
@@ -1621,17 +1742,30 @@ numeric_int4(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("cannot convert NaN to integer")));
 
-	/* Convert to variable format and thence to int8 */
+	/* Convert to variable format, then convert to int4 */
 	init_var(&x);
 	set_var_from_num(num, &x);
+	result = numericvar_to_int4(&x);
+	free_var(&x);
+	PG_RETURN_INT32(result);
+}
 
-	if (!numericvar_to_int8(&x, &val))
+/*
+ * Given a NumericVar, convert it to an int32. If the NumericVar
+ * exceeds the range of an int32, raise the appropriate error via
+ * ereport(). The input NumericVar is *not* free'd.
+ */
+static int32
+numericvar_to_int4(NumericVar *var)
+{
+	int32 result;
+	int64 val;
+
+	if (!numericvar_to_int8(var, &val))
 		ereport(ERROR,
 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 				 errmsg("integer out of range")));
 
-	free_var(&x);
-
 	/* Down-convert to int4 */
 	result = (int32) val;
 
@@ -1641,10 +1775,9 @@ numeric_int4(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 				 errmsg("integer out of range")));
 
-	PG_RETURN_INT32(result);
+	return result;
 }
 
-
 Datum
 int8_numeric(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.229 2004/05/10 22:44:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.230 2004/05/14 21:42:28 neilc Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200405101
+#define CATALOG_VERSION_NO	200405141
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.328 2004/05/07 16:57:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.329 2004/05/14 21:42:28 neilc Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2508,6 +2508,8 @@ DATA(insert OID = 1745 ( float4					PGNSP PGUID 12 f f t f i 1 700 "1700" _null_
 DESCR("(internal)");
 DATA(insert OID = 1746 ( float8					PGNSP PGUID 12 f f t f i 1 701 "1700" _null_  numeric_float8 - _null_ ));
 DESCR("(internal)");
+DATA(insert OID = 2170 ( width_bucket			PGNSP PGUID 12 f f t f i 4 23 "1700 1700 1700 23" _null_  width_bucket_numeric - _null_ ));
+DESCR("bucket number of operand in equidepth histogram");
 
 DATA(insert OID = 1747 ( time_pl_interval		PGNSP PGUID 12 f f t f i 2 1083 "1083 1186" _null_  time_pl_interval - _null_ ));
 DESCR("plus");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.237 2004/05/05 04:48:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.238 2004/05/14 21:42:30 neilc Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -758,6 +758,7 @@ extern Datum int8_sum(PG_FUNCTION_ARGS);
 extern Datum int2_avg_accum(PG_FUNCTION_ARGS);
 extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
 extern Datum int8_avg(PG_FUNCTION_ARGS);
+extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
 
 /* ri_triggers.c */
 extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/errcodes.h b/src/include/utils/errcodes.h
@@ -11,7 +11,7 @@
  *
  * Copyright (c) 2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.9 2004/05/14 18:04:02 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.10 2004/05/14 21:42:30 neilc Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -116,6 +116,7 @@
 #define ERRCODE_ESCAPE_CHARACTER_CONFLICT	MAKE_SQLSTATE('2','2', '0','0','B')
 #define ERRCODE_INDICATOR_OVERFLOW			MAKE_SQLSTATE('2','2', '0','2','2')
 #define ERRCODE_INTERVAL_FIELD_OVERFLOW		MAKE_SQLSTATE('2','2', '0','1','5')
+#define ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION		MAKE_SQLSTATE('2','2', '0', '1', 'G')
 #define ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST		MAKE_SQLSTATE('2','2', '0','1','8')
 #define ERRCODE_INVALID_DATETIME_FORMAT		MAKE_SQLSTATE('2','2', '0','0','7')
 #define ERRCODE_INVALID_ESCAPE_CHARACTER	MAKE_SQLSTATE('2','2', '0','1','9')
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql

Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@`
`37`	`37`	`* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group`
`38`	`38`	`* Portions Copyright (c) 1994, Regents of the University of California`
`39`	`39`	`*`
`40`		`- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.229 2004/05/10 22:44:49 tgl Exp $`
	`40`	`+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.230 2004/05/14 21:42:28 neilc Exp $`
`41`	`41`	`*`
`42`	`42`	`*-------------------------------------------------------------------------`
`43`	`43`	`*/`
`@@ -53,6 +53,6 @@`
`53`	`53`	`*/`
`54`	`54`
`55`	`55`	`/* yyyymmddN */`
`56`		`-#define CATALOG_VERSION_NO 200405101`
	`56`	`+#define CATALOG_VERSION_NO 200405141`
`57`	`57`
`58`	`58`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@`
`11`	`11`	`*`
`12`	`12`	`* Copyright (c) 2003, PostgreSQL Global Development Group`
`13`	`13`	`*`
`14`		`- * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.9 2004/05/14 18:04:02 neilc Exp $`
	`14`	`+ * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.10 2004/05/14 21:42:30 neilc Exp $`
`15`	`15`	`*`
`16`	`16`	`*-------------------------------------------------------------------------`
`17`	`17`	`*/`
`@@ -116,6 +116,7 @@`
`116`	`116`	`#define ERRCODE_ESCAPE_CHARACTER_CONFLICT MAKE_SQLSTATE('2','2', '0','0','B')`
`117`	`117`	`#define ERRCODE_INDICATOR_OVERFLOW MAKE_SQLSTATE('2','2', '0','2','2')`
`118`	`118`	`#define ERRCODE_INTERVAL_FIELD_OVERFLOW MAKE_SQLSTATE('2','2', '0','1','5')`
	`119`	`+#define ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION MAKE_SQLSTATE('2','2', '0', '1', 'G')`
`119`	`120`	`#define ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST MAKE_SQLSTATE('2','2', '0','1','8')`
`120`	`121`	`#define ERRCODE_INVALID_DATETIME_FORMAT MAKE_SQLSTATE('2','2', '0','0','7')`
`121`	`122`	`#define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9')`