From 1b35729765cfe08c959e34bbd63de1d164a61e8a Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Sat, 1 Mar 2025 10:49:24 +0000 Subject: [PATCH] Allow casting between bytea and integer types. This allows smallint, integer, and bigint values to be cast to and from bytea. The bytea value is the two's complement representation of the integer, with the most significant byte first. For example: 1234::bytea -> \x000004d2 (-1234)::bytea -> \xfffffb2e Author: Aleksander Alekseev Reviewed-by: Joel Jacobson Reviewed-by: Yugo Nagata Reviewed-by: Peter Eisentraut Reviewed-by: Michael Paquier Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAJ7c6TPtOp6%2BkFX5QX3fH1SVr7v65uHr-7yEJ%3DGMGQi5uhGtcA%40mail.gmail.com --- doc/src/sgml/func.sgml | 17 ++++ src/backend/utils/adt/varlena.c | 90 ++++++++++++++++++++ src/include/catalog/pg_cast.dat | 14 ++++ src/include/catalog/pg_proc.dat | 19 +++++ src/test/regress/expected/opr_sanity.out | 3 + src/test/regress/expected/strings.out | 102 +++++++++++++++++++++++ src/test/regress/sql/strings.sql | 29 +++++++ 7 files changed, 274 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 53565075ca72..1847669c2be1 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -5035,6 +5035,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + In addition, it is possible to cast integral values to and from type + bytea. Casting an integer to bytea produces + 2, 4, or 8 bytes, depending on the width of the integer type. The result + is the two's complement representation of the integer, with the most + significant byte first. Some examples: + +1234::smallint::bytea \x04d2 +cast(1234 as bytea) \x000004d2 +cast(-1234 as bytea) \xfffffb2e +'\x8000'::bytea::smallint -32768 +'\x8000'::bytea::integer 32768 + + Casting a bytea to an integer will raise an error if the + length of the bytea exceeds the width of the integer type. 
+ + See also the aggregate function string_agg in and the large object functions diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index e45565717030..d22648a7e48a 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -4057,6 +4057,96 @@ bytea_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* Cast bytea -> int2: MSB first, zero-extended; error if over 2 bytes */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4: MSB first, zero-extended; error if over 4 bytes */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8: MSB first, zero-extended; error if over 8 bytes */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use 
int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} + /* * appendStringInfoText * diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat index a26ba34e869b..ab46be606f03 100644 --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -320,6 +320,20 @@ { castsource => 'varchar', casttarget => 'name', castfunc => 'name(varchar)', castcontext => 'i', castmethod => 'f' }, +# Allow explicit coercions between bytea and integer types +{ castsource => 'int2', casttarget => 'bytea', castfunc => 'bytea(int2)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'int4', casttarget => 'bytea', castfunc => 'bytea(int4)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'int8', casttarget => 'bytea', castfunc => 'bytea(int8)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int2', castfunc => 'int2(bytea)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int4', castfunc => 'int4(bytea)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int8', castfunc => 'int8(bytea)', + castcontext => 'e', castmethod => 'f' }, + # Allow explicit coercions between int4 and "char" { castsource => 'char', casttarget => 'int4', castfunc => 'int4(char)', castcontext => 'e', castmethod => 'f' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 134b3dd86899..cede992b6e22 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1165,6 +1165,25 @@ proname => 'name', proleakproof => 't', prorettype => 'name', proargtypes => 'bpchar', prosrc => 'bpchar_name' }, +{ oid => '8577', descr => 'convert int2 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 'bytea', + proargtypes => 'int2', prosrc => 'int2_bytea' }, +{ oid => '8578', descr => 'convert int4 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 
'bytea', + proargtypes => 'int4', prosrc => 'int4_bytea' }, +{ oid => '8579', descr => 'convert int8 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 'bytea', + proargtypes => 'int8', prosrc => 'int8_bytea' }, +{ oid => '8580', descr => 'convert bytea to int2', + proname => 'int2', prorettype => 'int2', + proargtypes => 'bytea', prosrc => 'bytea_int2' }, +{ oid => '8581', descr => 'convert bytea to int4', + proname => 'int4', prorettype => 'int4', + proargtypes => 'bytea', prosrc => 'bytea_int4' }, +{ oid => '8582', descr => 'convert bytea to int8', + proname => 'int8', prorettype => 'int8', + proargtypes => 'bytea', prosrc => 'bytea_int8' }, + { oid => '449', descr => 'hash', proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2', prosrc => 'hashint2' }, diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index b673642ad1d7..20bf9ea9cdf7 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -875,6 +875,9 @@ uuid_extract_timestamp(uuid) uuid_extract_version(uuid) crc32(bytea) crc32c(bytea) +bytea(smallint) +bytea(integer) +bytea(bigint) bytea_larger(bytea,bytea) bytea_smaller(bytea,bytea) -- Check that functions without argument are not marked as leakproof. 
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index b65bb2d5368b..f8cba9f5b24e 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2358,6 +2358,108 @@ SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11); SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error ERROR: index 99 out of valid range, 0..8 +-- +-- conversions between bytea and integer types +-- +SELECT 0x1234::int2::bytea AS "\x1234", (-0x1234)::int2::bytea AS "\xedcc"; + \x1234 | \xedcc +--------+-------- + \x1234 | \xedcc +(1 row) + +SELECT 0x12345678::int4::bytea AS "\x12345678", (-0x12345678)::int4::bytea AS "\xedcba988"; + \x12345678 | \xedcba988 +------------+------------ + \x12345678 | \xedcba988 +(1 row) + +SELECT 0x1122334455667788::int8::bytea AS "\x1122334455667788", + (-0x1122334455667788)::int8::bytea AS "\xeeddccbbaa998878"; + \x1122334455667788 | \xeeddccbbaa998878 +--------------------+-------------------- + \x1122334455667788 | \xeeddccbbaa998878 +(1 row) + +SELECT ''::bytea::int2 AS "0"; + 0 +--- + 0 +(1 row) + +SELECT '\x12'::bytea::int2 AS "18"; + 18 +---- + 18 +(1 row) + +SELECT '\x1234'::bytea::int2 AS "4660"; + 4660 +------ + 4660 +(1 row) + +SELECT '\x123456'::bytea::int2; -- error +ERROR: smallint out of range +SELECT ''::bytea::int4 AS "0"; + 0 +--- + 0 +(1 row) + +SELECT '\x12'::bytea::int4 AS "18"; + 18 +---- + 18 +(1 row) + +SELECT '\x12345678'::bytea::int4 AS "305419896"; + 305419896 +----------- + 305419896 +(1 row) + +SELECT '\x123456789A'::bytea::int4; -- error +ERROR: integer out of range +SELECT ''::bytea::int8 AS "0"; + 0 +--- + 0 +(1 row) + +SELECT '\x12'::bytea::int8 AS "18"; + 18 +---- + 18 +(1 row) + +SELECT '\x1122334455667788'::bytea::int8 AS "1234605616436508552"; + 1234605616436508552 +--------------------- + 1234605616436508552 +(1 row) + +SELECT '\x112233445566778899'::bytea::int8; -- error +ERROR: bigint out of range +-- min/max integer values +SELECT 
'\x8000'::bytea::int2 AS "-32768", '\x7FFF'::bytea::int2 AS "32767"; + -32768 | 32767 +--------+------- + -32768 | 32767 +(1 row) + +SELECT '\x80000000'::bytea::int4 AS "-2147483648", '\x7FFFFFFF'::bytea::int4 AS "2147483647"; + -2147483648 | 2147483647 +-------------+------------ + -2147483648 | 2147483647 +(1 row) + +SELECT '\x8000000000000000'::bytea::int8 AS "-9223372036854775808", + '\x7FFFFFFFFFFFFFFF'::bytea::int8 AS "9223372036854775807"; + -9223372036854775808 | 9223372036854775807 +----------------------+--------------------- + -9223372036854775808 | 9223372036854775807 +(1 row) + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 8e0f3a0e75f8..4deb0683d571 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -751,6 +751,35 @@ SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11); SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error +-- +-- conversions between bytea and integer types +-- +SELECT 0x1234::int2::bytea AS "\x1234", (-0x1234)::int2::bytea AS "\xedcc"; +SELECT 0x12345678::int4::bytea AS "\x12345678", (-0x12345678)::int4::bytea AS "\xedcba988"; +SELECT 0x1122334455667788::int8::bytea AS "\x1122334455667788", + (-0x1122334455667788)::int8::bytea AS "\xeeddccbbaa998878"; + +SELECT ''::bytea::int2 AS "0"; +SELECT '\x12'::bytea::int2 AS "18"; +SELECT '\x1234'::bytea::int2 AS "4660"; +SELECT '\x123456'::bytea::int2; -- error + +SELECT ''::bytea::int4 AS "0"; +SELECT '\x12'::bytea::int4 AS "18"; +SELECT '\x12345678'::bytea::int4 AS "305419896"; +SELECT '\x123456789A'::bytea::int4; -- error + +SELECT ''::bytea::int8 AS "0"; +SELECT '\x12'::bytea::int8 AS "18"; +SELECT '\x1122334455667788'::bytea::int8 AS "1234605616436508552"; +SELECT '\x112233445566778899'::bytea::int8; -- error + +-- min/max integer values +SELECT 
'\x8000'::bytea::int2 AS "-32768", '\x7FFF'::bytea::int2 AS "32767"; +SELECT '\x80000000'::bytea::int4 AS "-2147483648", '\x7FFFFFFF'::bytea::int4 AS "2147483647"; +SELECT '\x8000000000000000'::bytea::int8 AS "-9223372036854775808", + '\x7FFFFFFFFFFFFFFF'::bytea::int8 AS "9223372036854775807"; + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- 