#include "mb/pg_wchar.h"
+/*
+ * In today's multibyte encodings other than UTF8, this two-byte sequence
+ * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
+ *
+ * For historical reasons, several verifychar implementations opt to reject
+ * this pair specifically. Byte pair range constraints, in encoding
+ * originator documentation, always excluded this pair. No core conversion
+ * could translate it. However, longstanding verifychar implementations
+ * accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
+ * pairs not valid per encoding originator documentation. To avoid tightening
+ * core or non-core conversions in a security patch, we sought this one pair.
+ *
+ * PQescapeString() historically used spaces for BYTE1; many other values
+ * could suffice for BYTE1.
+ */
+/* First byte of the invalid pair written for encodings other than UTF8. */
+#define NONUTF8_INVALID_BYTE0 (0x8d)
+/* Second byte of the invalid pair: an ASCII space. */
+#define NONUTF8_INVALID_BYTE1 (' ')
+
+
/*
* Operations on multi-byte encodings are driven by a table of helper
* functions.
if (len < l)
return -1;
+ if (l == 2 &&
+ s[0] == NONUTF8_INVALID_BYTE0 &&
+ s[1] == NONUTF8_INVALID_BYTE1)
+ return -1;
+
while (--l > 0)
{
if (*++s == '\0')
if (len < l)
return -1;
+ if (l == 2 &&
+ s[0] == NONUTF8_INVALID_BYTE0 &&
+ s[1] == NONUTF8_INVALID_BYTE1)
+ return -1;
+
while (--l > 0)
{
if (*++s == '\0')
if (len < l)
return -1;
+ if (l == 2 &&
+ s[0] == NONUTF8_INVALID_BYTE0 &&
+ s[1] == NONUTF8_INVALID_BYTE1)
+ return -1;
+
while (--l > 0)
{
if (*++s == '\0')
}
+/*
+ * Store a two-byte invalid sequence for the given encoding into dst.
+ *
+ * The intended result is that pg_encoding_mblen() reports a length of 2 for
+ * dst while pg_encoding_verifymbstr() finds a valid prefix of length 0.
+ *
+ * Only dst[0] and dst[1] are written; no terminator is appended.  The
+ * encoding is asserted to have a maximum character length above one byte.
+ */
+void
+pg_encoding_set_invalid(int encoding, char *dst)
+{
+    Assert(pg_encoding_max_length(encoding) > 1);
+
+    /* UTF8 gets its own lead byte; every other encoding shares one. */
+    if (encoding == PG_UTF8)
+        dst[0] = 0xc0;
+    else
+        dst[0] = NONUTF8_INVALID_BYTE0;
+
+    dst[1] = NONUTF8_INVALID_BYTE1;
+}
+
/*
*-------------------------------------------------------------------
* encoding info table
{
Assert(PG_VALID_ENCODING(encoding));
- return pg_wchar_table[encoding].maxmblen;
+ /*
+ * Check for the encoding despite the assert, due to some mingw versions
+ * otherwise issuing bogus warnings.
+ */
+ return PG_VALID_ENCODING(encoding) ?
+ pg_wchar_table[encoding].maxmblen :
+ pg_wchar_table[PG_SQL_ASCII].maxmblen;
}
#endif
} pg_enc2name;
-extern const pg_enc2name pg_enc2name_tbl[];
+extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
/*
* Encoding names for gettext
* (in addition to the ones just above). The constant tables declared
* earlier in this file are also available from libpgcommon.
*/
+extern void pg_encoding_set_invalid(int encoding, char *dst);
extern int pg_encoding_mblen(int encoding, const char *mbstr);
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
--
-- create user defined conversion
--
+SELECT FROM test_enc_setup();
+--
+(1 row)
+
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
SET SESSION AUTHORIZATION regress_conversion_user;
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
AS '@libdir@/regress@DLSUFFIX@'
LANGUAGE C;
+CREATE FUNCTION test_enc_setup() RETURNS void
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
+ LANGUAGE C STRICT;
+
-- Tests creating a FDW handler
CREATE FUNCTION test_fdw_handler()
RETURNS fdw_handler
RETURNS bool
AS '@libdir@/regress@DLSUFFIX@'
LANGUAGE C;
+CREATE FUNCTION test_enc_setup() RETURNS void
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
+ LANGUAGE C STRICT;
-- Tests creating a FDW handler
CREATE FUNCTION test_fdw_handler()
RETURNS fdw_handler
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/spi.h"
+#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
{
PG_RETURN_NULL();
}
+
+/* one-time tests for encoding infrastructure */
+PG_FUNCTION_INFO_V1(test_enc_setup);
+Datum
+test_enc_setup(PG_FUNCTION_ARGS)
+{
+    int         enc;
+
+    /*
+     * Check pg_encoding_set_invalid() for every encoding number below
+     * _PG_LAST_ENCODING_.  Each failed expectation is reported as a WARNING
+     * and the remaining checks still run.
+     */
+    for (enc = 0; enc < _PG_LAST_ENCODING_; enc++)
+    {
+        char        pair[2];
+        char        padded[16];
+        const char *encname;
+        int         nbytes;
+        int         charlen;
+        int         prefix_full;
+        int         prefix_one;
+        int         prefix_padded;
+
+        /* Encodings whose maximum character length is 1 are skipped. */
+        if (pg_encoding_max_length(enc) == 1)
+            continue;
+
+        encname = pg_enc2name_tbl[enc].name;
+        pg_encoding_set_invalid(enc, pair);
+
+        /* Neither byte may be NUL. */
+        nbytes = strnlen(pair, 2);
+        if (nbytes != 2)
+            elog(WARNING,
+                 "official invalid string for encoding \"%s\" has length %d",
+                 encname, nbytes);
+
+        /* The pair must be seen as one two-byte character. */
+        charlen = pg_encoding_mblen(enc, pair);
+        if (charlen != 2)
+            elog(WARNING,
+                 "official invalid string for encoding \"%s\" has mblen %d",
+                 encname, charlen);
+
+        /* Verification of the whole pair must accept nothing. */
+        prefix_full = pg_encoding_verifymbstr(enc, pair, nbytes);
+        if (prefix_full != 0)
+            elog(WARNING,
+                 "official invalid string for encoding \"%s\" has valid prefix of length %d",
+                 encname, prefix_full);
+
+        /* The first byte on its own must be rejected as well. */
+        prefix_one = pg_encoding_verifymbstr(enc, pair, 1);
+        if (prefix_one != 0)
+            elog(WARNING,
+                 "first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
+                 encname, prefix_one);
+
+        /* Following the pair with spaces must not make it verify. */
+        memset(padded, ' ', sizeof(padded));
+        memcpy(padded, pair, sizeof(pair));
+        prefix_padded = pg_encoding_verifymbstr(enc, padded, sizeof(padded));
+        if (prefix_padded != 0)
+            elog(WARNING,
+                 "trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
+                 encname, prefix_padded);
+    }
+
+    PG_RETURN_VOID();
+}
--
-- create user defined conversion
--
+
+SELECT FROM test_enc_setup();
+
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
SET SESSION AUTHORIZATION regress_conversion_user;
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;