</tgroup>
</table>
+ <note>
+ <para>
+ The <type>json</type> functions and operators can impose stricter validity requirements
+ than the type's input functions. In particular, they check much more strictly that
+ Unicode surrogate pairs, which designate characters outside the Unicode Basic Multilingual
+ Plane, are used correctly: a high surrogate must be immediately followed by a low
+ surrogate, and a low surrogate must immediately follow a high surrogate.
+ </para>
+ </note>
+
<note>
<para>
The <xref linkend="hstore"> extension has a cast from <type>hstore</type> to
{
char *s;
int len;
+ int hi_surrogate = -1;
if (lex->strval != NULL)
resetStringInfo(lex->strval);
int utf8len;
char *converted;
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("high order surrogate must not follow a high order surrogate."),
+ report_json_context(lex)));
+ hi_surrogate = (ch & 0x3ff) << 10;
+ continue;
+ }
+ else if (ch >= 0xdc00 && ch <= 0xdfff)
+ {
+ if (hi_surrogate == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+ ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
+ hi_surrogate = -1;
+ }
+
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
unicode_to_utf8(ch, (unsigned char *) utf8str);
utf8len = pg_utf_mblen((unsigned char *) utf8str);
utf8str[utf8len] = '\0';
}
else if (lex->strval != NULL)
{
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
switch (*s)
{
case '"':
}
else if (lex->strval != NULL)
{
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
appendStringInfoChar(lex->strval, *s);
}
}
+ if (hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("low order surrogate must follow a high order surrogate."),
+ report_json_context(lex)));
+
/* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator;
lex->token_terminator = s + 1;
ERROR: cannot call json_populate_recordset on a nested object
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
ERROR: cannot call json_populate_recordset on a nested object
+-- handling of unicode surrogate pairs
+select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
+ correct
+----------------------------
+ "\ud83d\ude04\ud83d\udc36"
+(1 row)
+
+select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
+ERROR: invalid input syntax for type json
+DETAIL: high order surrogate must not follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
+ERROR: invalid input syntax for type json
+DETAIL: low order surrogate must follow a high order surrogate.
+CONTEXT: JSON data, line 1: { "a":...
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q;
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
+
+-- handling of unicode surrogate pairs
+
+select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
+select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
+select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
+select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
+select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate