diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c67688cbf5f..db42d7856e8 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14623,6 +14623,56 @@ SELECT xmltext('< foo & bar >'); + + <literal>xmlcanonicalize</literal> + + + xmlcanonicalize + + + +xmlcanonicalize ( doc xml [, keep_comments boolean DEFAULT true] ) xml + + + + + This function transforms a given XML document into its canonical form, + as defined by the W3C Canonical XML 1.1 Specification, which standardizes the document's + structure and syntax to facilitate comparison and validation. + The keep_comments parameter controls whether XML comments from the input document are preserved or discarded. + If omitted, it defaults to true. + + + + Example: + + + 42 + + '::xml); + xmlcanonicalize +----------------------------------------------------------------------------- + 42 +(1 row) + +SELECT + xmlcanonicalize( + ' + + 42 + + '::xml, false); + xmlcanonicalize +----------------------------------------------------------- + 42 +(1 row) +]]> + + + <literal>xmlcomment</literal> diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 566f308e443..15c33335dc7 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml) IMMUTABLE PARALLEL SAFE STRICT COST 1 RETURN xpath_exists($1, $2, '{}'::text[]); +CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true) + RETURNS xml + LANGUAGE internal + IMMUTABLE PARALLEL SAFE STRICT +AS 'xmlcanonicalize'; + CREATE OR REPLACE FUNCTION pg_sleep_for(interval) RETURNS void LANGUAGE sql diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2ea..26f086a3abd 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -58,6 +58,7 @@ #include #include #include +#include /* * We used to check for xmlStructuredErrorContext via a 
configure test; but @@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS) #endif /* not USE_LIBXML */ } +/** + * Converts an XML document to its canonical form according to the + * W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory. + */ +Datum +xmlcanonicalize(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + xmltype *arg = PG_GETARG_XML_P(0); + bool keep_comments = PG_GETARG_BOOL(1); + text *result; + int nbytes; + xmlDocPtr doc; + xmlChar *xmlbuf = NULL; + + doc = xml_parse(arg, XMLOPTION_DOCUMENT, false, + GetDatabaseEncoding(), NULL, NULL, NULL); + + /* + * This dumps the canonicalized XML doc into the xmlChar* buffer. + * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard. + */ + nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf); + + if(doc) + xmlFreeDoc(doc); + + if(nbytes < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not canonicalize the given XML document"))); + + result = cstring_to_text_with_len((const char *) xmlbuf, nbytes); + + xmlFree(xmlbuf); + + PG_RETURN_XML_P(result); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} /* * TODO: xmlconcat needs to merge the notations and unparsed entities diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d3d28a263fa..d6c5b80a8f3 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -9133,6 +9133,9 @@ { oid => '3813', descr => 'generate XML text node', proname => 'xmltext', prorettype => 'xml', proargtypes => 'text', prosrc => 'xmltext' }, +{ oid => '3814', descr => 'generate the canonical form of an XML document', + proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool', + prosrc => 'xmlcanonicalize' }, { oid => '2923', descr => 'map table contents to XML', proname => 'table_to_xml', procost => '100', provolatile => 's', diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 
103a22a3b1d..688c0fc3e9d 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 73c411118a3..8bc3ac1c966 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -1496,3 +1496,83 @@ ERROR: unsupported XML feature LINE 1: SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j':... ^ DETAIL: This functionality requires the server to be built with libxml support. +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +ERROR: unsupported XML feature +LINE 2: (' + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? +---------- +(0 rows) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' ', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(''); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' '); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo'); +ERROR: unsupported XML feature at character 24 +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index a85d95358d9..4ce36ff8257 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0ea4f508837..4af51a9908f 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -679,3 +679,55 @@ SELECT xmltext(' '); SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}'); SELECT xmltext('foo & <"bar">'); SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); + +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); + +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(NULL, true); + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +SELECT xmlcanonicalize(' ', true); +SELECT xmlcanonicalize('foo', true); +SELECT xmlcanonicalize(''); +SELECT xmlcanonicalize(' '); +SELECT xmlcanonicalize('foo'); +\set VERBOSITY default