Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit bbcc986

Browse files
jimjonesbrCommitfest Bot
authored and
Commitfest Bot
committed
Add xmlcanonicalize function
This patch adds the xmlcanonicalize function, which transforms an XML document into its canonical form according to the W3C C14N 1.1 specification. xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) -> xml * doc: The XML document to be canonicalized. * keep_comments: A flag indicating whether to preserve or discard XML comments from the input document. If omitted, it defaults to 'true'. This implementation is based on the xmlC14NDocDumpMemory function from the C14N module of libxml2.
1 parent 961553d commit bbcc986

File tree

8 files changed

+398
-0
lines changed

8 files changed

+398
-0
lines changed

doc/src/sgml/func.sgml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14623,6 +14623,56 @@ SELECT xmltext('< foo & bar >');
1462314623
</para>
1462414624
</sect3>
1462514625

14626+
<sect3 id="functions-producing-xml-xmlcanonicalize">
14627+
<title><literal>xmlcanonicalize</literal></title>
14628+
14629+
<indexterm>
14630+
<primary>xmlcanonicalize</primary>
14631+
</indexterm>
14632+
14633+
<synopsis>
14634+
<function>xmlcanonicalize</function> ( <parameter>doc</parameter> <type>xml</type> [, <parameter>keep_comments</parameter> <type>boolean</type> DEFAULT <literal>true</literal>] ) <returnvalue>xml</returnvalue>
14635+
14636+
</synopsis>
14637+
14638+
<para>
14639+
This function transforms a given XML document into its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>,
14640+
as defined by the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>, which standardizes the document's
14641+
structure and syntax to facilitate comparison and validation.
14642+
The <parameter>keep_comments</parameter> parameter controls whether XML comments from the input document are preserved or discarded.
14643+
If omitted, it defaults to <literal>true</literal>.
14644+
</para>
14645+
14646+
<para>
14647+
Example:
14648+
<screen><![CDATA[
14649+
SELECT
14650+
xmlcanonicalize(
14651+
'<foo>
14652+
<!-- a comment -->
14653+
<bar c="3" b="2" a="1">42</bar>
14654+
<empty/>
14655+
</foo>'::xml);
14656+
xmlcanonicalize
14657+
-----------------------------------------------------------------------------
14658+
<foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
14659+
(1 row)
14660+
14661+
SELECT
14662+
xmlcanonicalize(
14663+
'<foo>
14664+
<!-- a comment -->
14665+
<bar c="3" b="2" a="1">42</bar>
14666+
<empty/>
14667+
</foo>'::xml, false);
14668+
xmlcanonicalize
14669+
-----------------------------------------------------------
14670+
<foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
14671+
(1 row)
14672+
]]></screen>
14673+
</para>
14674+
</sect3>
14675+
1462614676
<sect3 id="functions-producing-xml-xmlcomment">
1462714677
<title><literal>xmlcomment</literal></title>
1462814678

src/backend/catalog/system_functions.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml)
268268
IMMUTABLE PARALLEL SAFE STRICT COST 1
269269
RETURN xpath_exists($1, $2, '{}'::text[]);
270270

271+
-- Redeclare xmlcanonicalize so that its parameters carry the documented
-- names (doc, keep_comments) and keep_comments defaults to true.  Naming the
-- parameters allows named notation, e.g.
--   xmlcanonicalize(doc => x, keep_comments => false)
-- while positional calls keep working unchanged.
CREATE OR REPLACE FUNCTION
  xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true)
 RETURNS xml
 LANGUAGE internal
 IMMUTABLE PARALLEL SAFE STRICT
AS 'xmlcanonicalize';
276+
271277
CREATE OR REPLACE FUNCTION pg_sleep_for(interval)
272278
RETURNS void
273279
LANGUAGE sql

src/backend/utils/adt/xml.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <libxml/xmlwriter.h>
5959
#include <libxml/xpath.h>
6060
#include <libxml/xpathInternals.h>
61+
#include <libxml/c14n.h>
6162

6263
/*
6364
* We used to check for xmlStructuredErrorContext via a configure test; but
@@ -544,6 +545,48 @@ xmltext(PG_FUNCTION_ARGS)
544545
#endif /* not USE_LIBXML */
545546
}
546547

548+
/**
549+
* Converts an XML document to its canonical form according to the
550+
* W3C Canonical XML 1.1 specification implemented on xmlC14NDocDumpMemory.
551+
*/
552+
Datum
553+
xmlcanonicalize(PG_FUNCTION_ARGS)
554+
{
555+
#ifdef USE_LIBXML
556+
xmltype *arg = PG_GETARG_XML_P(0);
557+
bool keep_comments = PG_GETARG_BOOL(1);
558+
text *result;
559+
int nbytes;
560+
xmlDocPtr doc;
561+
xmlChar *xmlbuf = NULL;
562+
563+
doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
564+
GetDatabaseEncoding(), NULL, NULL, NULL);
565+
566+
/*
567+
* This dumps the canonicalized XML doc into the xmlChar* buffer.
568+
* mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
569+
*/
570+
nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, keep_comments, &xmlbuf);
571+
572+
if(doc)
573+
xmlFreeDoc(doc);
574+
575+
if(nbytes < 0)
576+
ereport(ERROR,
577+
(errcode(ERRCODE_INTERNAL_ERROR),
578+
errmsg("could not canonicalize the given XML document")));
579+
580+
result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
581+
582+
xmlFree(xmlbuf);
583+
584+
PG_RETURN_XML_P(result);
585+
#else
586+
NO_XML_SUPPORT();
587+
return 0;
588+
#endif /* not USE_LIBXML */
589+
}
547590

548591
/*
549592
* TODO: xmlconcat needs to merge the notations and unparsed entities

src/include/catalog/pg_proc.dat

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9133,6 +9133,9 @@
91339133
{ oid => '3813', descr => 'generate XML text node',
91349134
proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
91359135
prosrc => 'xmltext' },
9136+
{ oid => '3814', descr => 'generate the canonical form of an XML document',
9137+
proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
9138+
prosrc => 'xmlcanonicalize' },
91369139

91379140
{ oid => '2923', descr => 'map table contents to XML',
91389141
proname => 'table_to_xml', procost => '100', provolatile => 's',

src/test/regress/expected/xml.out

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j'::char);
18811881
x&lt;P&gt;73&lt;/P&gt;0.42truej
18821882
(1 row)
18831883

1884+
-- xmlserialize: canonical
1885+
CREATE TABLE xmlcanonicalize_test (doc xml);
1886+
INSERT INTO xmlcanonicalize_test VALUES
1887+
('<?xml version="1.0" encoding="ISO-8859-1"?>
1888+
<!DOCTYPE doc SYSTEM "doc.dtd" [
1889+
<!ENTITY val "42">
1890+
<!ATTLIST xyz attr CDATA "default">
1891+
]>
1892+
1893+
<!-- attributes and namespces will be sorted -->
1894+
<foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
1895+
xmlns:b="http://www.ietf.org"
1896+
xmlns:a="http://www.w3.org"
1897+
xmlns="http://example.org">
1898+
1899+
<!-- Normalization of whitespace in start and end tags -->
1900+
<!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
1901+
<bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar >
1902+
1903+
<!-- empty element will be converted to start-end tag pair -->
1904+
<empty/>
1905+
1906+
<!-- text will be transcoded to UTF-8 -->
1907+
<transcode>&#49;</transcode>
1908+
1909+
<!-- whitespace inside tag will be preserved -->
1910+
<whitespace> 321 </whitespace>
1911+
1912+
<!-- empty namespace will be removed of child tag -->
1913+
<emptyns xmlns="" >
1914+
<emptyns_child xmlns=""></emptyns_child>
1915+
</emptyns>
1916+
1917+
<!-- CDATA section will be replaced by its value -->
1918+
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
1919+
</foo> <!-- comment outside root element --> ');
1920+
SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
1921+
xmlcanonicalize
1922+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1923+
<!-- attributes and namespces will be sorted --> +
1924+
<foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
1925+
<!-- comment outside root element -->
1926+
(1 row)
1927+
1928+
SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
1929+
xmlcanonicalize
1930+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1931+
<foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
1932+
(1 row)
1933+
1934+
SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
1935+
?column?
1936+
----------
1937+
t
1938+
(1 row)
1939+
1940+
SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
1941+
xmlcanonicalize
1942+
-----------------
1943+
1944+
(1 row)
1945+
1946+
SELECT xmlcanonicalize(NULL, true);
1947+
xmlcanonicalize
1948+
-----------------
1949+
1950+
(1 row)
1951+
1952+
\set VERBOSITY terse
1953+
SELECT xmlcanonicalize('', true);
1954+
ERROR: invalid XML document
1955+
SELECT xmlcanonicalize(' ', true);
1956+
ERROR: invalid XML document
1957+
SELECT xmlcanonicalize('foo', true);
1958+
ERROR: invalid XML document
1959+
SELECT xmlcanonicalize('');
1960+
ERROR: invalid XML document
1961+
SELECT xmlcanonicalize(' ');
1962+
ERROR: invalid XML document
1963+
SELECT xmlcanonicalize('foo');
1964+
ERROR: invalid XML document
1965+
\set VERBOSITY default

src/test/regress/expected/xml_1.out

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,3 +1496,83 @@ ERROR: unsupported XML feature
14961496
LINE 1: SELECT xmltext('x'|| '<P>73</P>'::xml || .42 || true || 'j':...
14971497
^
14981498
DETAIL: This functionality requires the server to be built with libxml support.
1499+
-- xmlserialize: canonical
1500+
CREATE TABLE xmlcanonicalize_test (doc xml);
1501+
INSERT INTO xmlcanonicalize_test VALUES
1502+
('<?xml version="1.0" encoding="ISO-8859-1"?>
1503+
<!DOCTYPE doc SYSTEM "doc.dtd" [
1504+
<!ENTITY val "42">
1505+
<!ATTLIST xyz attr CDATA "default">
1506+
]>
1507+
1508+
<!-- attributes and namespces will be sorted -->
1509+
<foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
1510+
xmlns:b="http://www.ietf.org"
1511+
xmlns:a="http://www.w3.org"
1512+
xmlns="http://example.org">
1513+
1514+
<!-- Normalization of whitespace in start and end tags -->
1515+
<!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
1516+
<bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar >
1517+
1518+
<!-- empty element will be converted to start-end tag pair -->
1519+
<empty/>
1520+
1521+
<!-- text will be transcoded to UTF-8 -->
1522+
<transcode>&#49;</transcode>
1523+
1524+
<!-- whitespace inside tag will be preserved -->
1525+
<whitespace> 321 </whitespace>
1526+
1527+
<!-- empty namespace will be removed of child tag -->
1528+
<emptyns xmlns="" >
1529+
<emptyns_child xmlns=""></emptyns_child>
1530+
</emptyns>
1531+
1532+
<!-- CDATA section will be replaced by its value -->
1533+
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
1534+
</foo> <!-- comment outside root element --> ');
1535+
ERROR: unsupported XML feature
1536+
LINE 2: ('<?xml version="1.0" encoding="ISO-8859-1"?>
1537+
^
1538+
DETAIL: This functionality requires the server to be built with libxml support.
1539+
SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test;
1540+
xmlcanonicalize
1541+
-----------------
1542+
(0 rows)
1543+
1544+
SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test;
1545+
xmlcanonicalize
1546+
-----------------
1547+
(0 rows)
1548+
1549+
SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test;
1550+
?column?
1551+
----------
1552+
(0 rows)
1553+
1554+
SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test;
1555+
xmlcanonicalize
1556+
-----------------
1557+
(0 rows)
1558+
1559+
SELECT xmlcanonicalize(NULL, true);
1560+
xmlcanonicalize
1561+
-----------------
1562+
1563+
(1 row)
1564+
1565+
\set VERBOSITY terse
1566+
SELECT xmlcanonicalize('', true);
1567+
ERROR: unsupported XML feature at character 24
1568+
SELECT xmlcanonicalize(' ', true);
1569+
ERROR: unsupported XML feature at character 24
1570+
SELECT xmlcanonicalize('foo', true);
1571+
ERROR: unsupported XML feature at character 24
1572+
SELECT xmlcanonicalize('');
1573+
ERROR: unsupported XML feature at character 24
1574+
SELECT xmlcanonicalize(' ');
1575+
ERROR: unsupported XML feature at character 24
1576+
SELECT xmlcanonicalize('foo');
1577+
ERROR: unsupported XML feature at character 24
1578+
\set VERBOSITY default

0 commit comments

Comments
 (0)