Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2197d06

Browse files
committed
Add support for parsing of large XML data (>= 10MB)
This commit adds XML_PARSE_HUGE to the libxml2 functions used in core for parsing XML objects, raising the original 10MB limit supported by libxml2. In most upstream code paths, XML_MAX_TEXT_LENGTH (10^7) is the historical limit, which is upgraded to XML_MAX_HUGE_LENGTH (10^9) once XML_PARSE_HUGE is passed to the parser calls. These are still limited by any palloc() calls for text, up to 1GB. This makes it possible to handle XML objects larger than 10MB within the backend in general, and also raises the depth limit. This change affects the contrib module xml2, the xml data type and SQL/XML.
Author: Dmitry Koval
Reviewed-by: Tom Lane, Michael Paquier
Discussion: https://postgr.es/m/18274-98d16bc03520665f@postgresql.org
1 parent 65c5864 commit 2197d06

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

contrib/xml2/xpath.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
381381
{
382382
workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
383383
docsize, NULL, NULL,
384-
XML_PARSE_NOENT);
384+
XML_PARSE_HUGE | XML_PARSE_NOENT);
385385
if (workspace->doctree != NULL)
386386
{
387387
workspace->ctxt = xmlXPathNewContext(workspace->doctree);
@@ -626,7 +626,7 @@ xpath_table(PG_FUNCTION_ARGS)
626626
if (xmldoc)
627627
doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
628628
NULL, NULL,
629-
XML_PARSE_NOENT);
629+
XML_PARSE_HUGE | XML_PARSE_NOENT);
630630
else /* treat NULL as not well-formed */
631631
doctree = NULL;
632632

contrib/xml2/xslt_proc.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ xslt_process(PG_FUNCTION_ARGS)
8787
/* Parse document */
8888
doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
8989
VARSIZE_ANY_EXHDR(doct), NULL, NULL,
90-
XML_PARSE_NOENT);
90+
XML_PARSE_HUGE | XML_PARSE_NOENT);
9191

9292
if (doctree == NULL)
9393
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
@@ -96,7 +96,7 @@ xslt_process(PG_FUNCTION_ARGS)
9696
/* Same for stylesheet */
9797
ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
9898
VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
99-
XML_PARSE_NOENT);
99+
XML_PARSE_HUGE | XML_PARSE_NOENT);
100100

101101
if (ssdoc == NULL)
102102
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,

src/backend/utils/adt/xml.c

+29-9
Original file line numberDiff line numberDiff line change
@@ -1688,8 +1688,8 @@ xml_doctype_in_content(const xmlChar *str)
16881688
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
16891689
*
16901690
* If parsed_nodes isn't NULL and the input is not an XML document, the list
1691-
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1692-
* to *parsed_nodes.
1691+
* of parsed nodes from the xmlParseInNodeContext call will be returned to
1692+
* *parsed_nodes.
16931693
*
16941694
* Errors normally result in ereport(ERROR), but if escontext is an
16951695
* ErrorSaveContext, then "safe" errors are reported there instead, and the
@@ -1795,7 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17951795
doc = xmlCtxtReadDoc(ctxt, utf8string,
17961796
NULL,
17971797
"UTF-8",
1798-
XML_PARSE_NOENT | XML_PARSE_DTDATTR
1798+
XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_HUGE
17991799
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
18001800
if (doc == NULL || xmlerrcxt->err_occurred)
18011801
{
@@ -1828,10 +1828,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18281828
/* allow empty content */
18291829
if (*(utf8string + count))
18301830
{
1831-
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1832-
utf8string + count,
1833-
parsed_nodes);
1834-
if (res_code != 0 || xmlerrcxt->err_occurred)
1831+
const char *data;
1832+
xmlNodePtr root;
1833+
xmlNodePtr lst;
1834+
xmlParserErrors xml_error;
1835+
1836+
data = (const char *) (utf8string + count);
1837+
1838+
/*
1839+
* Create a fake root node. The xmlNewDoc() function creates
1840+
* an XML document without any nodes, and this is required for
1841+
* xmlParseInNodeContext() that is able to handle
1842+
* XML_PARSE_HUGE.
1843+
*/
1844+
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1845+
if (root == NULL || xmlerrcxt->err_occurred)
1846+
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1847+
"could not allocate xml node");
1848+
xmlDocSetRootElement(doc, root);
1849+
1850+
/* Try to parse the string using the root node as context. */
1851+
xml_error = xmlParseInNodeContext(root, data, strlen(data),
1852+
XML_PARSE_HUGE,
1853+
parsed_nodes ? parsed_nodes : &lst);
1854+
if (xml_error != XML_ERR_OK || xmlerrcxt->err_occurred)
18351855
{
18361856
xml_errsave(escontext, xmlerrcxt,
18371857
ERRCODE_INVALID_XML_CONTENT,
@@ -4344,7 +4364,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
43444364
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
43454365
"could not allocate parser context");
43464366
doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4347-
len - xmldecl_len, NULL, NULL, 0);
4367+
len - xmldecl_len, NULL, NULL, XML_PARSE_HUGE);
43484368
if (doc == NULL || xmlerrcxt->err_occurred)
43494369
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
43504370
"could not parse XML document");
@@ -4675,7 +4695,7 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value)
46754695

46764696
PG_TRY();
46774697
{
4678-
doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4698+
doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, XML_PARSE_HUGE);
46794699
if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
46804700
xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
46814701
"could not parse XML document");

0 commit comments

Comments
 (0)