Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit bccca78

Browse files
committed
Fix some whitespace issues in XMLSERIALIZE(... INDENT).
We must drop whitespace while parsing the input, else libxml2 will include "blank" nodes that interfere with the desired indentation behavior. The end result is that we didn't indent nodes separated by whitespace.

Also, it seems that libxml2 may add a trailing newline when working in DOCUMENT mode. This is semantically insignificant, so strip it.

This is in the gray area between being a bug fix and a definition change. However, the INDENT option is still pretty new (since v16), so I think we can get away with changing this in stable branches. Hence, back-patch to v16.

Jim Jones

Discussion: https://postgr.es/m/872865a8-548b-48e1-bfcd-4e38e672c1e4@uni-muenster.de
1 parent ed055d2 commit bccca78

File tree

5 files changed

+84
-23
lines changed

5 files changed

+84
-23
lines changed

src/backend/utils/adt/xml.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,14 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
677677
}
678678

679679
#ifdef USE_LIBXML
680-
/* Parse the input according to the xmloption */
681-
doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
680+
681+
/*
682+
* Parse the input according to the xmloption.
683+
*
684+
* preserve_whitespace is set to false in case we are indenting, otherwise
685+
* libxml2 will fail to indent elements that have whitespace between them.
686+
*/
687+
doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
682688
&parsed_xmloptiontype, &content_nodes,
683689
(Node *) &escontext);
684690
if (doc == NULL || escontext.error_occurred)
@@ -802,7 +808,22 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
802808
"could not close xmlSaveCtxtPtr");
803809
}
804810

805-
result = (text *) xmlBuffer_to_xmltype(buf);
811+
/*
812+
* xmlDocContentDumpOutput may add a trailing newline, so remove that.
813+
*/
814+
if (xmloption_arg == XMLOPTION_DOCUMENT)
815+
{
816+
const char *str = (const char *) xmlBufferContent(buf);
817+
int len = xmlBufferLength(buf);
818+
819+
while (len > 0 && (str[len - 1] == '\n' ||
820+
str[len - 1] == '\r'))
821+
len--;
822+
823+
result = cstring_to_text_with_len(str, len);
824+
}
825+
else
826+
result = (text *) xmlBuffer_to_xmltype(buf);
806827
}
807828
PG_CATCH();
808829
{

src/test/regress/expected/xml.out

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
485485
<bar> +
486486
<val x="y">42</val>+
487487
</bar> +
488-
</foo> +
489-
488+
</foo>
490489
(1 row)
491490

492491
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -546,8 +545,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
546545
<val x="y">42</val> +
547546
<val x="y">text node<val>73</val></val>+
548547
</bar> +
549-
</foo> +
550-
548+
</foo>
551549
(1 row)
552550

553551
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -601,8 +599,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
601599
<bar> +
602600
<val>73</val> +
603601
</bar> +
604-
</foo> +
605-
602+
</foo>
606603
(1 row)
607604

608605
SELECT xmlserialize(CONTENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -620,8 +617,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
620617
xmlserialize
621618
--------------
622619
<!DOCTYPE a>+
623-
<a/> +
624-
620+
<a/>
625621
(1 row)
626622

627623
SELECT xmlserialize(CONTENT '<!DOCTYPE a><a/>' AS text INDENT);
@@ -638,8 +634,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
638634
--------------
639635
<foo> +
640636
<bar/> +
641-
</foo> +
642-
637+
</foo>
643638
(1 row)
644639

645640
SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
@@ -663,6 +658,24 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
663658
t
664659
(1 row)
665660

661+
-- indent xml strings containing blank nodes
662+
SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
663+
xmlserialize
664+
--------------
665+
<foo> +
666+
<bar/> +
667+
</foo>
668+
(1 row)
669+
670+
SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
671+
xmlserialize
672+
--------------
673+
text node +
674+
<foo> +
675+
<bar/> +
676+
</foo>
677+
(1 row)
678+
666679
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
667680
?column?
668681
----------

src/test/regress/expected/xml_1.out

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,17 @@ ERROR: unsupported XML feature
443443
LINE 1: SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><...
444444
^
445445
DETAIL: This functionality requires the server to be built with libxml support.
446+
-- indent xml strings containing blank nodes
447+
SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
448+
ERROR: unsupported XML feature
449+
LINE 1: SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>'...
450+
^
451+
DETAIL: This functionality requires the server to be built with libxml support.
452+
SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
453+
ERROR: unsupported XML feature
454+
LINE 1: SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> ...
455+
^
456+
DETAIL: This functionality requires the server to be built with libxml support.
446457
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
447458
ERROR: unsupported XML feature
448459
LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;

src/test/regress/expected/xml_2.out

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
471471
<bar> +
472472
<val x="y">42</val>+
473473
</bar> +
474-
</foo> +
475-
474+
</foo>
476475
(1 row)
477476

478477
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -532,8 +531,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
532531
<val x="y">42</val> +
533532
<val x="y">text node<val>73</val></val>+
534533
</bar> +
535-
</foo> +
536-
534+
</foo>
537535
(1 row)
538536

539537
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -587,8 +585,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
587585
<bar> +
588586
<val>73</val> +
589587
</bar> +
590-
</foo> +
591-
588+
</foo>
592589
(1 row)
593590

594591
SELECT xmlserialize(CONTENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -606,8 +603,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
606603
xmlserialize
607604
--------------
608605
<!DOCTYPE a>+
609-
<a/> +
610-
606+
<a/>
611607
(1 row)
612608

613609
SELECT xmlserialize(CONTENT '<!DOCTYPE a><a/>' AS text INDENT);
@@ -624,8 +620,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
624620
--------------
625621
<foo> +
626622
<bar/> +
627-
</foo> +
628-
623+
</foo>
629624
(1 row)
630625

631626
SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
@@ -649,6 +644,24 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
649644
t
650645
(1 row)
651646

647+
-- indent xml strings containing blank nodes
648+
SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
649+
xmlserialize
650+
--------------
651+
<foo> +
652+
<bar/> +
653+
</foo>
654+
(1 row)
655+
656+
SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
657+
xmlserialize
658+
--------------
659+
text node +
660+
<foo> +
661+
<bar/> +
662+
</foo>
663+
(1 row)
664+
652665
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
653666
?column?
654667
----------

src/test/regress/sql/xml.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
168168
-- 'no indent' = not using 'no indent'
169169
SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
170170
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
171+
-- indent xml strings containing blank nodes
172+
SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
173+
SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
171174

172175
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
173176
SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;

0 commit comments

Comments
 (0)