Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 237859e

Browse files
committed
Fix encoding handling in the xml binary input function (xml_recv). If the XML header didn't
specify an encoding explicitly, we used to treat the data as being in the database encoding when parsing it, but then performed a UTF-8 -> database encoding conversion on it, which was completely bogus. Such input is now consistently treated as UTF-8.
1 parent 1608489 commit 237859e

File tree

1 file changed

+21
-14
lines changed
  • src/backend/utils/adt

1 file changed

+21
-14
lines changed

src/backend/utils/adt/xml.c

+21-14
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
109109
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
110110
pg_enc encoding, int standalone);
111111
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
112-
bool preserve_whitespace, xmlChar *encoding);
112+
bool preserve_whitespace, int encoding);
113113
static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
114114
#endif /* USE_LIBXML */
115115

@@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS)
183183
* Parse the data to check if it is well-formed XML data. Assume that
184184
* ERROR occurred if parsing failed.
185185
*/
186-
doc = xml_parse(vardata, xmloption, true, NULL);
186+
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
187187
xmlFreeDoc(doc);
188188

189189
PG_RETURN_XML_P(vardata);
@@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS)
272272
char *newstr;
273273
int nbytes;
274274
xmlDocPtr doc;
275-
xmlChar *encoding = NULL;
275+
xmlChar *encodingStr = NULL;
276+
int encoding;
276277

277278
/*
278279
* Read the data in raw format. We don't know yet what the encoding is, as
@@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS)
293294
str = VARDATA(result);
294295
str[nbytes] = '\0';
295296

296-
parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
297+
parse_xml_decl((xmlChar *) str, NULL, NULL, &encodingStr, NULL);
298+
299+
/*
300+
* If encoding wasn't explicitly specified in the XML header, treat it as
301+
* UTF-8, as that's the default in XML. This is different from xml_in(),
302+
* where the input has to go through the normal client to server encoding
303+
* conversion.
304+
*/
305+
encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
297306

298307
/*
299308
* Parse the data to check if it is well-formed XML data. Assume that
@@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS)
305314
/* Now that we know what we're dealing with, convert to server encoding */
306315
newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
307316
nbytes,
308-
encoding ?
309-
xmlChar_to_encoding(encoding) :
310-
PG_UTF8,
317+
encoding,
311318
GetDatabaseEncoding());
312319

313320
if (newstr != str)
@@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
659666
#ifdef USE_LIBXML
660667
xmlDocPtr doc;
661668

662-
doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL);
669+
doc = xml_parse(data, xmloption_arg, preserve_whitespace,
670+
GetDatabaseEncoding());
663671
xmlFreeDoc(doc);
664672

665673
return (xmltype *) data;
@@ -799,7 +807,8 @@ xml_is_document(xmltype *arg)
799807
/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
800808
PG_TRY();
801809
{
802-
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL);
810+
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
811+
GetDatabaseEncoding());
803812
result = true;
804813
}
805814
PG_CATCH();
@@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
11521161
*/
11531162
static xmlDocPtr
11541163
xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1155-
xmlChar *encoding)
1164+
int encoding)
11561165
{
11571166
int32 len;
11581167
xmlChar *string;
@@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
11651174

11661175
utf8string = pg_do_encoding_conversion(string,
11671176
len,
1168-
encoding ?
1169-
xmlChar_to_encoding(encoding) :
1170-
GetDatabaseEncoding(),
1177+
encoding,
11711178
PG_UTF8);
11721179

11731180
/* Start up libxml and its parser (no-ops if already done) */

0 commit comments

Comments
 (0)