7
7
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
8
8
* Portions Copyright (c) 1994, Regents of the University of California
9
9
*
10
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $
10
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $
11
11
*
12
12
*-------------------------------------------------------------------------
13
13
*/
@@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
109
109
static bool print_xml_decl (StringInfo buf , const xmlChar * version ,
110
110
pg_enc encoding , int standalone );
111
111
static xmlDocPtr xml_parse (text * data , XmlOptionType xmloption_arg ,
112
- bool preserve_whitespace , xmlChar * encoding );
112
+ bool preserve_whitespace , int encoding );
113
113
static text * xml_xmlnodetoxmltype (xmlNodePtr cur );
114
114
#endif /* USE_LIBXML */
115
115
@@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS)
183
183
* Parse the data to check if it is well-formed XML data. Assume that
184
184
* ERROR occurred if parsing failed.
185
185
*/
186
- doc = xml_parse (vardata , xmloption , true, NULL );
186
+ doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding () );
187
187
xmlFreeDoc (doc );
188
188
189
189
PG_RETURN_XML_P (vardata );
@@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS)
272
272
char * newstr ;
273
273
int nbytes ;
274
274
xmlDocPtr doc ;
275
- xmlChar * encoding = NULL ;
275
+ xmlChar * encodingStr = NULL ;
276
+ int encoding ;
276
277
277
278
/*
278
279
* Read the data in raw format. We don't know yet what the encoding is, as
@@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS)
293
294
str = VARDATA (result );
294
295
str [nbytes ] = '\0' ;
295
296
296
- parse_xml_decl ((xmlChar * ) str , NULL , NULL , & encoding , NULL );
297
+ parse_xml_decl ((xmlChar * ) str , NULL , NULL , & encodingStr , NULL );
298
+
299
+ /*
300
+ * If encoding wasn't explicitly specified in the XML header, treat it as
301
+ * UTF-8, as that's the default in XML. This is different from xml_in(),
302
+ * where the input has to go through the normal client to server encoding
303
+ * conversion.
304
+ */
305
+ encoding = encodingStr ? xmlChar_to_encoding (encodingStr ) : PG_UTF8 ;
297
306
298
307
/*
299
308
* Parse the data to check if it is well-formed XML data. Assume that
@@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS)
305
314
/* Now that we know what we're dealing with, convert to server encoding */
306
315
newstr = (char * ) pg_do_encoding_conversion ((unsigned char * ) str ,
307
316
nbytes ,
308
- encoding ?
309
- xmlChar_to_encoding (encoding ) :
310
- PG_UTF8 ,
317
+ encoding ,
311
318
GetDatabaseEncoding ());
312
319
313
320
if (newstr != str )
@@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
659
666
#ifdef USE_LIBXML
660
667
xmlDocPtr doc ;
661
668
662
- doc = xml_parse (data , xmloption_arg , preserve_whitespace , NULL );
669
+ doc = xml_parse (data , xmloption_arg , preserve_whitespace ,
670
+ GetDatabaseEncoding ());
663
671
xmlFreeDoc (doc );
664
672
665
673
return (xmltype * ) data ;
@@ -799,7 +807,8 @@ xml_is_document(xmltype *arg)
799
807
/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
800
808
PG_TRY ();
801
809
{
802
- doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true, NULL );
810
+ doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true,
811
+ GetDatabaseEncoding ());
803
812
result = true;
804
813
}
805
814
PG_CATCH ();
@@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
1152
1161
*/
1153
1162
static xmlDocPtr
1154
1163
xml_parse (text * data , XmlOptionType xmloption_arg , bool preserve_whitespace ,
1155
- xmlChar * encoding )
1164
+ int encoding )
1156
1165
{
1157
1166
int32 len ;
1158
1167
xmlChar * string ;
@@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1165
1174
1166
1175
utf8string = pg_do_encoding_conversion (string ,
1167
1176
len ,
1168
- encoding ?
1169
- xmlChar_to_encoding (encoding ) :
1170
- GetDatabaseEncoding (),
1177
+ encoding ,
1171
1178
PG_UTF8 );
1172
1179
1173
1180
/* Start up libxml and its parser (no-ops if already done) */
0 commit comments