Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 53a11be

Browse files
committed
Allow an XML fragment to contain an XML declaration. For that, we need a small
hand-crafted parser for the XML declaration, because libxml doesn't seem to allow this.
1 parent 324297d commit 53a11be

File tree

1 file changed

+127
-7
lines changed
  • src/backend/utils/adt

1 file changed

+127
-7
lines changed

src/backend/utils/adt/xml.c

Lines changed: 127 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -489,6 +489,122 @@ xml_init(void)
489489
}
490490

491491

492+
/*
493+
* SQL/XML allows storing "XML documents" or "XML content". "XML
494+
* documents" are specified by the XML specification and are parsed
495+
* easily by libxml. "XML content" is specified by SQL/XML as the
496+
* production "XMLDecl? content". But libxml can only parse the
497+
* "content" part, so we have to parse the XML declaration ourselves
498+
* to complete this.
499+
*/
500+
501+
#define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED
502+
#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
503+
504+
static int
505+
parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
506+
{
507+
const xmlChar *p;
508+
const xmlChar *save_p;
509+
510+
p = str;
511+
512+
if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
513+
goto finished;
514+
515+
p += 5;
516+
517+
/* version */
518+
CHECK_XML_SPACE(p);
519+
SKIP_XML_SPACE(p);
520+
if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
521+
return XML_ERR_VERSION_MISSING;
522+
p += 7;
523+
SKIP_XML_SPACE(p);
524+
if (*p != '=')
525+
return XML_ERR_VERSION_MISSING;
526+
p += 1;
527+
SKIP_XML_SPACE(p);
528+
if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
529+
return XML_ERR_VERSION_MISSING;
530+
p += 5;
531+
532+
/* encoding */
533+
save_p = p;
534+
SKIP_XML_SPACE(p);
535+
if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
536+
{
537+
CHECK_XML_SPACE(save_p);
538+
p += 8;
539+
SKIP_XML_SPACE(p);
540+
if (*p != '=')
541+
return XML_ERR_MISSING_ENCODING;
542+
p += 1;
543+
SKIP_XML_SPACE(p);
544+
545+
if (*p == '\'' || *p == '"')
546+
{
547+
const xmlChar *q;
548+
549+
q = xmlStrchr(p + 1, *p);
550+
if (!q)
551+
return XML_ERR_MISSING_ENCODING;
552+
553+
*encoding = xmlStrndup(p + 1, q - p - 1);
554+
p = q + 1;
555+
}
556+
else
557+
return XML_ERR_MISSING_ENCODING;
558+
}
559+
else
560+
{
561+
p = save_p;
562+
*encoding = NULL;
563+
}
564+
565+
/* standalone */
566+
save_p = p;
567+
SKIP_XML_SPACE(p);
568+
if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
569+
{
570+
CHECK_XML_SPACE(save_p);
571+
p += 10;
572+
SKIP_XML_SPACE(p);
573+
if (*p != '=')
574+
return XML_ERR_STANDALONE_VALUE;
575+
p += 1;
576+
SKIP_XML_SPACE(p);
577+
if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
578+
{
579+
*standalone = 1;
580+
p += 5;
581+
}
582+
else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
583+
{
584+
*standalone = 0;
585+
p += 4;
586+
}
587+
else
588+
return XML_ERR_STANDALONE_VALUE;
589+
}
590+
else
591+
{
592+
p = save_p;
593+
*standalone = -1;
594+
}
595+
596+
SKIP_XML_SPACE(p);
597+
if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
598+
return XML_ERR_XMLDECL_NOT_FINISHED;
599+
p += 2;
600+
601+
finished:
602+
if (len)
603+
*len = (p - str);
604+
return XML_ERR_OK;
605+
}
606+
607+
492608
/*
493609
* Convert a C string to XML internal representation
494610
*
@@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
536652
}
537653
else
538654
{
655+
size_t count;
656+
xmlChar *encoding = NULL;
657+
int standalone = -1;
658+
539659
doc = xmlNewDoc(NULL);
540660

541-
/*
542-
* FIXME: An XMLDecl is supposed to be accepted before the
543-
* content, but libxml doesn't allow this. Parse that
544-
* ourselves?
545-
*/
661+
res_code = parse_xml_decl(string, &count, &encoding, &standalone);
546662

547663
/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
548-
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL);
664+
if (res_code == 0)
665+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
549666
if (res_code != 0)
550667
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
551668
"invalid XML content", res_code);
669+
670+
doc->encoding = encoding;
671+
doc->standalone = standalone;
552672
}
553673

554674
/* TODO encoding issues

0 commit comments

Comments
 (0)