|
7 | 7 | * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
8 | 8 | * Portions Copyright (c) 1994, Regents of the University of California
|
9 | 9 | *
|
10 |
| - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $ |
| 10 | + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $ |
11 | 11 | *
|
12 | 12 | *-------------------------------------------------------------------------
|
13 | 13 | */
|
@@ -489,6 +489,122 @@ xml_init(void)
|
489 | 489 | }
|
490 | 490 |
|
491 | 491 |
|
| 492 | +/* |
| 493 | + * SQL/XML allows storing "XML documents" or "XML content". "XML |
| 494 | + * documents" are specified by the XML specification and are parsed |
| 495 | + * easily by libxml. "XML content" is specified by SQL/XML as the |
| 496 | + * production "XMLDecl? content". But libxml can only parse the |
| 497 | + * "content" part, so we have to parse the XML declaration ourselves |
| 498 | + * to complete this. |
| 499 | + */ |
| 500 | + |
| 501 | +#define CHECK_XML_SPACE(p) if (!xmlIsBlank_ch(*(p))) return XML_ERR_SPACE_REQUIRED |
| 502 | +#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++ |
| 503 | + |
| 504 | +static int |
| 505 | +parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone) |
| 506 | +{ |
| 507 | + const xmlChar *p; |
| 508 | + const xmlChar *save_p; |
| 509 | + |
| 510 | + p = str; |
| 511 | + |
| 512 | + if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0) |
| 513 | + goto finished; |
| 514 | + |
| 515 | + p += 5; |
| 516 | + |
| 517 | + /* version */ |
| 518 | + CHECK_XML_SPACE(p); |
| 519 | + SKIP_XML_SPACE(p); |
| 520 | + if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0) |
| 521 | + return XML_ERR_VERSION_MISSING; |
| 522 | + p += 7; |
| 523 | + SKIP_XML_SPACE(p); |
| 524 | + if (*p != '=') |
| 525 | + return XML_ERR_VERSION_MISSING; |
| 526 | + p += 1; |
| 527 | + SKIP_XML_SPACE(p); |
| 528 | + if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0) |
| 529 | + return XML_ERR_VERSION_MISSING; |
| 530 | + p += 5; |
| 531 | + |
| 532 | + /* encoding */ |
| 533 | + save_p = p; |
| 534 | + SKIP_XML_SPACE(p); |
| 535 | + if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0) |
| 536 | + { |
| 537 | + CHECK_XML_SPACE(save_p); |
| 538 | + p += 8; |
| 539 | + SKIP_XML_SPACE(p); |
| 540 | + if (*p != '=') |
| 541 | + return XML_ERR_MISSING_ENCODING; |
| 542 | + p += 1; |
| 543 | + SKIP_XML_SPACE(p); |
| 544 | + |
| 545 | + if (*p == '\'' || *p == '"') |
| 546 | + { |
| 547 | + const xmlChar *q; |
| 548 | + |
| 549 | + q = xmlStrchr(p + 1, *p); |
| 550 | + if (!q) |
| 551 | + return XML_ERR_MISSING_ENCODING; |
| 552 | + |
| 553 | + *encoding = xmlStrndup(p + 1, q - p - 1); |
| 554 | + p = q + 1; |
| 555 | + } |
| 556 | + else |
| 557 | + return XML_ERR_MISSING_ENCODING; |
| 558 | + } |
| 559 | + else |
| 560 | + { |
| 561 | + p = save_p; |
| 562 | + *encoding = NULL; |
| 563 | + } |
| 564 | + |
| 565 | + /* standalone */ |
| 566 | + save_p = p; |
| 567 | + SKIP_XML_SPACE(p); |
| 568 | + if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0) |
| 569 | + { |
| 570 | + CHECK_XML_SPACE(save_p); |
| 571 | + p += 10; |
| 572 | + SKIP_XML_SPACE(p); |
| 573 | + if (*p != '=') |
| 574 | + return XML_ERR_STANDALONE_VALUE; |
| 575 | + p += 1; |
| 576 | + SKIP_XML_SPACE(p); |
| 577 | + if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0) |
| 578 | + { |
| 579 | + *standalone = 1; |
| 580 | + p += 5; |
| 581 | + } |
| 582 | + else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0) |
| 583 | + { |
| 584 | + *standalone = 0; |
| 585 | + p += 4; |
| 586 | + } |
| 587 | + else |
| 588 | + return XML_ERR_STANDALONE_VALUE; |
| 589 | + } |
| 590 | + else |
| 591 | + { |
| 592 | + p = save_p; |
| 593 | + *standalone = -1; |
| 594 | + } |
| 595 | + |
| 596 | + SKIP_XML_SPACE(p); |
| 597 | + if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0) |
| 598 | + return XML_ERR_XMLDECL_NOT_FINISHED; |
| 599 | + p += 2; |
| 600 | + |
| 601 | +finished: |
| 602 | + if (len) |
| 603 | + *len = (p - str); |
| 604 | + return XML_ERR_OK; |
| 605 | +} |
| 606 | + |
| 607 | + |
492 | 608 | /*
|
493 | 609 | * Convert a C string to XML internal representation
|
494 | 610 | *
|
@@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
|
536 | 652 | }
|
537 | 653 | else
|
538 | 654 | {
|
| 655 | + size_t count; |
| 656 | + xmlChar *encoding = NULL; |
| 657 | + int standalone = -1; |
| 658 | + |
539 | 659 | doc = xmlNewDoc(NULL);
|
540 | 660 |
|
541 |
| - /* |
542 |
| - * FIXME: An XMLDecl is supposed to be accepted before the |
543 |
| - * content, but libxml doesn't allow this. Parse that |
544 |
| - * ourselves? |
545 |
| - */ |
| 661 | + res_code = parse_xml_decl(string, &count, &encoding, &standalone); |
546 | 662 |
|
547 | 663 | /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
|
548 |
| - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL); |
| 664 | + if (res_code == 0) |
| 665 | + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL); |
549 | 666 | if (res_code != 0)
|
550 | 667 | xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
|
551 | 668 | "invalid XML content", res_code);
|
| 669 | + |
| 670 | + doc->encoding = encoding; |
| 671 | + doc->standalone = standalone; |
552 | 672 | }
|
553 | 673 |
|
554 | 674 | /* TODO encoding issues
|
|
0 commit comments