From 510842ac940cdb534da3ae225776def79e2498bc Mon Sep 17 00:00:00 2001 From: Norman Walsh Date: Fri, 4 Dec 2020 17:12:38 +0000 Subject: [PATCH 1/3] The build process relies on the serialization spec --- .gitignore | 2 + .../xslt-xquery-serialization-31/build.xml | 321 + .../xslt-xquery-serialization-31/src/bibl.xml | 140 + .../src/changes.txt | 177 + .../src/errors.xml | 155 + .../src/ns-xslt-xquery-serialization.xml | 256 + .../schema-for-serialization-parameters.xsd | 589 ++ .../src/schema-test.xml | 133 + .../src/xslt-xquery-serialization-errata.xml | 207 + .../src/xslt-xquery-serialization.xml | 5529 +++++++++++++++++ .../style/ns-xslt-xquery-serialization.xsl | 65 + .../style/serialization-diff.xsl | 36 + 12 files changed, 7610 insertions(+) create mode 100644 specifications/xslt-xquery-serialization-31/build.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/bibl.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/changes.txt create mode 100644 specifications/xslt-xquery-serialization-31/src/errors.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/ns-xslt-xquery-serialization.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/schema-for-serialization-parameters.xsd create mode 100644 specifications/xslt-xquery-serialization-31/src/schema-test.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization-errata.xml create mode 100644 specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization.xml create mode 100644 specifications/xslt-xquery-serialization-31/style/ns-xslt-xquery-serialization.xsl create mode 100644 specifications/xslt-xquery-serialization-31/style/serialization-diff.xsl diff --git a/.gitignore b/.gitignore index 5a9f853..61de649 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ /specifications/xpath-functions-40/html/ /specifications/xquery-40/build/ /specifications/xquery-40/html/ 
+/specifications/xslt-xquery-serialization-31/html/ +/specifications/xslt-xquery-serialization-31/build/ diff --git a/specifications/xslt-xquery-serialization-31/build.xml b/specifications/xslt-xquery-serialization-31/build.xml new file mode 100644 index 0000000..6676cbb --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/build.xml @@ -0,0 +1,321 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/specifications/xslt-xquery-serialization-31/src/bibl.xml b/specifications/xslt-xquery-serialization-31/src/bibl.xml new file mode 100644 index 0000000..6bc51a3 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/bibl.xml @@ -0,0 +1,140 @@ + + +References + +Normative References + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Unicode +Character Encoding Model, +Unicode Consortium. +Unicode Standard Annex #17. + + + +Unicode +Normalization Forms, +Unicode Consortium. +Unicode Standard Annex #15. + + + + + + + + + + + + + + + + + + + + + + + + + +The JSON Data Interchange Format, +ECMA International. + + +IETF. +RFC 7159: The Javascript Object Notation (JSON) Data Interchange Format, +T. Bray, Editor. +Internet Engineering Task Force, March 2014. +Available at: +http://www.rfc-editor.org/rfc/rfc7159.txt + + + + + + + +Informative References + + + +The JSON Data Interchange Format, +ECMA International. + + + + + + + + + + + XSLT 2.0 and XQuery 1.0 Serialization (Second Edition), W3C Recommendation, + Henry Zongaro, Norman Walsh, Joanne Tong, et. 
al., Editors. + World Wide Web Consortium, 14  December  2010. + This version is http://www.w3.org/TR/2010/REC-xslt-xquery-serialization-20101214/ + + + XSLT and XQuery + Serialization, W3C First Public Working Draft, + Andrew Coleman, C. M. Sperberg-McQueen, et. al., Editors. + World Wide Web Consortium, 24 April 2014. + + + diff --git a/specifications/xslt-xquery-serialization-31/src/changes.txt b/specifications/xslt-xquery-serialization-31/src/changes.txt new file mode 100644 index 0000000..78e77c0 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/changes.txt @@ -0,0 +1,177 @@ +CHANGES.TXT for Serialization 3.0 + +Items below are not yet incorporated into the editor's drafts unless +they are marked with 'DONE'. Once completed items have been entered +into the revision log, they are deleted from this document. + +* Backlog + +Action items +------------ + +** DONE Action A-423-03 + on all editors of x.1 specs to add list of incompatibilities and major + differences between x.0 and x.1 versions. + + Applied in Serialization Internal Working Draft of 23 March 2010 + + +** DONE Action A-439-01 + Henry to write a full proposal for the serialization option file format + and its use from the query prolog. + +** DONE Action A-443-04 + HZongaro will take all the feedback re output declarations and produce + a revised proposal. + + +** DONE Unnumbered action item from July 2010 F2F + Henry to fix problems with the schema and add examples that + parse against the schema. Henry to change 'ser' prefix to 'output' + +** DONE ACTION A-452-01: All editors to review the use of the word "legal" + and to replace "legal" with valid, conformant, well-formed, must, "it + is an error if...", or any other appropriate term. 
+ + +Serialization 3.0 bugs and enhancements +--------------------------------------- + + +** DONE Bugzilla Bug 14751: Copy-paste errors in serialization-parameters schema + http://www.w3.org/Bugs/Public/show_bug.cgi?id=14751 + Proposal: 15 November 2011 + Applied editorially in Serialization Internal Working Draft of + 15 November 2011 + + +** DONE Bugzilla Bug 13688: [Ser30] Incorrect regexp for output:encoding in xslt-xquery-serialization.xsd + http://www.w3.org/Bugs/Public/show_bug.cgi?id=13688 + Accepted at joint XQuery/XSLT WG call of 20 September 2011 + Applied change requested 20 September 2011 + + +** DONE Bugzilla Bug 12852: [SER30] Error in serialization parmeters schema + http://www.w3.org/Bugs/Public/show_bug.cgi?id=12852 + Marked editorial: 27 July 2011 + Applied change requested 27 July 2011 + + +** DONE Bugzilla Bug 6535: Add new serialization option suppress-indentation + http://www.w3.org/Bugs/Public/show_bug.cgi?id=6535 + Applied in internal working draft of 04 January 2010 + + +** TODO Bugzilla bug 9302: How are output declarations processed? + http://www.w3.org/Bugs/Public/show_bug.cgi?id=9302 + Proposal: 24 June 2010 and subsequent discussion + Revised: 14 July 2010 and subsequent discussion + Accepted by XQuery WG at July 2010 F2F, with corrections to schema as described in minutes , plus examples and change "ser" prefix to "output". + Applied changes 20 July 2010 + + Dependencies: Need ratification by XSL WG? 
+ + +** DONE Bugzilla Bug 9433: [SER11] Invalid reference to XSLT 2.1 + http://www.w3.org/Bugs/Public/show_bug.cgi?id=9433 + Need to update cross-document links to XSLT 2.1 + + +** DONE Bugzilla Bug 6808: Whitespacing rules are too restrictive for the + indent parameter + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=6808 + Proposal: 01 February 2010 + Accepted at joint XQuery/XSL call of 02 February 2010 + Applied in internal working draft of 23 March 2010 + Changes requested at XSL WG call of 03 June 2010 + Proposal: 23 June 2010 + Accepted at XSL WG call of 24 June 2010 + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + Accepted at XQuery WG call of 29 June 2010 + + + +Serialization 1.0 bug fixes to be applied to 3.0 +------------------------------------------------ + +** DONE Bugzilla Bug 10176: What does it mean to output an XML island as XML? + http://www.w3.org/Bugs/Public/show_bug.cgi?id=10176 + Proposal: 27 July 2011 + Revised: 27 July 2011 + Accepted at joint XQuery/XSL call of 20 September 2011, with direction to + avoid using the word "might." + Applied in Serialization Internal Working Draft of 15 November 2011 + +** DONE Bugzilla Bug 11590: Wrong error description for err:SEPM0010: "xhtml" is missing + http://www.w3.org/Bugs/Public/show_bug.cgi?id=11590 + Proposal: 04 January 2011 + Accepted at joint XQuery/XSL call of 24 May 2011 + Applied in Serialization Internal Working Draft of 26 May 2011 + +** DONE Bugzilla Bug 7823: [SER] Description of escaping rules for script and style elements in HTML mode not clear + http://www.w3.org/Bugs/Public/show_bug.cgi?id=7823 + Proposal: 05 April 2010 + Accepted at joint XQuery/XSL call of 06 April 2010 + Accepted at XSL call of 3 June 2010 + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + + +** DONE Bugzilla Bug 8651: What does it mean to compare without consideration + of case? 
+ + http://www.w3.org/Bugs/Public/show_bug.cgi?id=8651 + Proposal: 05 January 2010 + Accepted at joint XQuery/XSL call of 13 January 2010 + Accepted at XSL call of 3 June 2010 + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + + +** DONE Bugzilla Bug 8206: + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=8206 + Marked as editorial + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + + +** DONE Bugzilla Bug 7829: Serialization of minimized attributes. + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=7829 + Proposal: 26 November 2009 + Accepted at joint XQuery/XSL call of 01 December 2009 + Ratified at XSL WG call of 03 December 2009 + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + + +** DONE Bugzilla Bug 8245: Error for characters that are not permitted in HTML omits some control characters + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=8245 + Proposal: 09 November 2009 + Proposal: 12 November 2009 + Discussed: 12 November 2009 + Proposal: 26 November 2009 + Accepted at joint XQuery/XSL call of 01 December 2009 + Ratified at XSL WG call of 03 December 2009 + Applied in Serialization Internal Last Call Working Draft of 28 June 2010 + + +** DONE Bugzilla Bug 6723: No rule about empty

elements in HTML + serialization + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=6723 + Proposal: 20 August 2009 + Accepted at joint XQuery/XSL call of 25 August 2009 + Ratified at XSL WG call of 12 November 2009 + Applied in Serialization Public Working Draft of 15 December 2009 + + +** DONE Bugzilla Bug 6732: Make clear for which values of version parameter + the serialization recommendation provides normative definitions + + http://www.w3.org/Bugs/Public/show_bug.cgi?id=6732 + Proposal: 24 March 2009 + Accepted at joint XQuery/XSL call of 14 April 2009 + Ratified at XSL WG call of 30 April 2009 + Applied in Serialization Public Working Draft of 15 December 2009 + + diff --git a/specifications/xslt-xquery-serialization-31/src/errors.xml b/specifications/xslt-xquery-serialization-31/src/errors.xml new file mode 100644 index 0000000..aea3918 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/errors.xml @@ -0,0 +1,155 @@ + + +Summary of Error Conditions + + +

This document uses the err prefix which represents the +same namespace URI (http://www.w3.org/2005/xqt-errors) as defined in +. Use of this +namespace prefix binding in this document is not normative.

+ + + +

It is an error if an item in S6 in sequence normalization is an +attribute node or a namespace node.

+ + + +

It is an error if the serializer is unable to satisfy the rules +for either a well-formed XML document entity or a well-formed XML +external general parsed entity, or both, except for content modified +by the character expansion phase of serialization.

+ + +

It is an error to specify the doctype-system parameter, or to +specify the standalone parameter with a value other than +omit, if the instance of the data model contains text +nodes or multiple element nodes as children of the root +node.

+ + +

It is an error if the serialized result would contain an +NCName that contains a character that is not +permitted by the version of Namespaces in XML specified by the +version parameter.

+ + +

It is an error if the serialized result would contain a character +that is not permitted by the version of XML specified by the +version parameter.

+
+ + +

It is an error if an output encoding other than UTF-8 +or UTF-16 is requested and the serializer does not support that +encoding.

+ + + +

It is an error if a character that cannot be represented in the +encoding that the serializer is +using for output appears in a context where character references are +not allowed (for example if the character occurs in the name of an +element).

+ + +

It is an error if the omit-xml-declaration parameter +has the value yes, +true or 1, +and the standalone +parameter has a value other than omit; or the +version parameter has a value other than 1.0 +and the doctype-system parameter is +specified.

+ + +

It is an error if the output method is xml or +xhtml, the value of the undeclare-prefixes +parameter is +one of +yes, + true or 1, +and the value of the +version parameter is 1.0.

+ + +

It is an error if the value of the normalization-form +parameter specifies a normalization form that is not supported by the +serializer.

+ + +

It is an error if the value of the normalization-form +parameter is fully-normalized and any relevant construct +of the result begins with a combining character.

+ + +

It is an error if the serializer does not support the version of +XML or HTML specified by the version parameter.

+
+ + +

It is an error to use the HTML output method if characters which +are permitted in XML but not in HTML appear in the instance of the +data model.

+ + +

It is an error to use the HTML output method when > +appears within a processing instruction in the data model instance +being serialized.

+ + +

It is an error if a parameter value is invalid for the defined +domain.

+ + +

It is an error if evaluating an expression in order to extract the +setting of a serialization parameter from a data model instance would +yield an error.

+ + +

It is an error if evaluating an expression in order to extract the +setting of the use-character-maps serialization parameter +from a data model instance would yield a sequence of length greater +than one.

+ + +

It is an error if an instance of the data model used to specify the +settings of serialization parameters specifies the value of the same +parameter more than once.

+ + +

It is an error if a numeric value being serialized using the +JSON output method cannot be represented in the JSON grammar +(e.g. +INF, -INF, NaN). +

+ + +

It is an error if a sequence being serialized using the JSON output +method includes items for which no rules are provided in the +appropriate section of the serialization rules.

+ + + +

It is an error if a map being serialized using the JSON output method +has two keys with the same string value, unless the +allow-duplicate-names parameter has the value +yes, + true or 1.

+ + +

It is an error if a sequence being serialized using the JSON output +method is of length greater than one.

+ +
+ + diff --git a/specifications/xslt-xquery-serialization-31/src/ns-xslt-xquery-serialization.xml b/specifications/xslt-xquery-serialization-31/src/ns-xslt-xquery-serialization.xml new file mode 100644 index 0000000..d1a9fa9 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/ns-xslt-xquery-serialization.xml @@ -0,0 +1,256 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]> + +
+ XSLT and XQuery Serialization 3.1 Namespace Document + + + W3C Recommendation + + &doc.date.day; + &doc.date.month; + &doc.date.year; + + + &doc.publoc; + + + &doc.publoc; + + + + + + + +

+
+ +

+
+ + English + + + + Initial draft. + + +
+ + + + +Introduction + +

+This document describes the namespace +&nsname; defined by the + specification +(&ser.spec.date.month; &ser.spec.date.year; version). +This namespace is typically identified by the namespace prefix output:, +which is not a predefined prefix. +For updated information, please refer to the latest version of the + specification. +

+ +

+This document contains a directory of links to related resources, using RDDL +(as defined in ). +

+ +

+It is GRDDL-enabled (as defined in ), that is to +say that a GRDDL-compliant processor can extract useful RDF +(as defined in ) +representations of the information contained herein. +

+ +
+ + + +XML Schema + +

+The specification +provides a number of serialization parameters. Those parameters are +described by an XML Schema. +

+ +

+The specification +defines a schema for XML Infoset instances with which a user of +a host language may specify serialization parameters for use in +serializing an instance of the XQuery and XPath Data Model. +The schema also provides hooks that allow the inclusion of +implementation-defined serialization parameters and +implementation-defined modifiers to serialization parameters. +

+ + +

+This schema defines the XML syntax of serialization parameters +specified in . +It is located at +http://www.w3.org/&ser.spec.date.year;/&ser.spec.date.MM;/xslt-xquery-serialization/schema-for-serialization-parameters.xsd. +

+
+ +

The following serialization parameters are defined by that schema: +

+ + + + + + + +
+ + + + +Normative References + + + + + +

XSLT and XQuery Serialization 3.1 +(&ser.spec.date.day; &ser.spec.date.month; &ser.spec.date.year; version)

+ +

This document describes the names that are defined in this namespace at the time of publication. +The W3C reserves the right to define additional names in this namespace in the future. + +is the only specification that is permitted to amend this namespace. +It may, however, be augmented by other specifications that define Serialization 3.1 extensions. +

+
+
+ +
+ +
+ + + +Non-Normative References + + + + + +

XQuery 3.1 Requirements +(&xqreq.spec.date.day; &xqreq.spec.date.month; &xqreq.spec.date.year; version)

+
+
+ + + +

XQuery 3.1 +(&xq.spec.date.day; &xq.spec.date.month; &xq.spec.date.year; version)

+
+
+ + + +

Resource Directory Description Language (RDDL) (4 July 2007)

+
+
+ + + + +

Gleaning Resource Descriptions from Dialects of Languages (GRDDL) +(Recommendation of 11 September 2007)

+
+
+ + + +

Resource Description Framework (RDF): +Concepts and Abstract Syntax (Recommendation of 10 February 2004)

+
+
+ + + +

XML Schema Part 1: Structures Second Edition +(Recommendation of 28 October 2004)

+
+
+ +
+ +
+ + + +
diff --git a/specifications/xslt-xquery-serialization-31/src/schema-for-serialization-parameters.xsd b/specifications/xslt-xquery-serialization-31/src/schema-for-serialization-parameters.xsd new file mode 100644 index 0000000..df9c4b5 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/schema-for-serialization-parameters.xsd @@ -0,0 +1,589 @@ + + + + + This is a schema for serialization parameters for XSLT and + XQuery Serialization 3.1. + + This schema is available for use under the conditions of the + W3C Software License published at + http://www.w3.org/Consortium/Legal/copyright-software-19980720 + + It defines a schema for XML Infoset instances with which a + user of a host language MAY specify serialization parameters + for use in serializing an instance of the XQuery and XPath + Data Model. It also provides hooks that allow the inclusion + of implementation- defined serialization parameters and + implementation-defined modifiers to serialization parameters. + + + + + + + + + + + + + + + + + + + + Prefixed-QName matches only QNames with a non-null prefix: + that is, only QNames with a colon. + + + + + + + + + + + Qualified-EQName matches only EQNames with a non-null namespace name. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/specifications/xslt-xquery-serialization-31/src/schema-test.xml b/specifications/xslt-xquery-serialization-31/src/schema-test.xml new file mode 100644 index 0000000..d63f934 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/schema-test.xml @@ -0,0 +1,133 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization-errata.xml b/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization-errata.xml new file mode 100644 index 0000000..7cfc8ca --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization-errata.xml @@ -0,0 +1,207 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]> + + +
+ &erratadoc.title; + &erratadoc.w3c-designation; + + + &erratadate.day; + &erratadate.month; + &erratadate.year; + + + + &erratadoc.latestloc; + + + + + + + Scott Boag + IBM + scott_boag@us.ibm.com + + + Michael Kay + Saxonica + http://www.saxonica.com + + + Joanne Tong + IBM + joannet@ca.ibm.com + + + Norman Walsh + Sun Microsystems + Norman.Walsh@Sun.COM + + + + +

This document records all known errors in the + &doc.title; specification; + for updates see the latest version of that document.

+ +

The errata are numbered, classified as + Substantive or Editorial, + and listed in reverse chronological order of their date of publication + in each category. + Each entry contains the following information:

+ + +

The section in which the error exists.

+
+ +

A description of the error.

+
+ +

A correction to the error, if applicable.

+
+
+

Changes to the text of the spec are indicated thus: + new, added text, + modified text, + and deleted text. + Substantive corrections are proposed by the + + + + + XML Query Working Group and the + XSL Working Group + (both of which are part of the XML Activity), + which have + + consensus that they are appropriate; + they are not to be considered normative until approved by a + Call + for Review of Proposed Corrections or a + Call + for Review of an Edited Recommendation.

+ +

Please send comments about this document to the + W3C XSLT/XPath/XQuery public comments mailing list, + public-qt-comments@w3.org. + It will be very helpful if you include the string + + [SERerrata] + in the subject line of your comment. + An archive of that mailing list is available at + + http://lists.w3.org/Archives/Public/public-qt-comments/.

+
+ + +

This document records all errors in the + &doc.title; specification + that were known at the time of publication of this document.

+
+ + + English + + + +

This is the errata for the Recommendation.

+
+
+ + + + + Substantive Errata + + + +

No substantive errata at present.

+
+ + + Editorial Errata + + + +

No editorial errata at present.

+
+ +
\ No newline at end of file diff --git a/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization.xml b/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization.xml new file mode 100644 index 0000000..776af0d --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/src/xslt-xquery-serialization.xml @@ -0,0 +1,5529 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%status-entities; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This document will be +considered ready for transition to Proposed Recommendation +at the same time that the XQuery 3.1 specification is ready +for transition to Proposed Recommendation.

'> + + +This &doc.w3c-doctype-full; specifies XSLT and XQuery Serialization + version 3.1, a fully compatible extension of + Serialization version 3.0.

'> + + + +]> + + +
+ &language; + &version; + &doc.w3c-designation; + W3C &doc.w3c-doctype-full; + + &date.day; + &date.month; + &date.year; + + + &doc.publoc; + + + + XML + Change markings relative to previous Working Draft + + + + &doc.latestloc; + + + + https://www.w3.org/TR/2017/PR-xslt-xquery-serialization-31-20170117/ + https://www.w3.org/TR/2016/CR-xslt-xquery-serialization-31-20161213/ + https://www.w3.org/TR/2015/CR-xslt-xquery-serialization-31-20151217/ + + + &doc.latestloc-major; + + + &doc.latestloc-tech; + + + https://www.w3.org/TR/2014/REC-xslt-xquery-serialization-30-20140408/ + + + + + Andrew Coleman + IBM Hursley Laboratories + andrew_coleman@uk.ibm.com + + + C. M. Sperberg-McQueen + Black Mesa Technologies + http://blackmesatech.com/ + + + + + + +&status-section; + + +

This document defines serialization of an instance of the data model as defined in + into a sequence of octets. + Serialization is designed to be a component that can be used by other specifications + such as or .

+
+ + +English + + + +

See the CVS changelog.

+
+
+ + + + +Introduction + +

This document defines serialization of the W3C XQuery +and XPath Data Model +3.1 (XDM), +which is the data model of at least , +, and +, and any other specifications that reference it.

+ +

In this document, examples and material labeled as Note are provided for +explanatory purposes and are not normative.

+ +

Serialization is the process of converting an instance of the + into a sequence of octets. Serialization is +well-defined for most data model instances.

+ +Terminology +

In this specification, +where they appear in upper case, +the words "MUST", "MUST NOT", +"SHOULD", "SHOULD NOT", "MAY", "REQUIRED", and +"RECOMMENDED" are to be interpreted as described in +.

+

As is indicated in +, conformance criteria for serialization +are determined by other specifications that refer to this specification. +A serializer is software that implements some or all of the +requirements of this specification in accordance with such conformance +criteria. A serializer is not REQUIRED to directly provide a +programming interface that permits a user to set serialization parameters +or to provide an input sequence for serialization. +In this document, material labeled +as "Note" and examples are provided for explanatory purposes and are not +normative. + +

+

Certain aspects of serialization are described in this specification +as implementation-defined +or implementation-dependent.

+

+Implementation-defined indicates an +aspect that MAY differ between +serializers, but whose actual +behavior MUST be specified either by another specification that sets +conformance criteria for serialization (see ) +or in documentation that accompanies the +serializer.

+

+Implementation-dependent indicates an +aspect that MAY differ between +serializers, and whose actual +behavior is not REQUIRED to be specified either by another specification +that sets conformance criteria for serialization (see +) or in documentation that accompanies the +serializer.

+

+In some instances, the +sequence that is input to serialization cannot be successfully converted +into a sequence of octets given the set of serialization parameter +() values specified. A +serialization error is said to occur in such an instance. +In some cases, a serializer is +REQUIRED to signal such an error. +What it means to signal a serialization error is determined by the +relevant conformance criteria () to which +the serializer conforms. In other cases, +there is an implementation-defined choice +between signaling a serialization error and performing a recovery action. +Such a recovery action will allow a +serializer to produce a sequence of +octets that might not fully reflect the usual requirements of the +parameter settings that are in effect. +

+ +

+ +Where this specification indicates that two strings are to be +compared without regard to case, the serializer +MUST translate any characters in the range +#x41 (LATIN CAPITAL LETTER A) to #x5A (LATIN CAPITAL LETTER Z), +inclusive, to the corresponding lower-case letters in the range +#x61 (LATIN SMALL LETTER A) to #x7A (LATIN SMALL LETTER Z) only +for the purposes of making the comparison. The comparison +succeeds if the two strings are the same length and the code +point of each character in the first string is equal to the +code point of the character in the corresponding position in +the second string. +

+ +

Many terms used in this document are defined in the XPath specification + or the Data Model specification . Particular +attention is drawn to the following:

+ + + + +

The term atomization is defined +in . It is a process that takes as input a sequence of nodes and atomic values, and +returns a sequence of atomic values, in which the nodes are replaced by their typed values as defined in +.

+
+ + +

The term node +is defined as part of . +There are seven kinds of nodes in the data model: document, element, attribute, text, namespace, processing instruction, and comment.

+
+ + +

The term sequence +is defined in . +A sequence is an ordered collection of zero or more items.

+
+ + +

The term +function is defined in +.

+
+ + +

The term +map item is defined in +.

+
+ + +

The term +array item is defined in +.

+
+ + +

The term +string is defined in +.

+
+ + +

The term +character is defined in +.

+
+ + +

The term +codepoint is defined in +.

+
+ + +

The term string value +is defined in . +Every node has a string value. For example, the string value +of an element is the concatenation of the string values of all its descendant text nodes.

+
+ + +

The term expanded QName +is defined in . +An expanded QName consists of an optional namespace URI and a local name. An expanded QName also retains its original namespace prefix (if any), to facilitate casting the expanded QName into a string.

+
+ + +

+An +expanded-QName whose +namespace part is an empty sequence, +or an element or attribute whose name expands to such +an expanded-QName, +is referred to as having a null namespace URI.

+
+ + +

An element or attribute that does not have a null namespace URI is referred to as having a non-null namespace URI.

+
+ + +

+A space character, +TAB character, CR character or NL character is referred to as a +whitespace character.

+
+
+ +

Where this specification indicates that an +XSLT instruction is evaluated, the behavior is as specified by +. Where it indicates that an XQuery expression is +evaluated, the behavior is as specified by .

+
+ + +Namespaces +

This specification refers to several namespaces that affect the process +of serialization. These are:

+ +

the +Output declaration namespace, +http://www.w3.org/2010/xslt-xquery-serialization;

+

the +XML namespace, +http://www.w3.org/XML/1998/namespace;

+

the +XHTML namespace, +http://www.w3.org/1999/xhtml;

+

the +SVG namespace, +http://www.w3.org/2000/svg; and

+

the +MathML namespace, +http://www.w3.org/1998/Math/MathML.

+
+

Wherever an element node or attribute node is said to be in a +particular namespace, it is understood that the namespace URI of the +node is equal to the namespace URI corresponding to that namespace. +Wherever a namespace node is said to be a namespace node for a +particular namespace, it is understood that the +string value of the node is +equal to the namespace URI corresponding to that namespace.

+
+
+ + +Sequence Normalization +

An instance of the data model that is input to the serialization +process is a sequence. +Prior to serializing a sequence using any of +the output methods whose behavior is specified by this document +(), +with the exception of the JSON +and Adaptive +output methods, +the serializer MUST first +compute a normalized sequence for serialization; it +is the normalized sequence that is actually serialized. +The purpose of sequence normalization is +to create a sequence that can be serialized as a +well-formed XML document or external general parsed entity, that +also reflects the content of the input sequence to the extent possible. +The result of the sequence normalization process is a result tree.

+ +

The normalized sequence for serialization is constructed by applying all +of the following rules in order, with the initial sequence being +input to the first step, and the sequence that results from any +step being used as input to the subsequent step. +For any implementation-defined +output method, it is implementation-defined +whether this sequence normalization process takes place. +For the JSON +and Adaptive +output methods, +sequence normalization MUST NOT take place.

+ +For any implementation-defined +output method, it is implementation-defined +whether the sequence normalization +process takes place. +

Where the process of converting the input sequence +to a normalized sequence indicates that a value MUST be cast to +xs:string, that operation is +defined in of +. +Where a +step in the sequence normalization process indicates that a node should be +copied, the copy is performed in the same way as an XSLT +xsl:copy-of instruction that has a +validation attribute whose value is +preserve and has a +select attribute whose effective value is the +node, as described in +of , +or equivalently in the same way as an XQuery +content expression as described in Step 1e of + +of , where the construction mode is +preserve. + +The steps in computing the normalized sequence are: +

+ + +

If the sequence that is input to serialization is +empty, create a sequence S1 that consists of a +zero-length string. Otherwise, copy each item in the sequence that is +input to serialization to create the new sequence S1. +Each item in the sequence that is an array is flattened by calling the function +array:flatten() before being copied. +

+

For each item in S1, if the item is atomic, obtain the +lexical representation of the item by casting it to an xs:string +and copy the string representation to the new sequence; otherwise, copy the +item to the new sequence. +The new sequence is S2.

+ +

If the item-separator serialization +parameter is absent, then for each subsequence of adjacent strings in +S2, +copy a single string to the new sequence equal to the values of the +strings in the subsequence concatenated in order, each separated by a +single space. Copy all other items to the new sequence. +Otherwise, copy each item in +S2 to the +new sequence, inserting between each pair of items a string whose +value is equal to the value of the item-separator +parameter. +The new sequence is S3.

+

For each item in S3, if the item is a string, +create a text node in the new sequence whose string value is equal to +the string; otherwise, copy the item to the new sequence. The new +sequence is S4.

+

For each item in S4, if the item is a document node, +copy its children to the new sequence; otherwise, copy the item to the new +sequence. The new sequence is S5.

+

For each subsequence of adjacent text nodes in S5, copy a single text node to the new sequence equal to the values of the text nodes in the subsequence concatenated in order. Any text nodes with values of zero length are dropped. Copy all other items to the new sequence. The new sequence is S6.

+

It is a serialization error if an item in S6 is an +attribute node, +a namespace node +or a +function. +Otherwise, construct a new sequence, +S7, that consists of a single document node and +copy all the items in the sequence, which are all nodes, as children of +that document node.

S7 is the normalized sequence.

+

The result tree rooted at the document node that is +created by the final step of this sequence +normalization process is the +instance of the data model to which the rules of the appropriate +output method are applied. If the sequence +normalization process results +in a serialization error, the +serializer MUST signal the error.

+

If the item-separator +serialization parameter is absent, the sequence normalization process +for a sequence $seq is equivalent +to constructing a document node using the XSLT instruction:

+<xsl:document> + <xsl:copy-of select="$seq" validation="preserve"/> +</xsl:document> +

or the XQuery expression:

+ + + + +declare construction preserve; + +document { $seq } + +

If the item-separator +serialization parameter is present, the sequence normalization process +for a sequence $seq is equivalent to constructing a +document node using the XSLT +instruction:

+ + + + + + + + + + + + + + + + + + +]]> + +

or the XQuery expression:

+ + + + + +declare construction preserve; + +document { + for $item at $pos in $seq + let $node := + if ($item instance of node()) then + $item + else + text { $item } + return + if ($pos eq 1) then + $node + else + ($sep, $node) +} + +

where the value of the sep +variable is a string whose value is equal to the value of the +item-separator serialization parameter.

+

This process results in a serialization error +if + +$seq contains functions, +attribute nodes or namespace +nodes.

+ + +Serialization Parameters +

There are a number of parameters that influence how serialization +is performed. Host languages MAY allow users to specify any or all of these parameters, but +they are not REQUIRED to be able to do so. However, the host language +specification MUST specify how the values of all applicable parameters are to be +determined.

+

It is a serialization error if a parameter value is invalid for the given parameter. It +is the responsibility of the host language to specify how invalid values should be handled at the level of that language.

+

The following serialization parameters are defined:

+ + +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Serialization parameter namePermitted values for parameter
allow-duplicate-namesOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +This parameter indicates +whether a map item serialized as a JSON object using the JSON output method is +allowed to contain duplicate member names. If the value no, false or 0 +is specified, a serialization error will be raised if duplicate member names are encountered.
byte-order-markOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. + This parameter indicates + whether the serialized sequence of octets is to be preceded by + a Byte Order Mark (See Section 5.1 of + ). The actual octet order used is + implementation-dependent. + If the encoding defines no Byte Order Mark, or if the Byte Order Mark is + prohibited for the specific Unicode encoding or implementation environment, then + this parameter is ignored.
cdata-section-elementsA list of expanded QNames, possibly empty.
doctype-publicA string of +PubidChar characters. +This parameter MAY be absent.
doctype-systemA string of Unicode characters + that does not include both an + apostrophe (#x27) and a quotation mark (#x22) character. + This parameter MAY be absent.
encodingA string of Unicode characters in the range #x21 to #x7E (that is, + printable ASCII characters); the value SHOULD be a charset + registered with the Internet Assigned Numbers Authority + , or begin with the + characters x- or X-.
escape-uri-attributesOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +
html-versionA decimal value. This parameter MAY be absent.
include-content-typeOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +
indentOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +
item-separatorA string of Unicode characters. This +parameter MAY be absent.
json-node-output-methodAn expanded QName +with a +non-null +namespace URI, +or with a null +namespace URI and a local name equal +to one of xml, xhtml, +html or +text. +If the namespace URI is non-null, +the parameter +specifies an implementation-defined +output method.
media-typeA string of Unicode characters specifying the media type (MIME + content type) ; + the charset parameter of + the media type MUST NOT be specified explicitly in the value of + the media-type parameter. + If the destination of the serialized output + is annotated with a media type, this parameter MAY be used to + provide such an annotation. For example, it MAY be used to set + the media type in an HTTP header.
methodAn expanded QName +with a +null +namespace URI, and the local part of +the name equal to one of xml, xhtml, +html, text, +or json +or having a non-null +namespace URI. +a +non-null +namespace URI, +or with a null +namespace URI and a local name equal +to one of xml, xhtml, +html, +text, or +json, or adaptive. +If the namespace URI is non-null, +the parameter +specifies an implementation-defined +output method.
normalization-formOne of the enumerated values NFC, NFD, + NFKC, NFKD, fully-normalized + or none, or an + implementation-defined value + of type + NMTOKEN.
omit-xml-declarationOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +
standaloneOne of the enumerated values +yes, no, true, false, 1 or 0 +or omit.
suppress-indentationA list of expanded QNames, possibly empty.
undeclare-prefixesOne of the enumerated values +yes or no +yes, no, true, false, 1 or 0. +
use-character-mapsA list of pairs, possibly empty, with each pair consisting of + a single Unicode character and a string of Unicode characters.
versionA string of Unicode characters.
+The octet order of the serialized result sequence of +octets is implementation-dependent. +

The value of the method parameter is an +expanded QName. +If the value has a null +namespace URI, then the local name identifies +a method specified in this document and MUST +be one of xml, html, +xhtml, text, +or +json , or adaptive; +in this case, the output method specified MUST +be used for serializing. If the namespace URI is non-null, then +it identifies an implementation-defined output +method; the behavior in this case is not specified +by this document.

+If the namespace URI is non-null for the method serialization +parameter, then the parameter specifies an implementation-defined +output method. +

In those cases where they have no important +effect on the content of the serialized result, details of the +output methods defined by this specification are left unspecified +and are regarded as implementation-dependent. +Whether a serializer uses +apostrophes or quotation marks to delimit attribute values in the +XML output method is an example of such a detail.

+In those cases where they have no important +effect on the content of the serialized result, details of the +output methods defined by this specification are left unspecified +and are regarded as implementation-dependent. +

The detailed semantics of each parameter will be described +separately for each output method for which it is applicable. If the +semantics of a parameter are not described for an output method, then +it is not applicable to that output method.

+ +

Implementations MAY define additional serialization +parameters, and MAY +allow users to do so. For this purpose, the name of a serialization +parameter is considered to be a QName; the parameters listed above are +QNames in no namespace, +QNames whose +expanded-QName +has a null namespace URI, +while any additional serialization parameters +that are either +implementation-defined or +defined by the host language +MUST have names that are namespace-qualified. +Any such +additional serialization parameters MUST NOT be in the namespace +http://www.w3.org/2010/xslt-xquery-serialization. +A host language MAY +specify the means by which an implementation can define such an additional +serialization parameter, and implementations MAY provide +mechanisms by which users can define such an additional serialization +parameter. +If the serialization method is one +of the four +five +six +methods xml, html, +xhtml, text, +or json, +or adaptive + then the additional serialization parameters MAY +affect the output of the serializer to the extent (but only to the extent) +that this specification leaves the output implementation-defined or +implementation-dependent. For example, such parameters might control whether +namespace declarations on an element are written before or after the +attributes of the element, or they might define the number of space or tab +characters to be inserted when the indent parameter is set to +yes, true or 1; but they +could not instruct the serializer +to suppress the error that occurs when the +HTML output method encounters +characters +that are not permitted +(see error ).

+The effect of additional serialization parameters on the output of the serializer, +where the name of such a parameter MUST be namespace-qualified, is +implementation-defined +or implementation-dependent. +The extent of this effect on the output MUST NOT +override the provisions of this specification. + + +Setting Serialization Parameters by Means of a Data Model Instance +

A host language MAY provide, by reference to this +section, a mechanism by +which the settings of serialization parameters are supplied in the form of +an instance of the data model as specified in +. The instance of the data model used +to determine the settings of +serialization parameters MUST be processed as if by the +procedure described below.

+

With the exception of the use-character-maps parameter, +the setting of each serialization parameter +defined in this specification +is equal to the result of +evaluating the XQuery expression

+ +(validate lax { document { . } }) + /output:serialization-parameters + /output:*[local-name() eq $param-name]/data(@value) + +document { . } + /output:serialization-parameters + /(validate lax { + output:*[local-name() eq $param-name] + }) + /data(@value) + +

+or equivalently the XSLT instructions +

+ + + + + + + + +]]> + + + + + + + + +]]> +

with the supplied instance of the data model as the context item, the +param-name variable +having as its value +bound to +a value of type +xs:string equal to +the local part of the name of the particular serialization parameter, and +the other components of the dynamic context and static context as +specified in the subsequent tables. If in any case evaluating this +expression would yield an error, +serialization error + +results.

+

If the result of evaluating this expression for a particular serialization +parameter is the empty sequence, then

+ +

If the parameter is either cdata-section-elements or +suppress-indentation and the result of evaluating +the XQuery expression

+ +(validate lax { document { . } }) + /output:serialization-parameters + /output:*[local-name() eq $param-name] + + +document { . } +/output:serialization-parameters +/(validate lax { + output:*[local-name() eq $param-name] +}) + +

+or equivalently the XSLT instructions +

+ + + + + + + + +]]> + + + + + + + + +]]> +

with the same settings of the static context and dynamic context is not an +empty sequence, the setting of the parameter is the empty list;

+

otherwise, the setting of the parameter is +absent.

+
+

The components of the static context used in evaluating the XQuery +expressions or XSLT instructions +are as defined in the following table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Static Context ComponentXQuery or XSLTSetting
XPath 1.0 compatibility modeBothfalse
Statically known namespacesXQueryThe pair +(output,http://www.w3.org/2010/xslt-xquery-serialization) +
XSLTThe pairs +(output,http://www.w3.org/2010/xslt-xquery-serialization), +(xslt,http://www.w3.org/1999/XSL/Transform) +
Default element/type namespaceBoth"none"
Default function namespaceBothhttp://www.w3.org/2005/xpath-functions
In-scope schema types, In-scope element declarations, +Substitution groups, In-scope attribute declarationsBothAs defined by the schema for serialization parameters +() +and any additional implementation-defined +in-scope schema components +
In-scope variablesBoth{param-name}
Context item static typeBothnode()
Statically-known +function signaturesBoth{fn:data($arg as item()*) as xs:anyAtomicType*}, +{fn:local-name($arg as node()?) as xs:string}
Statically known collationsBoth { (http://www.w3.org/2005/xpath-functions/collation/codepoint, +The Unicode codepoint collation ) } +
Default collationBothThe Unicode codepoint collation
Construction modeXQuerystrip
Ordering modeXQueryordered
Default order for empty sequencesXQueryleast
Boundary space policyXQuerystrip
Copy-namespaces modeXQuery(preserve,inherit)
Base URIBothAbsent
Statically known documentsBothNone
Statically known collectionsBothNone
Statically known default collection typeBothnode()*
Statically known decimal formatsBothNone
Set of named keysXSLT{}
Values of system +propertiesXSLTNone
Set of available +instructionsXSLTThe set of all instructions defined +by
+ +Implementation-defined schema components +MAY be included in the set of schema components +that are used in evaluating an XQuery expression or XSLT instruction +in the process of using an XDM instance to determine the settings +of serialization parameters. +

The remaining components of the dynamic context used in evaluating the +XQuery expressions +or XSLT instructions +in the preceding table are as defined in the following +table.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Dynamic Context ComponentXQuery or XSLTSetting
Context positionBoth1
Context sizeBoth1
Variable valuesBothThe param-name variable has +a value of type xs:string equal to the local part of the name +of the serialization parameter under consideration
Function implementationsBothThe implementation of fn:data
Current dateTimeBothAbsent
Implicit timezoneBothAbsent
Available documentsBothNone
Available collectionsBothNone
Default collectionBothNone
Current template ruleXSLTAbsent
Current modeXSLTThe default mode
Current groupXSLTAbsent
Current grouping keyXSLTAbsent
Current captured substringsXSLTThe empty sequence
Output stateXSLTTemporary output state
+

In the case of the use-character-maps parameter, the +XQuery +expression

+ +document { . } +/output:serialization-parameters +/ ( validate lax { output:use-character-maps } ) +/output:character-map[@character eq $char] +/string(@map-string) + + +(validate lax { document { . } }) + /output:serialization-parameters/output:use-character-maps + /output:character-map[@character eq $char]/string(@map-string) + +

+or equivalently the XSLT instructions +

+ + + + + + + + +]]> + + + + + + + + +]]> +

is evaluated for each Unicode character that is permitted in an XML +document. The dynamic context and static context used to evaluate the +expression are as defined above, except that in-scope variables is the set +{char} and the value of the variable "char" +is a value of type xs:string +of length one whose value is the Unicode character under consideration. If +the result of evaluating the expression is not an empty sequence, the pair +consisting of the Unicode character and the result of evaluating the +expression is part of the list of pairs in the value of the +use-character-maps parameter. It is a +serialization error + +if the result of evaluating this expression for any character is a +sequence of length greater than one.

+

Using the same settings of the components of the dynamic context and +static context, +serialization error + +results if the result +of evaluating the following +XQuery expression is not true +

+ +(document { . })/output:serialization-parameters + /(count(distinct-values(*/node-name(.))) eq (count(*))) + +

+or equivalently if the result of evaluating the following XSLT +instructions is not true. +

+ + + + + + + + +]]> +

The result of evaluating either +will be false if the data model instance +supplies a value +for any particular serialization parameter +more than once, or will be the +empty sequence if the data model instance does not have as its root node +an element node or a document node with an element node child, where the +local part of the name of the element node is +serialization-parameters and the namespace URI is +http://www.w3.org/2010/xslt-xquery-serialization.

+

A serializer or implementation of a host language does not need to +be accompanied by an XQuery processor nor by a general-purpose schema +validator in order to meet the requirements of this section. It merely +needs to be capable of extracting values from an XDM instance that +conforms to the schema for serialization parameters, while checking that +the constraints implied by the schema and additional constraints implied +by the XQuery validate expression or explicitly stated in this section are +satisfied.

+

The host language MAY provide additional mechanisms for overriding the +values of any serialization parameters specified through the mechanism +defined in this section, as well as additional mechanisms for specifying +the values of any serialization parameters whose values +are absent +after applying the mechanism defined in this section.

+

If the instance of the data model contains elements or attributes +that are in a namespace other than +http://www.w3.org/2010/xslt-xquery-serialization, the +implementation MAY interpret them to specify the +values of implementation-defined +serialization parameters in an +implementation-defined manner.

+If an instance of the +data model used to determine the settings of serialization parameters +contains elements or attributes that are in a namespace other than +http://www.w3.org/2010/xslt-xquery-serialization, the +implementation MAY interpret them to specify the +values of implementation-defined +serialization parameters in an +implementation-defined manner. + +

The following XML document, if converted to a data model instance +and processed using the mechanism described in this section, +would specify the settings of the method, version +and indent serialization parameters with the values +xml, 1.0 and yes, respectively. +

+ + + + + +]]> +

The following document would specify the setting of the +cdata-section-elements serialization parameter with value the +pair of expanded QNames +(http://example.org/book/chapter,heading) and +(http://example.org/book,footnote).

+ + + +]]> +

The following document would specify the value of the method +serialization parameter with the value html.

+

Notice that in this example, the default namespace declaration +in scope has no effect on the interpretation of the setting of the +method parameter.

+ + + +]]> +

The following document would specify the value of the method +serialization parameter with value equal to the expanded QName +(http://example.org/ext, jsp), and the +use-character-maps parameter with value equal to the list of +pairs, («, <%), (», %>).

+ + + + + + + +]]> +
+
+
+ + +Phases of Serialization + +

+ +For the XML, HTML, XHTML and Text output methods, +serialization +comprises five phases of processing +(preceded optionally by the sequence normalization process +described in ). + +For the JSON +output method, sequence +normalization is not performed, and +the term 'normalized sequence' in +this section refers to the unnormalized XDM instance. +For the JSON and Adaptive +output methods, +sequence normalization is not performed. + +serialization is described in and respectively. + + +

+

For an implementation-defined +output method, any of these phases MAY be skipped +or MAY be performed in a different order than is +specified here. For the output methods defined in this specification, +these phases are carried out sequentially as follows:

+ + + +

A meta element is added to the +normalized sequence +along with discarding an existing meta element, as +controlled by the include-content-type parameter +for the XHTML and HTML output methods. +This step is skipped for the +JSON output method. +This step is skipped for the +other output methods defined by this +specification. +

+ +

Markup generation produces the character representation of +those parts of the serialized result that describe the structure of the +normalized +sequence. In the cases of the XML, HTML and XHTML +output methods, this phase produces the character representations +of the following:

the document type declaration; +

+

start tags and end tags (except for +attribute values, whose representation is produced by the character +expansion phase); +

+

processing instructions; and

+

comments.

+

In the cases of the XML and XHTML output methods, +this phase also produces the following:

+ +

the XML or text declaration; and

+

empty element tags (except for the attribute +values);

+
+

+In the case of the text output method, +this phase replaces the single +document node produced by sequence +normalization with a new document node that has exactly one child, +which is a text node. The string value of the new text node is the string value +of the document node that was produced by sequence normalization. +

+ +
+ + +

Character expansion +is concerned with the representation of characters appearing in text +and attribute nodes + +and strings + +in the normalized sequence. +This step is skipped +for the JSON output method. +For each text and attribute node, + +and each string, + +the following rules are applied in sequence. +

+ +

Strings will not appear in any sequence produced by +sequence normalization; they may appear in sequences to +be serialized by the JSON output method or by output +methods not defined in this specification.

+
+ +

+If the node is an attribute that is +a URI attribute value +and the escape-uri-attributes parameter is set to +require escaping of URI attributes, +apply URI escaping as defined below, +and skip rules b-e. Otherwise, continue with rule b.

+

URI escaping consists of the following three steps applied in sequence to the content of +URI attribute values: +

normalize to NFC using the method defined in

+

percent-encode any special characters in the URI using the method defined in

+

escape according to + +the rules of the XML or HTML output +method, whichever is applicable, any characters that require + +escaping, and any characters that cannot be represented in the +selected encoding. +For example, replace < with &lt; +(See also section ). +

+ The values of attributes listed in + are URI attribute values. +Attributes are not considered to be URI attributes simply because they are namespace declaration attributes or have the type annotation xs:anyURI.

+

+If the node is a text node whose parent element is selected by the rules of the +cdata-section-elements parameter for the applicable output method, +create CDATA sections as described below, and skip rules c-e. Otherwise, continue with rule c. +

+

Apply the following two processes in sequence to create CDATA sections

+

Unicode Normalization if requested by the normalization-form parameter.

+

The application of changes as detailed in the description of the cdata-section-elements parameter for the applicable output method.

+
+

Apply character mapping as determined by the +use-character-maps parameter for the applicable output method. +For characters that were substituted by this process, skip rules d and e. +For the remaining characters that were not modified by character mapping, continue with rule d. +

+

Apply Unicode Normalization if requested by the normalization-form parameter.

+

+Unicode Normalization +is the process of removing alternative representations of equivalent sequences from textual data, +to convert the data into a form that can be binary-compared for equivalence, as specified in +. +For specific recommendations for character normalization on the World Wide Web, +see .

+

+The meanings associated with the possible values of the normalization-form parameter +are defined in section .

+

Continue with rule e. +

+ +

Escape according to +the rules of the XML or HTML output +method, whichever is applicable, + +any characters (such as < and &) where XML or HTML requires +escaping, +and any characters that cannot be represented in the selected encoding. +For example, replace < with &lt;. +(See also section ). +For characters such as > where XML defines a built-in entity but does not +require its use in all circumstances, it is implementation-dependent whether the character +is escaped. +

+
+
+ +

Indentation, as controlled by +the indent parameter and the +suppress-indentation parameter, MAY +add or remove +whitespace according to the rules defined by the applicable output method.

+
+ +

Encoding, as controlled by the +encoding parameter, converts the character stream +produced by the previous phases into an octet stream.

+

Serialization is only defined in terms of encoding the result as a stream of octets. +However, a serializer MAY +provide an option that allows the encoding phase to be skipped, so +that the result of serialization is a stream of Unicode characters. +The effect of any such option is implementation-defined, +and a serializer is not required to support such an option. +

The effect of providing an option that allows the encoding phase to be skipped, +so that the result of serialization is a stream of Unicode characters, +is implementation-defined. +The serializer is not required to support such an option. +
+
+
+ +XML Output Method

The XML output method serializes the normalized sequence as an XML entity that +MUST satisfy the rules for either a well-formed XML document entity, a well-formed XML +external general parsed entity, or both. +A serialization error results if the +serializer +is unable to satisfy those rules, +except for content modified by the character expansion phase of serialization, +as described in . +The effects of the character expansion phase could result in the serialized output +being not well-formed, but will not result in a serialization error. +If a serialization error results, the +serializer MUST signal the error.

+

If the document node of the +normalized sequence has a single element +node child and no text node children, +then the serialized output +is a well-formed XML document entity, and the serialized output +MUST conform to the +appropriate version of the +XML Namespaces Recommendation +or . +If the normalized sequence does not take this form, then +the serialized output is a well-formed XML external general parsed entity, +which, when referenced within a trivial XML document wrapper +like this:

+<?xml version="version"?> +<!DOCTYPE doc [ +<!ENTITY e SYSTEM "entity-URI"> +]> +<doc>&e;</doc>

where entity-URI is a URI for the entity, +and the value of the version +pseudo-attribute is the value of the version +parameter, produces a +document which MUST itself be a +well-formed XML document conforming +to the +corresponding version of the +XML Namespaces Recommendation +or .

+

A reconstructed tree may be +constructed by parsing the XML document and converting it into an +instance of the data model as specified in +. +The result of serialization MUST be such that the reconstructed tree +is the same as the result tree except for the following permitted differences:

If the document was produced by adding a document wrapper, as +described above, then it will contain an extra doc +element as the document element.

+

The order of attribute and namespace nodes in the two trees MAY be +different.

+

+The following properties of corresponding nodes +in the two trees MAY be different:

+

the base-uri property of document nodes and element nodes;

+

the document-uri and unparsed-entities properties of document +nodes;

+

the type-name and typed-value properties of element and attribute +nodes;

+

the nilled property of element nodes;

+

the content property of text nodes, due to the effect of the +indent and use-character-maps +parameters.

+
+

The reconstructed tree MAY contain additional attributes and text nodes resulting from the +expansion of default and fixed values in its DTD or schema; +also, in the presence of a DTD, non-CDATA attributes may lose whitespace +characters as a result of attribute value normalization.

+

The type annotations of the nodes in the two trees MAY be different. +Type annotations in a result tree are discarded when the tree is serialized. +Any new type annotations obtained by parsing the document will depend on whether the serialized XML document is assessed against a schema, +and this MAY result in type annotations that are different from +those in the original result tree. +

In order to influence the type annotations in the +instance of the data model that would result from processing a serialized XML document, +the author of the XSLT stylesheet, XQuery expression or other process +might wish to create the instance of the data model that is input to the +serialization process so that it makes use of mechanisms provided by +, such as xsi:type and +xsi:schemaLocation attributes. The serialization process +will not automatically create such attributes in the serialized +document if those attributes were not part of the result tree that is +to be serialized.

+

Similarly, it is possible that an element node in +the instance of the data model that is to be serialized has the nilled +property with the value true, but no xsi:nil +attribute. The serialization process will not create such an attribute +in the serialized document simply to reflect the value of the property. +The value of the nilled property has no direct effect on +the serialized result. +

+

Additional namespace nodes MAY be present +in the reconstructed tree if the serialization process +did not undeclare one or more namespaces, +as described in , +and the starting instance of the data model contained an element node +with a namespace node that declared some prefix, but a child element +of that node did not have any namespace node that declared the same prefix.

+

+The result tree MAY contain namespace nodes +that are not present in the reconstructed tree, as the process of creating an instance +of the data model MAY ignore namespace declarations in some circumstances. +See and + of +for additional information. +

+

If the indent parameter has +one of the values yes, true or 1, +

additional text nodes consisting of +whitespace characters MAY be present in the reconstructed tree; and

+

text nodes in the result tree that contained only whitespace +characters MAY correspond to text nodes in the reconstructed tree that contain additional +whitespace characters that were not present in the result tree.

See for more information on the +indent parameter.

+

Additional nodes MAY be present in the +reconstructed tree due to the effect of character mapping in the character expansion phase, +and the values of attribute nodes and text nodes in the +reconstructed tree MAY be different from those in the result tree, due to +the effects of URI expansion, character mapping +and Unicode Normalization in +the character expansion phase of serialization. +

The use-character-maps parameter can +cause arbitrary characters to be inserted into the serialized XML document +in an unescaped form, including characters that would be considered to be +part of XML markup. Such characters could result in arbitrary new element +nodes, attribute nodes, and so on, in the reconstructed tree that results from +processing the serialized XML document.

A consequence of this rule is that certain characters +MUST be output as character +references, to ensure that they survive +the round trip through serialization and parsing. +Specifically, CR, NEL and LINE +SEPARATOR characters in text nodes MUST be output respectively as +"&#xD;", "&#x85;", and +"&#x2028;", or their equivalents; while CR, NL, TAB, NEL and +LINE SEPARATOR characters in attribute nodes MUST be output respectively +as "&#xD;", "&#xA;", "&#x9;", +"&#x85;", and "&#x2028;", or their equivalents. +In addition, the non-whitespace control characters +#x1 through #x1F and #x7F through #x9F in text nodes and attribute nodes MUST be +output as character references. +

+

For example, an attribute with the value "x" followed by "y" +separated by a newline will result in the output +"x&#xA;y" (or with any equivalent character +reference). The XML output cannot be "x" followed by a literal newline +followed by a "y" because after parsing, the attribute value would be +"x y" as a consequence of the XML attribute normalization +rules.

XML 1.0 did not permit +an XML processor to normalize NEL or LINE SEPARATOR characters to a LINE FEED character. However, if +a document entity that specifies version 1.1 invokes an external general +parsed entity with no text declaration or a text declaration that specifies +version 1.0, the external parsed entity is processed according to the rules +of XML 1.1. For this reason, NEL and LINE SEPARATOR characters in text and +attribute nodes MUST always be escaped using character references, +regardless of the value of the version parameter. +

+

+XML 1.0 permitted control characters in the range #x7F through #x9F +to appear as literal characters in an XML document, but XML 1.1 +requires such characters, other than NEL, +to be escaped as character references. An +external general parsed entity with no text declaration or a text +declaration that specifies a version pseudo-attribute with value +1.0 that is invoked by an XML 1.1 document entity +MUST +follow the rules of XML 1.1. Therefore, the non-whitespace control +characters in the ranges #x1 through #x1F and #x7F through #x9F +MUST +always be escaped, regardless of the value of the version parameter. +

+

It is a serialization error to specify the doctype-system parameter, or to specify the standalone parameter with a value other than omit, if the +instance of the data model contains text nodes or multiple element nodes as children +of the root node. The +serializer +MUST either signal the error, or recover +by ignoring the request to output a document type declaration or +standalone parameter.

+The Influence of Serialization Parameters upon the XML Output Method +XML Output Method: the version Parameter

The version parameter specifies the version of XML +and the version of Namespaces in XML to +be used for outputting the instance of the data model. +The version output in the XML declaration (if an XML declaration is not omitted) +MUST correspond to the version of XML that +the serializer +used for outputting the instance of the data model. The value of the +version parameter +MUST match the +VersionNum +production of the XML Recommendation or . +A serialization error results if the value of the version parameter specifies +a version of XML that is not supported by the serializer; +the serializer MUST +signal the error.

+ +

This document provides the normative +definition of serialization for the XML output method if the +version parameter has either the value 1.0 or +1.1. For +any other value of version parameter, the behavior is +implementation-defined. +In that case the implementation-defined +behavior MAY supersede all other requirements of +this recommendation.

+If an implementation +supports a value of the version parameter for the XML or XHTML +output method for which this document does not provide a normative definition, +the behavior is +implementation-defined. + +

If the serialized result would contain an +NCName that contains a character that is not +permitted by the version of Namespaces in XML specified by the +version parameter, a serialization error results. +The serializer MUST signal the error.

+

If the serialized result would contain a character +that is not permitted by the version of XML specified by the +version parameter, a serialization error results. The +serializer MUST signal the error.

For example, if the version +parameter has the value 1.0, and the instance of the data +model contains a non-whitespace control character in the range #x1 to +#x1F, a serialization error results. +If the version parameter has the value 1.1 +and a comment node in the instance of the data model contains a +non-whitespace control character in the range #x1 to #x1F or a +control character other than NEL in the range #x7F to #x9F, a +serialization error results.

+ +XML Output Method: the html-version Parameter +

The html-version parameter is not applicable to the +XML output method. It is the responsibility of the +host language to specify +whether an error occurs if this parameter is specified in combination +with the XML output method, or if the parameter is simply dropped.

+
+XML Output Method: the encoding Parameter +

The encoding parameter specifies the +encoding to be used for outputting the instance of the data model. +Serializers +are REQUIRED to support values of UTF-8 and +UTF-16. A serialization error occurs if an output +encoding other than UTF-8 or UTF-16 is +requested and the serializer +does not support that encoding. The serializer +MUST signal the error, or recover by using +UTF-8 or UTF-16 instead. +The serializer +MUST NOT use an encoding whose name does not match the +EncName +production of the XML Recommendation . +

+

When outputting a newline character in the instance of the data model, the +serializer is free to represent it using any character sequence +that will be normalized to a newline character by an XML parser, +unless a specific mapping for the newline character is +provided in a character map (see ).

+

When outputting any other character that is defined in the +selected encoding, the character +MUST be output +using the correct representation of that character in the selected encoding.

+

It is possible that the instance of the data model will contain a character that +cannot be represented in the encoding that the serializer +is using for output. In this case, if the character occurs in a context where XML +recognizes character references (that is, in the value of an attribute +node or text node), then the character MUST be output as a character +reference. A serialization error occurs if such a character appears in +a context where character references are not allowed (for example, if +the character occurs in the name of an element). The serializer +MUST signal the error. +

For example, +if a text node contains the character LATIN SMALL LETTER E WITH ACUTE (#xE9), +and the value of the encoding parameter is +US-ASCII, the character MUST be serialized as a character +reference. If a comment node contains the same character, a +serialization error results. +

+ +XML Output Method: the indent and suppress-indentation Parameters + +

The indent and +suppress-indentation parameters control whether the +serializer MAY adjust the whitespace +in the serialized result so that a person will find it easier to read. +If the indent parameter has +one of the values yes, true or 1, +the serializer MAY output whitespace characters in +addition to the whitespace characters in the instance of the data +model. It MAY also elide from the output whitespace +characters that occurred in the instance of the data model or replace +such whitespace characters with other whitespace characters.

+ + +

+The term content has the +same meaning as the term +defined in of +. + +The immediate content of an element is the part of the +content of the element that is not +also in the content of a child element +of that element. +

+ + +

If the +indent parameter has the value no, false or 0, the +serializer MUST NOT add, elide +or replace whitespace characters. If the indent parameter has +one of the values yes, true or 1, +the serializer MUST +use an algorithm for dealing with whitespace characters that satisfies +all of the following constraints. + + +If more than one constraint applies, +the serializer +MUST apply the most restrictive constraint. That is, if +any applicable constraint indicates that whitespace +MUST NOT be added, elided or replaced, that +constraint prevails; if an applicable constraint indicates that +whitespace SHOULD NOT be added, elided or replaced, +while all other applicable constraints indicate that whitespace +MAY +be added, elided or replaced, whitespace SHOULD NOT +be added, elided or replaced. + + +

+ +

Whitespace characters +MAY +be added adjacent to a text node +only +if the text node contains only whitespace characters. Whitespace characters in +such a text node MAY also be elided or replaced. +For example, a tab MAY be inserted as a +replacement for existing spaces.

+

Whitespace characters +MAY be added, elided or replaced in the +immediate content of an +element whose type annotation is xs:untyped or +xs:anyType and that has element node children, in the +immediate content of an element whose content model +is element only, or outside +the content of any element.

+

Whitespace characters +MUST NOT be added, +elided or replaced +in the immediate content of an element +whose content model is +known to be simple or +empty.

+ +

Whitespace characters +SHOULD NOT be added, +elided or replaced +in places where the characters would +constitute significant whitespace, for example, in the +immediate content of an element +that is annotated with a type +other than xs:untyped or xs:anyType, and +whose content model is known to be mixed.

+
+

Whitespace characters +MUST NOT be added, +elided or replaced +in the content +of an element +whose expanded QName is a member of the list of expanded QNames in the +value of the suppress-indentation parameter.

+ +

Whitespace characters MUST NOT be +added, elided or replaced +in a part of the result document that is controlled by an +xml:space attribute with value preserve +(See for more information about the +xml:space attribute). +

+
+

The effect of these rules is to ensure that whitespace is only +added in places where (a) XSLT's <xsl:strip-space> +declaration could cause it to be removed, and +(b) it does not affect the string value of any element node with +simple content. It is usually not safe to indent document types that include elements +with mixed content.

+

The whitespace added may possibly +be based on whitespace stripped from either the source document or the +stylesheet (in the case of XSLT), or +guided by other means that might depend on the host language, +in the case of an instance of the data model created using some other process.

+XML Output Method: the cdata-section-elements Parameter

The cdata-section-elements parameter contains a list +of expanded QNames. If the expanded QName of the parent of a text node +is a member of the list, then the text node + +MUST be output as a +CDATA section, except in those circumstances +described below.

+

If the text node contains the sequence of characters +]]>, then the currently open CDATA section + +MUST be +closed following the ]] and a new CDATA section opened +before the >.

+

If the text node contains characters that are not +representable in the character encoding being used to output the +instance of the data model, then the currently open CDATA section + +MUST be closed +before such characters, the characters + +MUST be output using +character references or entity references, and a new CDATA +section + +MUST be opened for any further +characters in the text node.

+

CDATA sections +MUST NOT be used except where they +have been explicitly requested by the user, either by using the +cdata-section-elements parameter, or by using some other +implementation-defined mechanism.

This is phrased to permit an implementor to provide an option that +attempts to preserve CDATA sections present in the source +document.

A serializer MAY provide an implementation-defined mechanism to place CDATA sections in the result tree.
+XML Output Method: the omit-xml-declaration and standalone Parameters

The XML output method +MUST output an XML declaration if the omit-xml-declaration parameter has the value no, false or 0. +The XML declaration MUST include both version information and an encoding declaration. +If the standalone parameter has +one of the values yes, true, 1, no, false or 0, +the XML declaration MUST include a standalone document declaration with the same value as the value of the standalone parameter. + +If the standalone parameter has +the value omit, the XML declaration +MUST NOT include a standalone document declaration; this ensures +that it is both an XML declaration (allowed at the beginning of a +document entity) and a text declaration (allowed at the beginning of +an external general parsed entity).

+

A serialization error results if the +omit-xml-declaration parameter has +one of the values yes, true or 1, +and

+

the standalone parameter has a value other than +omit; or +

+

the version parameter has a value other than +1.0 and the doctype-system +parameter is specified.

+

The serializer MUST signal the error. +

+

Otherwise, if the +omit-xml-declaration parameter has +one of the values yes, true or 1, + the XML output method + +MUST NOT output an XML declaration.

+XML Output Method: the doctype-system and doctype-public Parameters

If the doctype-system parameter is specified, the +XML output method +MUST output a document type +declaration immediately before the first element. The name following +<!DOCTYPE +MUST be the name of the first element, +if any. If the doctype-public parameter is also specified, then the +XML output method MUST output PUBLIC +followed by the public identifier and then the system identifier; +otherwise, it MUST output SYSTEM +followed by the system +identifier. The internal subset +MUST be empty. The +doctype-public parameter +MUST be ignored unless the +doctype-system parameter is specified.

+XML Output Method: the undeclare-prefixes Parameter

The Data Model allows an element +node that binds a non-empty prefix to +have a child element node that does +not bind that same prefix. In Namespaces in XML 1.1 (), this can be represented accurately by undeclaring +prefixes. For the undeclaring prefix of the child element node, +if the undeclare-prefixes parameter has +one of the values yes, true or 1, +the output method is XML or XHTML, and the version +parameter value is greater than 1.0, +the serializer +MUST undeclare its namespace. If the +undeclare-prefixes parameter has the value no, false or 0 and the output method is XML or +XHTML, then the undeclaration of prefixes MUST NOT occur.

Consider an element x:foo with four in-scope namespaces +that associate prefixes with URIs as follows: +

x is associated with +http://example.org/x

+

y is associated with +http://example.org/y

+

z is associated with +http://example.org/z

+

xml is associated with +http://www.w3.org/XML/1998/namespace

Suppose that it has a child element x:bar with three in-scope namespaces:

x is associated with +http://example.org/x

+

y is associated with +http://example.org/y

+

xml is associated with +http://www.w3.org/XML/1998/namespace

If namespace undeclaration is in effect, it will be serialized this way:

<x:foo xmlns:x="http://example.org/x" + xmlns:y="http://example.org/y" + xmlns:z="http://example.org/z"> + + <x:bar xmlns:z="">...</x:bar> + +</x:foo>

In Namespaces in XML 1.0 (), prefix undeclaration is not possible. +If the output method is XML or XHTML, the value of the undeclare-prefixes parameter is +one of +yes, +true or 1, +and the value of the version parameter is 1.0, +a serialization error results; the +serializer MUST signal the error.

+XML Output Method: the normalization-form Parameter

The normalization-form parameter is applicable to the XML output method. +The values NFC and none MUST be supported by the serializer. +A serialization error results if the value of the +normalization-form parameter specifies a normalization form +that is not supported by the +serializer; the +serializer MUST signal the error.

+

The meanings associated with the possible values of +the normalization-form parameter are as follows:

+

NFC specifies the serialized result will be + in Normalization Form C, using the rules specified in .

+

NFD specifies the serialized result will be + in Normalization Form D, as specified in .

+

NFKC specifies the serialized result will be + in Normalization Form KC, as specified in .

+

NFKD specifies the serialized result will be + in Normalization Form KD, as specified in .

+

fully-normalized specifies the serialized result + will be in fully normalized text, as specified in .

+

none specifies that no Unicode Normalization will + be applied.

+

An implementation-defined value + has an implementation-defined + effect.

If the value of the normalization-form parameter is not NFC, NFD, + NFKC, NFKD, fully-normalized, + or none, then the meaning of the value and its effect are + implementation-defined.
+

If the value of the parameter is fully-normalized, then no +relevant construct of the parsed entity created by the serializer +may start with a composing character. The term relevant construct +has the meaning defined in section 2.13 of . If this condition is not +satisfied, a serialization error MUST be signaled.

Specifying fully-normalized as the value of this parameter +does not guarantee that the XML document output by the serializer will in fact +be fully normalized as defined in . This is because the serializer does +not check that the text is include normalized, which would involve +checking all external entities that it refers to (such as an external DTD). +Furthermore, the serializer does not check whether any character escape +generated using character maps represents a composing character.

+XML Output Method: the media-type Parameter

The media-type parameter is applicable to the +XML output method. +See for more +information.

+XML Output Method: the use-character-maps Parameter

The use-character-maps parameter is applicable to the XML output method. +The result of serialization using the XML output method is not +guaranteed to be well-formed XML if character maps have been specified. +See for more information.

+XML Output Method: the byte-order-mark Parameter

The byte-order-mark parameter is +applicable to the XML output method. See + for more information.

The byte order mark may be undesirable under certain circumstances; +for example, to concatenate resulting XML fragments without additional processing to remove the byte order mark. +Therefore this specification does not mandate the byte-order-mark parameter to have +one of the values yes, true or 1 +when the encoding is UTF-16, +even though the XML 1.0 and XML 1.1 specifications state that entities encoded in UTF-16 MUST begin with a byte order mark. +Consequently, this specification does not guarantee that the resulting XML fragment, +without a byte order mark, will not cause an error when processed by a conforming XML processor.

+XML Output Method: the escape-uri-attributes Parameter

The escape-uri-attributes parameter is +not applicable to the XML output method. It +is the responsibility of the host language to specify whether an error occurs if this parameter is specified in combination with the XML output method, or if the parameter is simply dropped.

+XML Output Method: the include-content-type Parameter

The include-content-type parameter is +not applicable to the XML output method. It +is the responsibility of the host language to specify whether an error occurs if this parameter is specified in combination with the XML output method, or if the parameter is simply dropped.

+ +XML Output Method: the item-separator Parameter +

The effect of the item-separator serialization parameter +is described in .

+
+ +XML Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names serialization parameter +is not applicable to the XML output method.

+
+ +XML Output Method: the json-node-output-method Parameter +

The json-node-output-method serialization parameter +is not applicable to the XML output method.

+
+
+
+ +XHTML Output Method + + + +

The XHTML output method serializes the instance of the +data model as +XML, using the HTML compatibility guidelines defined in the XHTML +specification +( +or the XHTML syntax of HTML5 +(see ).

+

+An element node is recognized as an +HTML element by the XHTML output method if +

+ +

the element node is in the +XHTML namespace, +regardless of the value of the +html-version +serialization parameter +or if the html-version +serialization parameter is absent; or

+

the value of the +html-version +serialization parameter is +5.0, the element has a +null namespace URI, and +the local part of the name is equal +to the name of an element defined by HTML5 , +making the comparison +without regard to case.

+

+

It is entirely the responsibility of the +person or process that creates the instance of +the data model +to ensure that the instance of the data model +conforms to the or + specification +if the html-version +serialization parameter is absent or has a value less than +5.0 +or the XHTML syntax of +HTML5 if the value of the +html-version serialization parameter is 5.0. +It is not an error if the +instance of the data model is invalid XHTML. Equally, it is entirely under the +control of the person or process that creates the instance +of the data model whether the output conforms to XHTML 1.0 +Strict, XHTML 1.0 Transitional, +the XHTML syntax of HTML5 (see +), + +or any other specific definition of XHTML.

+

The serialization of the instance of the data model follows the same rules as for +the XML output method, with the general exceptions noted below and parameter-specific exceptions in . +These differences are based on the HTML compatibility guidelines +published in Appendix C of +and on , +both of which are designed +to ensure that as far as possible, XHTML is rendered correctly on user +agents designed originally to handle HTML.

+

If the value of the html-version +serialization parameter is 5.0, the instance of the data model that +is to be serialized is first subjected to +prefix normalization.

+

+During +prefix normalization, any element node in the instance of the data +model that is to be serialized that is in one of the +XHTML namespace, the +SVG namespace or the +MathML namespace has its name +replaced by the local part of its name. Such an element node is given a +default namespace node whose value is the element's namespace URI. Any +namespace node for any of those three namespaces that was previously present +on any element node in the instance of the data model is also removed, +unless the prefix that that namespace node declared is used as the prefix on +the name of an attribute on that element or an ancestor of that +element.

+

+The process of prefix normalization +is equivalent to replacing the instance of the data model that is to be +serialized with the result of the transformation described by this XSLT +stylesheet, with the instance of the data model as the initial context item. +

+ + + + + + + + + + + + + + + +]]> + + + + +

The following XHTML elements have an EMPTY content model: area, base, br, col, embed, hr, img, input, link, meta, basefont, frame, isindex, and param. +The +void elements of HTML5 are +area, base, +br, col, command, embed, +hr, img, input, +keygen, link, meta, +param, source, track and +wbr. + +An element node is expected to be empty if +it is recognized as an HTML element +and if either

+ +

the html-version serialization parameter is +absent or has a value less than 5.0 and the content model is +EMPTY, or

+

the html-version serialization parameter has the value +5.0 and the element is a void +element.

+

+ +

+If an element node that has no +child nodes is + + +not +expected to be empty, + +

+ +

+the +html-version +serialization parameter is +absent or has a value +less than 5.0, and the +content model of the HTML element +is not EMPTY +(for example, an empty title or paragraph); or

+

the value of the +html-version +serialization parameter is 5.0, and the +HTML element is not a void element, +

+
+

+the serializer +MUST NOT use the minimized form. +That is, it +MUST +output <p></p> and not +<p />.

+ +

If an element that has no +children is + +expected to be empty, + +the serializer +MUST use the minimized tag syntax, +for example +<br />, as the alternative syntax +<br></br> allowed by XML gives uncertain +results in many +legacy +user agents. +If the +html-version +serialization parameter is +absent or has a value +less than 5.0, +the serializer +MUST include a +space before the trailing />, e.g. +<br />, <hr /> and +<img src="karen.jpg" alt="Karen" />. +

+
+

+If the +html-version +serialization parameter is +absent or has a value +less than 5.0, +the serializer +MUST NOT use the entity reference +&apos; which, although +defined +in XML and therefore in +XHTML, is not defined in +versions of HTML +prior to HTML5, +and is not recognized by all HTML user +agents.

+

If the +html-version +serialization parameter is +absent or has a value +less than +5.0, +the serializer SHOULD output namespace declarations +in a way that is consistent with the requirements of the XHTML DTD if this is +possible. +If the value of the +html-version +serialization parameter is +5.0, +the serializer SHOULD +output namespace declarations in a way that is consistent with the requirements +of +. + +The XHTML 1.0 DTDs require the declaration +xmlns="http://www.w3.org/1999/xhtml" +to appear on the html element, and only on the html element. +The + specification +permits +namespace declarations +to appear in a conforming document, but +restricts the elements on which +they can appear. +The serializer MUST output namespace declarations that are consistent with +the namespace nodes present in the result tree, but it MUST avoid outputting +redundant namespace declarations on elements where the DTD would make them invalid, +for versions prior to HTML5, or where they +are not permitted by +, +for serialization according to the syntax of HTML5.

+

If the html element is generated by an XSLT literal result element of +the form <html xmlns="http://www.w3.org/1999/xhtml"> ... </html>, or by an +XQuery direct element constructor of the same form, then the html element in +the result document will have a node name whose prefix is "", which will +satisfy the requirements of the DTD. In other cases the prefix assigned to +the element is implementation-dependent.

+

+and Appendix C of +describe +a number of compatibility guidelines for users of XHTML who wish to +render their XHTML documents with HTML user agents. In some cases, such +as the guideline on the form empty elements +take, only the +serialization process itself has the ability to follow the guideline. In +such cases, those guidelines are reflected in the requirements on the +serializer +described above.

+

In all other cases, the guidelines can be +adhered to by the instance of the data model that is input to the serialization +process. The guideline on the use of whitespace characters in attribute +values is one such example. Another example is that xml:lang="..." does not serialize to both xml:lang="..." and lang="..." as required by some legacy user agents. It is the responsibility of the person or +process that creates the instance of the data model that is input to the +serialization process to ensure it is created in a way that is consistent +with the guidelines. No serialization error results if the input instance +of the data model does not adhere to the guidelines.

+The Influence of Serialization Parameters upon the XHTML Output Method +XHTML Output Method: the version +Parameter +

The behavior for the version +parameter for the XHTML output method is described in +.

+
+ +XHTML Output Method: the html-version Parameter +

The html-version parameter specifies whether the XHTML +output method will produce a serialized document following rules that +are tailored to the requirements of the XHTML syntax of +or the requirements of and .

+

The differences are described in detail throughout +.

+
+XHTML Output Method: the encoding Parameter

The behavior for the encoding parameter for the XHTML output method is described in .

+ +XHTML Output Method: the indent and suppress-indentation Parameters + +

If the indent parameter has +one of the values yes, true or 1, +the +serializer MAY add or remove whitespace as it serializes the +result tree, +if it observes the following +constraints.

+ +

Whitespace MUST NOT be added other than before or +after an element, or adjacent to an existing whitespace character.

+

Whitespace MUST NOT be added or removed adjacent to +an inline element. The inline elements are those elements +recognized +as HTML elements that are +in the %inline category of any of the XHTML 1.0 DTDs, in the +%inline.class category of the XHTML 1.1 DTD, +those elements defined to be phrasing +elements in HTML5 + and elements +recognized +as HTML elements +with local names ins and del if they are used as +inline elements (i.e., if they do not contain element children).

+

Whitespace MUST NOT be added or removed inside a +formatted element, the formatted elements being those +recognized +as HTML elements +with local names pre, script, style, +title, and + textarea. +

+

Whitespace characters +MUST NOT be added in the content of an element +whose expanded QName matches a +member of the list of expanded QNames in the +value of the suppress-indentation parameter. +The expanded QName of an element node +is considered to match a member of the list of expanded QNames +if:

+ +

the two expanded QNames are equal;

+

the expanded QNames both have null +namespace URIs, and the local parts of the two QNames are +equal without regard to case; or +

+

the value of the +html-version +serialization parameter is +5.0, the local parts of the two QNames are equal +without regard to case +and one QName has a null namespace +URI and the namespace URI of the other is equal to the +XHTML namespace URI.

+
+
+
+ +

The effect of the above constraints is to +ensure any insertion or deletion of whitespace would not affect how +an +HTML user agent +that conforms to the specified version +of HTML +would render the output, assuming the serialized document does +not refer to any HTML style sheets.

+

The HTML definition of whitespace is different from the XML + definition: see section 9.1 of the 4.01 specification.

+XHTML Output Method: the cdata-section-elements Parameter

The behavior for the cdata-section-elements parameter for the XHTML output method is described in .

+XHTML Output Method: the omit-xml-declaration and standalone Parameters

The behavior for the omit-xml-declaration and standalone parameters for the XHTML output method is described in .

As with the XML output method, the XHTML +output method specifies that an XML declaration will be output unless it is suppressed using +the omit-xml-declaration parameter. Appendix C.1 of + +provides advice on the consequences of including, +or omitting, the XML declaration.

+XHTML Output Method: the doctype-system and doctype-public Parameters +

If the value of the +html-version +serialization parameter is 5.0, the + + + + +doctype-system serialization parameter is + +absent, +the first element node child of +the document node that is to be serialized +is +recognized as an HTML +element, the local part of the QName of which is equal to +the string HTML, +without regard to case, +and any text node preceding that +element in document order contains only whitespace characters, +then +the XHTML output method MUST output a document type +declaration immediately before the first element, with no public or +system identifier. The name following <!DOCTYPE +MUST +be the same as the local part of the +name of the element.

+

Otherwise, the behavior for the doctype-system and doctype-public parameters for the XHTML output method is described in .

+
+XHTML Output Method: the undeclare-prefixes Parameter

The behavior for the undeclare-prefixes parameter for the XHTML output method is described in .

+XHTML Output Method: the normalization-form Parameter

The behavior for the normalization-form parameter for the XHTML output method is described in .

+XHTML Output Method: the media-type Parameter

The behavior for the media-type parameter for the XHTML output method is described in .

+XHTML Output Method: the use-character-maps Parameter

The behavior for the use-character-maps parameter for the XHTML output method is described in .

+XHTML Output Method: the byte-order-mark Parameter

The behavior for the byte-order-mark parameter for the XHTML output method is described in .

+XHTML Output Method: the escape-uri-attributes Parameter

+ +If the escape-uri-attributes parameter has +one of the values yes, true or 1, + the XHTML output method +MUST apply URI escaping to +URI attribute values, except that relative URIs MUST NOT be absolutized.

This escaping is deliberately confined to non-ASCII characters, +because escaping of ASCII characters is not always appropriate, for +example when URIs or URI fragments are interpreted locally by the HTML +user agent. Even in the case of non-ASCII characters, escaping can +sometimes cause problems. More precise control of URI escaping is +therefore available by setting escape-uri-attributes to +no, and controlling the escaping of URIs by using methods defined in + and .

+XHTML Output Method: the include-content-type Parameter

If the instance of the data model includes a head element +recognized as +an HTML element, +and the include-content-type parameter has +one of the values yes, true or 1, +the XHTML output method +MUST +add a meta element as the first child element of the +head element, specifying the character encoding actually +used. +The meta element SHOULD + be in no namespace if the head element is in no namespace, and in the XHTML namespace if the + head element is in the XHTML namespace. +

+

For example,

<head> +<meta http-equiv="Content-Type" + content="text/html; charset=EUC-JP" /> +...

The content type SHOULD be set to the value given for the +media-type parameter.

It is recommended that the host language use as default +value for this parameter one of the MIME types () registered for +XHTML. Currently, these are text/html (registered by ) +and application/xhtml+xml (registered by ). Note that +some user agents fail to recognize the charset parameter if the +content type is not text/html.

+

If a meta element has been added to the head element as described above, +then any existing meta element child of the head element having an +http-equiv attribute with the value "Content-Type", +making the comparison + +without regard to case +after first stripping leading and trailing spaces from the value of +the attribute solely for the purposes of comparison, + +MUST be discarded.

This process removes possible parameters in the attribute value. For example,

+<meta http-equiv="Content-Type" + content="text/html;version='3.0'" /> +

in the data model instance would be replaced by,

+<meta http-equiv="Content-Type" + content="text/html;charset=utf-8" /> +
+ +XHTML Output Method: the item-separator Parameter +

The effect of the item-separator serialization parameter +is described in .

+
+ + +XHTML Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names serialization parameter +is not applicable to the XHTML output method.

+
+ +XHTML Output Method: the json-node-output-method Parameter +

The json-node-output-method serialization parameter +is not applicable to the XHTML output method.

+
+ +
+
+ +HTML Output Method

The HTML output method serializes the instance of the data model as +HTML.

For example, the following XSL stylesheet generates html output,

+<xsl:stylesheet version="2.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="html" version="4.0"/> +<xsl:template match="/"> + <html> + <xsl:apply-templates/> + </html> +</xsl:template> +... +</xsl:stylesheet>

In the example, the version attribute of the xsl:output element indicates the version of the HTML Recommendation to which the serialized result is to conform.

+

It is entirely the responsibility of the person or process that creates the instance of the data model to ensure that the instance of the data model conforms to the HTML Recommendation . It is not an error if the instance of the data model is invalid HTML. Equally, it is entirely under the control of the person or process that creates the instance of the data model whether the output conforms to HTML. + +If the result tree is valid HTML, the +serializer MUST serialize the result in a way that +conforms with the version of HTML specified by the +requested HTML +version.

+ + +Markup for Elements + +

As is described in detail below, +the HTML output method +will not +output an element +differently from the XML output method unless the +element is to be +serialized as an HTML +element. +The +portion of the serialized document representing the result of serializing +an element, +that is not to be +serialized as an HTML +element, +is known as an +XML Island. +An element node is serialized as an +HTML element if

+ +

the expanded QName of the element has a +null namespace URI, +regardless of the value of the +requested HTML +version, or +

+

the value of the +requested HTML +version +is 5.0 or +greater, and +the element node is in the +XHTML namespace.

+
+

+

If the +element is to be +serialized as an HTML +element, +but the local part of the expanded QName is not recognized as the name +of an HTML element, the element +MUST be output in the same way as a +non-empty, inline element such as span. In particular:

+ +

+Any namespace node in the result tree +for the XML namespace, is ignored +by the HTML output method. +In addition, +if the +requested HTML version +is 5.0, any element node that has a prefix and is in the +XHTML namespace, +MathML namespace, +or SVG namespace +MUST be serialized +with an unprefixed element name. The serializer MUST +serialize an attribute with the name xmlns whose value is +equal to the namespace URI of the element node, unless an ancestor +element in the serialized result already has an attribute named +xmlns with the same value, and no intervening element +has an attribute named xmlns with a different value. +If the element +node has a namespace node for the default namespace whose value is not +equal to the namespace URI of the element node, +the namespace node is ignored. +The serializer +MUST NOT serialize a namespace declaration for the +namespace node declaring the element node's prefix, unless an attribute +of the element node has the same prefix. + + +For namespace nodes in the result tree +that are not ignored, the HTML output method +MUST represent these namespaces using +attributes named xmlns or xmlns:prefix +in the same way as the XML output method would represent them when the +version parameter is set to 1.0.

+

If the result tree contains elements or attributes whose names have a +non-null namespace URI, the HTML output method + +MUST generate +namespace-prefixed QNames for these nodes in the same way as the XML output +method would do when the version parameter is set to 1.0.

+

Where special rules are defined later in this section for +serializing specific HTML elements and attributes, these rules + +MUST NOT be +applied to an element that is not to be +serialized as an HTML +element +or an attribute whose name has +a non-null +namespace URI. However, the generic rules for the HTML output method +that apply to all elements and attributes, for example the rules for +escaping special characters in the text and the rules for indentation, +MUST be used also for namespaced elements and attributes.

+

When serializing an element whose name is not defined in the +HTML specification, but that is +to be +serialized as an HTML +element, the HTML output method + +MUST +apply the same rules (for example, indentation rules) as +when serializing a span element. The descendants of such +an element +MUST be serialized as if they were descendants of a +span element.

+

When serializing an element whose name is in a non-null +namespace, the HTML output method +MUST apply the same rules (for +example, indentation rules) as when serializing a div +element. The descendants of such an element + +MUST be serialized as if +they were descendants of a div element, + except for the influence + of the cdata-section-elements serialization parameter + on any text node children of the element. +

+

The HTML output method +MUST NOT output an end-tag for an empty element +if the element type has an empty content model, +and the value of the +requested HTML +version +is less than 5.0, or the element is a void +element and the value of the +requested HTML +version +is 5.0.

+

For HTML 4.0, the +element types that have an empty content model are +area, base, basefont, +br, col, +embed, +frame, +hr, img, input, +isindex, link, meta and +param. +For HTML5, the void elements are +area, base, +br, col, command, embed, +hr, img, input, +keygen, link, meta, +param, source, track and +wbr. It is implementation-defined +whether the basefont, frame and isindex +elements, which are not part of HTML5, are considered to be void elements when +the +requested HTML +version +has the value 5.0. +

+ +For the HTML output method, +it is implementation-defined +whether the basefont, frame and isindex +elements, which are not part of HTML5, are considered to be void elements when +the requested HTML +version has the value 5.0. + + +

For example, an element written as +<br/> or <br></br> in an +XSLT stylesheet +MUST be output as <br>.

+ + + +

The markup generation step of the +phases of serialization only creates +start tags and end tags for the HTML output method, never XML-style +empty element tags. As such, a serializer +MUST serialize an HTML +element that has no children, but whose content model is not empty, +using a pair of adjacent start and end element tags, or as a solitary +start tag if permitted by the context. +

+
+ +

For any element node that is to be +serialized as an HTML +element, +the HTML output method +MUST +compare the local part of the name of +the element node with the names of HTML elements + +making the comparison +without regard to case. + +If the local part of the name of the +element node compares equal to that of any HTML element, the element node +MUST be recognized as being that kind of HTML +element. +For example, elements named +br, BR or Br +MUST all be +recognized as the HTML br element and output without an +end-tag.

+

The HTML output method +MUST NOT perform escaping for + +any text node +descendant, nor for any attribute of an element node descendant, +of +a +script +or +style +element. + +

For example, a script element +created by an XQuery direct element constructor or an XSLT +literal result element, such as:

<script>if (a &lt; b) foo()</script>

or

<script><![CDATA[if (a < b) foo()]]></script>

+MUST be output as

<script>if (a < b) foo()</script>

A common requirement is to output a script element +as shown in the example below:

<script type="application/ecmascript"> + document.write ("<em>This won't work</em>") +</script>

This is +invalid +HTML, for the reasons explained in section B.3.2 of +the 4.01 specification. Nevertheless, it is possible to output +this fragment, using either of the following constructs:

+

Firstly, by use of a script element +created by an XQuery direct element constructor or an +XSLT literal result element:

<script type="application/ecmascript"> + document.write ("<em>This won't work</em>") +</script>

Secondly, by constructing the markup from ordinary text characters:

<script type="application/ecmascript"> + document.write ("&lt;em&gt;This won't work&lt;/em&gt;") +</script>

As the specification points out, the correct way to write this +is to use the escape conventions for the specific scripting language. +For JavaScript, it can be written as:

<script type="application/ecmascript"> + document.write ("&lt;em&gt;This will work&lt;\/em&gt;") +</script>

The 4.01 specification also shows examples of how to write +this in various other scripting languages. The escaping MUST be done +manually; it will not be done by the serializer.

+Writing Attributes

The HTML output method +MUST NOT escape +"<" characters occurring in attribute values.

+ +

A boolean attribute is an +attribute with only a single allowed value in any of the HTML DTDs +or that is specified to be a boolean +attribute by HTML5 (see ), where the +allowed value is equal without regard to case +to the name of the attribute. +The HTML output method MUST output any boolean attribute in minimized form if +and only if the value of the attribute node actually is equal to the name of +the attribute + + +making the comparison without regard to case. + +

+ +

For example, a start-tag created +using the following XQuery direct element constructor or XSLT +literal result element

<OPTION selected="selected">

MUST be output as

<OPTION selected>

The HTML output method +MUST NOT escape a +& character occurring in an attribute value +immediately followed by a { character (see Section +B.7.1 of the HTML Recommendation ).

For example, a start-tag created +using the following XQuery direct element constructor or XSLT +literal result element

<BODY bgcolor='&amp;{{randomrbg}};'>

+MUST be output as

<BODY bgcolor='&{randomrbg};'>

See for additional directives on how attributes MAY be written.

+Writing Character Data +

The HTML output method MAY output a character using a +character entity reference in preference to using a numeric character +reference, if an entity is defined for the character in the version of +HTML that the output method is using. Entity references and character +references SHOULD be used only where the character is not present in +the selected encoding, or where the visual representation of the +character is unclear (as with &nbsp;, for +example).

+

When outputting a sequence of +whitespace characters in the +instance of the data model, within an element where +whitespace +characters are +treated normally +(but not in elements such as pre and +textarea), the HTML output method +MAY +represent it using any sequence of whitespace +characters that will be treated +in the same way by an HTML user agent. See section 3.5 of for some additional information on +handling of whitespace by an HTML user agent +for versions of HTML prior to HTML5, +and see the for information on the handling of whitespace +characters by an HTML5 user agent. +

+

The terms space character +and white_space character defined in HTML5 do not match the definition of +whitespace character in this +specification.

+

Certain characters + +are +permitted +in XML, but not in HTML +prior to HTML5 + +— for example, +the control characters #x7F-#x9F are +permitted +in both XML 1.0 and XML 1.1, and +the control characters #x1-#x8, #xB, #xC and #xE-#x1F are +permitted +in XML 1.1, but +none of these is permitted in HTML +prior to HTML5 +. + +It is a +serialization error to use the HTML +output method if such characters +appear in the instance of the data model +and the value of the +requested HTML +version +is less than 5.0. The +serializer +MUST signal the error.

+

The HTML output method +MUST terminate processing +instructions with > rather than +?>. It is a serialization error to use the HTML output method when > appears within a processing instruction in the data model instance being serialized.

+The Influence of Serialization Parameters upon the HTML Output Method +HTML Output Method: the version +and html-version +Parameters +

The +html-version or the +version +serialization parameter +indicates the version of the HTML +Recommendation +or +to which the serialized result is +to conform. + +If the +html-version serialization parameter is not absent, the +requested HTML version is the value of the +html-version serialization parameter; otherwise, it is +the value of the version serialization +parameter. +If the serializer does +not support the version of HTML specified by +the requested +HTML version, it +MUST signal a +serialization error .

+ +

This document provides the normative +definition of serialization for the HTML output method if the +requested HTML version +has the lexical form of a value of type decimal whose value +is 1.0 or greater, but no greater than +5.0. For any other value of +the version parameter, the behavior is +implementation-defined. +In that case the +implementation-defined +behavior MAY supersede all other requirements +of this recommendation.

+If an implementation +supports a value of the version parameter for the HTML output +method for which this document does not provide a normative definition, the +behavior is implementation-defined. + +
+HTML Output Method: the encoding Parameter

The encoding parameter specifies the encoding to be used. +Serializers are +REQUIRED to support values of UTF-8 and +UTF-16. A serialization error occurs if an output +encoding other than UTF-8 or UTF-16 is +requested and the serializer +does not support that encoding. The serializer +MUST signal the error. +

+

It is possible that the instance of the data model will contain a character that +cannot be represented in the encoding that the serializer +is using for +output. In this case, if the character occurs in a context where HTML +recognizes character references, then the character +MUST be output +as a character entity reference or decimal numeric character +reference; otherwise (for example, in a script or +style element or in a comment), the serializer +MUST +signal a serialization error . +

+

See regarding how this parameter is used with the include-content-type parameter.

+ +HTML Output Method: the indent and suppress-indentation Parameters + +

If the indent parameter has +one of the values yes, true or 1, +then the +HTML output method MAY add or remove whitespace as it +serializes the result tree, +if it observes the following +constraints.

+ +

Whitespace MUST NOT be added other than before or +after an element, or adjacent to an existing whitespace character.

+

Whitespace MUST NOT be added or removed adjacent to +an inline element. The inline elements are those included in the +%inline category of any of the HTML 4.01 DTDs +or those elements defined to be phrasing +elements in HTML5, as well as the +ins and del elements if they are used as inline +elements (i.e., if they do not contain element children).

+

Whitespace MUST NOT be added or removed inside a +formatted element, the formatted elements being pre, +script, style, +title, +and textarea. +

+ +

Whitespace characters +MUST NOT be added in the content of an element +whose expanded QName matches +a member of the list of expanded QNames in the +value of the suppress-indentation parameter. +The expanded QName of an element node +is considered to match a member of the list of expanded QNames +if:

+ +

the two expanded QNames are equal;

+

the expanded QNames both have null +namespace URIs, and the local parts of the two QNames are +equal without regard to case; or +

+

the value of the +requested HTML +version +is 5.0, the local parts of the two QNames are equal +without regard to case +and one QName has a null namespace +URI and the namespace URI of the other is equal to the XHTML +namespace URI.

+
+
+
+ +

The effect of the above constraints is to +ensure any insertion or deletion of whitespace would not affect how a +conforming +HTML user agent would render the output, assuming the serialized document does +not refer to any HTML style sheets.

+

Note that the HTML definition of whitespace is different from the XML definition +(see section 9.1 of the specification).

+HTML Output Method: the cdata-section-elements Parameter

The cdata-section-elements parameter is not applicable to the HTML output method, except in the case of XML Islands.

+HTML Output Method: the omit-xml-declaration and standalone Parameters

The omit-xml-declaration and standalone parameters are not applicable to the HTML output method.

+HTML Output Method: the doctype-system and doctype-public Parameters

If the doctype-public or doctype-system +parameters are specified, then the HTML output method MUST +output a document type declaration. +If the +doctype-public parameter is specified, then the output +method +MUST output PUBLIC +followed by the specified +public identifier; if the doctype-system parameter is +also specified, it +MUST also output the specified +system identifier +following the public identifier. If the doctype-system +parameter is specified but the doctype-public parameter +is not specified, then the output method +MUST output +SYSTEM followed by the specified system identifier.

+

If the value of the +requested HTML +version +is 5.0, the +doctype-public and doctype-system serialization +parameters are both absent, +the first element node child of +the document node that is to be serialized +is to be +serialized as an HTML +element, the local part of the QName of which is equal to +the string HTML, +without regard to case, +and any text node that precedes that +element node in document order contains only whitespace characters, +then +the HTML output method MUST output a document type +declaration, with no public or system identifier.

+

If the HTML output method MUST +output a document type declaration, it MUST be serialized +immediately before the first element, if any, and the name following +<!DOCTYPE MUST be HTML +or html.

+
+HTML Output Method: the undeclare-prefixes Parameter

The undeclare-prefixes parameter is not applicable to the HTML output method.

+HTML Output Method: the normalization-form Parameter

The +normalization-form +parameter is applicable to the +HTML output method. +The values NFC and +none MUST be supported by the serializer. +A serialization error results if the value of the normalization-form +parameter specifies a normalization form that is not supported by the +serializer; +the serializer +MUST signal the error.

+HTML Output Method: the media-type Parameter

The media-type parameter is applicable to the +HTML output method. +See for more +information. See regarding how this parameter is used with the include-content-type parameter.

+HTML Output Method: the use-character-maps Parameter

The use-character-maps parameter is applicable to the +HTML output method. See for more +information.

+HTML Output Method: the byte-order-mark Parameter

The byte-order-mark parameter is +applicable to the HTML output method. See + for more information.

+HTML Output Method: the escape-uri-attributes Parameter

+If the escape-uri-attributes parameter +has +one of the values yes, true or 1, +the HTML output method MUST +apply URI escaping to +URI attribute values, except that relative URIs MUST NOT be absolutized. +

This escaping is deliberately confined to non-ASCII characters, +because escaping of ASCII characters is not always appropriate, for +example when URIs or URI fragments are interpreted locally by the HTML +user agent. Even in the case of non-ASCII characters, escaping can +sometimes cause problems. More precise control of URI escaping is +therefore available by setting escape-uri-attributes to +no, and controlling the escaping of URIs by using methods defined in + and .

+HTML Output Method: the include-content-type Parameter

If there is a head element, +and the include-content-type parameter has +one of the values yes, true or 1, +the HTML output method +MUST add a meta element +as the first child element +of the head element specifying the character encoding +actually used.

For example,

<HEAD> +<META http-equiv="Content-Type" content="text/html; charset=EUC-JP"> +...

The content type MUST +be set to the value given for the +media-type parameter.

+

If a meta element has been added to the head element as described above, +then any existing meta element child of the head element having an +http-equiv attribute with the value +"Content-Type", making the comparison + +without regard to case +after first stripping leading and trailing spaces from the value of +the attribute solely for the purposes of comparison, + +MUST be discarded.

+ +

This process removes possible parameters in the +attribute value. For example,

+<meta http-equiv="Content-Type" + content="text/html;version='3.0'"/> +

in the data model instance would be replaced by,

+<meta http-equiv="Content-Type" + content="text/html;charset=utf-8"/> +
+ +HTML Output Method: the item-separator Parameter +

The effect of the item-separator serialization parameter +is described in .

+
+ + +HTML Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names serialization parameter +is not applicable to the HTML output method.

+
+ +HTML Output Method: the json-node-output-method Parameter +

The json-node-output-method serialization parameter +is not applicable to the HTML output method.

+
+ +
+
+ +Text Output Method +

The Text output method serializes the instance of the data model by +outputting the string value of the +document node created by +the markup generation step of the +phases of serialization without any +escaping.

+ +

A newline character in the instance of the data model MAY be output using any +character sequence that is conventionally used to represent a line +ending in the chosen system environment.

+ +

The rule just stated applies to newline characters (#xA); it does +not apply to occurrences in the data model instance of carriage return +(CR), NEL, or LINE SEPARATOR characters; these should be output +literally, regardless of the conventions for line endings in the +system environment.

+

To illustrate, the following table shows the expected output +for various character sequences in environments which conventionally +use #xA (LF, as in Linux systems), #xD followed by #xA (CR+LF, Windows), +#xD (CR only, older versions of Mac OS), +#x85 (NEL, some IBM operating systems), +or #x2028 (LINE SEPARATOR) to separate lines:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input#xA systems#xD#xA systems#xD systems#x85 systems#x2028 systems
character #xDcharacter #xDcharacter #xDcharacter #xDcharacter #xDcharacter #xD
character #xAcharacter #xAstring #xD + #xAcharacter #xDcharacter #x85character #x2028
string #xD + #xAstring #xD + #xAstring #xD + #xD + #xAstring #xD + #xDstring #xD + #x85string #xD + #x2028
string #xD + #xD + #xAstring #xD + #xD + #xAstring #xD + #xD + #xD + #xAstring #xD + #xD + #xDstring #xD + #xD + #x85string #xD + #xD + #x2028
+
+The Influence of Serialization Parameters upon the Text Output Method +Text Output Method: the version Parameter

The version parameter is not applicable to the Text output method.

+ +Text Output Method: the html-version Parameter +

The html-version parameter is not applicable to the +Text output method.

+
+Text Output Method: the encoding Parameter

The encoding parameter identifies the encoding that +the Text output method +MUST use to convert sequences of +characters to sequences of bytes. +Serializers +are REQUIRED to support values of UTF-8 and +UTF-16. +A serialization error +occurs if the serializer +does not support the encoding specified +by the encoding parameter. +The serializer +MUST signal the error. +If the instance of the data model contains a +character that cannot be represented in the encoding that the +serializer is using for output, the serializer +MUST +signal a serialization error .

+ +Text Output Method: the indent and suppress-indentation Parameters +

The indent +and suppress-indentation +parameters are not applicable to the Text output method.

+
+Text Output Method: the cdata-section-elements Parameter

The cdata-section-elements parameter is not applicable to the Text output method.

+Text Output Method: the omit-xml-declaration and standalone Parameters

The omit-xml-declaration and standalone parameters are not applicable to the Text output method.

+Text Output Method: the doctype-system and doctype-public Parameters

The doctype-system and doctype-public parameters are not applicable to the Text output method.

+Text Output Method: the undeclare-prefixes Parameter

The undeclare-prefixes parameter is not applicable to the Text output method.

+Text Output Method: the normalization-form Parameter

The normalization-form +parameter is applicable to the +Text output method. +The values NFC +and none MUST be supported by the serializer. +A serialization error results if the value of the +normalization-form parameter specifies a normalization form +that is not supported by the serializer; the +serializer MUST signal the +error.

+Text Output Method: the media-type Parameter

The media-type parameter is applicable to the +Text output method. +See for more +information.

+Text Output Method: the use-character-maps Parameter

The use-character-maps parameter is applicable to the +Text output method. +See for more +information.

+Text Output Method: the byte-order-mark Parameter

The byte-order-mark parameter is +applicable to the Text output method. See + for more information.

+Text Output Method: the escape-uri-attributes Parameter

The escape-uri-attributes parameter is not applicable to the Text output method.

+Text Output Method: the include-content-type Parameter

The include-content-type parameter is not applicable to the Text output method.

+
+ +Text Output Method: the item-separator Parameter +

The effect of the item-separator serialization parameter +is described in .

+
+ + +Text Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names serialization parameter +is not applicable to the Text output method.

+
+ +Text Output Method: the json-node-output-method Parameter +

The json-node-output-method serialization parameter +is not applicable to the Text output method.

+
+ +
+
+ + +JSON Output Method + +

The JSON output method serializes the instance of the data model as +a JSON value using the JSON syntax defined in . + +Sequence normalization is not performed for this output method. +

+ + +

An array item in the +data model instance is +serialized to a JSON array by outputting the serialized JSON value of +each member + within the array separated by delimiters according to the +JSON array syntax, i.e. [member, member, ...]. +Each member +in the array is to be serialized +by recursively applying the rules +in this section. +

+ +

A map item +in the data model +instance +is serialized to a JSON object by outputting, for each key/value pair, +the string value of the key, +serialized as a JSON string, followed by +the serialized JSON value of the entry, +separated by delimiters according to the JSON object +syntax, i.e. {key:value, key:value, ...}. +The order in which each key/value pair appears +in the serialized output is +implementation-dependent.

+When map items are serialized using the JSON +output method, the order in which key/value pairs appear in the +serialized output is +implementation-dependent. +

If any two keys of the map item have the same +string value, +a serialization error is raised, +unless the allow-duplicate-names parameter has +one of the values yes, true or 1. +

+
+ +

+A node in the data model instance +is serialized to a JSON string by outputting +the result of serializing the node using the method specified by the +json-node-output-method parameter. + +If the json-node-output-method parameter is set to +xml or xhtml then the node is serialized with the +additional serialization parameter omit-xml-declaration set to yes. + + +The node is serialized with the serialization parameter omit-xml-declaration set + to yes and with no other serialization parameters set. + +

+ +

A node in the data model instance will be serialized +to a JSON string by outputting the +string value of the +node.

+

An atomic +value in the data model instance with a numeric type, or +derived from a numeric type xs:float, xs:double or xs:decimal +is +serialized to a JSON number. +Implementations MAY serialize the +numeric value using any lexical representation of a JSON number defined in . + +If the numeric value cannot be +represented in the JSON grammar (such as Infinity or NaN), then the +serializer MUST +signal a serialization error. +

+ +

An atomic value +in the data model instance +of type xs:boolean and value true +is +serialized to the JSON token true.

+ +

An atomic value +in the data model instance +of type xs:boolean and value false +is +serialized to the JSON token false.

+ +

An atomic +value in the data model instance of any other type +is +serialized to a JSON string by outputting the +result of applying the fn:string function to +the item.

+ +

An empty sequence in the data model +instance is serialized to the JSON token null.

+ +

A sequence of length greater than +one in the data model instance +will result in a serialization error. +

+ +

+Any item in the data model instance of type not specified in the above +list will result in a serialization error +.

+ +
+ +

+Whenever a value is serialized +to a JSON string, the following procedure is applied to the supplied string:

+ +

Any character in the string for which character mapping is defined +(see ) is substituted by the replacement string defined in the character map.

+

+Any other character in the input string (but not a character produced by character mapping) +is a candidate for +Unicode Normalization if requested by the normalization-form parameter, +and JSON escaping. JSON escaping replaces the characters +quotation mark, backspace, form-feed, newline, carriage return, +tab, reverse solidus, or +solidus by the corresponding JSON escape sequences +\", \b, \f, \n, +\r, +\t, \\, or \/ +respectively, and any other codepoint in the +range 1-31 or 127-159 by an escape in the form \uHHHH +where HHHH is the hexadecimal representation of the codepoint value. +Escaping is also applied to any characters that cannot be represented in the selected encoding. +

+

The resulting string is enclosed in double quotation marks.

+
+

+ +

+Finally, encoding, as controlled by the encoding parameter, +converts the character stream produced by the preceding rules into an octet stream. +

+ + +The Influence of Serialization Parameters upon the JSON Output Method +

+When nodes are serialized using the JSON output method, +serialization is delegated to the output method specified by the +json-node-output-method serialization parameter. The +omit-xml-declaration parameter is set to yes, and no other +serialization parameters are passed down to the serialization +method responsible for serializing the node. +

+JSON Output Method: the version Parameter

The version parameter is not applicable to the JSON output method.

+ + +JSON Output Method: the html-version Parameter +

The html-version parameter is not applicable to the +JSON output method.

+
+ + +JSON Output Method: the encoding Parameter +

The encoding parameter identifies the encoding that +the JSON output method +MUST use to convert sequences of +characters to sequences of bytes. +Serializers +are REQUIRED to support values of UTF-8 and +UTF-16. +A serialization error +occurs if the serializer +does not support the encoding specified +by the encoding parameter. +The serializer +MUST signal the error. +If the instance of the data model contains a +character that cannot be represented in the encoding that the +serializer is using for output, the serializer +MUST +signal a serialization error +.

+ +

If an encoding other than UTF-8, UTF-16, UTF-32, US-ASCII, or an +equivalent is specified for the encoding parameter, the +output will (except in unusual circumstances) fail to conform to the +definition of JSON in .

+
+
+ + +JSON Output Method: the indent and suppress-indentation Parameters +

The indent and +suppress-indentation parameters control whether the +serializer MAY adjust the whitespace +in the serialized result so that a person will find it easier to read. +If the indent parameter has the value yes, +the serializer MAY output additional whitespace +characters adjacent to the JSON structural tokens. +

+

The indent parameter +controls whether the serializer +adjusts the whitespace in the serialized result so that a person will +find it easier to read. If the indent parameter has +one of the values yes, true or 1, +the serializer MAY output +additional whitespace characters adjacent to the JSON structural +tokens. If the indent parameter has the value +no, false or 0, +the serializer +MUST output no whitespace characters adjacent to +the JSON structural tokens. +

+

+The suppress-indentation parameter is not applicable to +the JSON output method. +

+
+ + +JSON Output Method: the cdata-section-elements Parameter +

The cdata-section-elements parameter is not applicable to the JSON output method.

+
+ + +JSON Output Method: the omit-xml-declaration and standalone Parameters +

The omit-xml-declaration and standalone parameters are not applicable to the JSON output method.

+
+ +JSON Output Method: the doctype-system and doctype-public Parameters

The doctype-system and doctype-public parameters are not applicable to the JSON output method.

+ +JSON Output Method: the undeclare-prefixes Parameter

The undeclare-prefixes parameter is not applicable to the JSON output method.

+ +JSON Output Method: the normalization-form Parameter

The normalization-form +parameter is applicable to the JSON output method. +The values NFC +and none MUST be supported by the serializer. +A serialization error results if the value of the +normalization-form parameter specifies a normalization form +that is not supported by the serializer; the +serializer MUST signal the +error.

+ +JSON Output Method: the media-type Parameter

The media-type parameter is applicable to the +JSON output method. +See for more +information.

+ +JSON Output Method: the use-character-maps Parameter

The use-character-maps parameter is applicable to the +JSON output method. +See for more +information.

+ + +JSON Output Method: the byte-order-mark Parameter +

The byte-order-mark parameter is +applicable to the JSON output method. See + for more information.

+ +

Serialized output containing a byte-order mark does not +conform to the definition of JSON in +(although conforming JSON parsers are allowed to tolerate +the byte-order mark).

+
+
+ +JSON Output Method: the escape-uri-attributes Parameter

The escape-uri-attributes parameter is not applicable to the JSON output method.

+ +JSON Output Method: the include-content-type Parameter

The include-content-type parameter is not applicable to the JSON output method.

+
+ + +JSON Output Method: the item-separator Parameter +

The effect of the item-separator serialization +parameter is described in .

+

The item-separator serialization +parameter is not applicable to the JSON output method.

+
+ + +JSON Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names serialization parameter +determines whether the presence of multiple keys in a map item +with the same string value (e.g. the date 2014-10-01 and the string +"2014-10-01") will or will not raise a serialization error. +If the value is +one of +yes, +true or 1, +such duplicate keys will result in duplicate object-member names in +the JSON output and no error will be raised because of the duplicate names. +If the value is no, false or 0, +such duplicate keys are an error.

+
+ + +JSON Output Method: the json-node-output-method Parameter +

+The json-node-output-method serialization parameter determines how a node +in the data model instance gets converted to a JSON value. +If the value is one of xml, xhtml, html or text, +then the node is converted to a JSON string by serializing the node using the output method +specified by this parameter. If the value is xml or xhtml then +the node is serialized with the additional serialization parameter omit-xml-declaration +set to yes. +

+
+ +
+
+ + +Adaptive Output Method + +

The Adaptive output method serializes the instance of the data model + +into a human readable form for the purposes of debugging query results. + +The intention of this is to allow any valid XDM instance to be serialized +without raising a serialization error. +Sequence normalization is not performed for this output method. +

+ +

Each item in the supplied sequence is serialized individually as +follows, with an occurrence of the chosen item-separator +between successive items.

+ + + +

A document, element, text, comment, or processing instruction +node is serialized using the XML +output method described in .

+ +

An attribute or namespace node is serialized as if it had a containing +element node. For example an +attribute node might be serialized as the string +xsi:type="xs:integer"; a namespace node might be +serialized as +xmlns:sns="http://example.com/sample-namespace". +

+ +

This may result in output of QNames containing prefixes whose +binding is not displayed.

+
+ +

An atomic +value is serialized +as follows:

+ +

An instance of xs:boolean is serialized as true() or false().

+

An instance of xs:string, xs:untypedAtomic + or xs:anyURI +is serialized by enclosing the value in double +quotation marks and doubling any quotes within the value; or optionally by enclosing the value +in apostrophes and doubling any apostrophes within the value. +The resulting value is then serialized using the Text output method +described in . +

+

The Text output method will apply character expansion and encoding rules to this +string as specified by the serialization parameters.

+
+

An instance of xs:integer or xs:decimal is serialized by converting +the value to a string using the fn:string function.

+

An instance of xs:double is serialized by applying the function +format-number(?, '0.0##########################e0') +using the following default decimal format properties:

+ + +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Property nameProperty value
decimal-separatorThe period character (.)
exponent-separatorThe character (e)
grouping-separatorThe comma character (,)
zero-digitThe Western digit zero (#x30).
digitThe number sign character (#)
infinityThe string "INF"
NaNThe string "NaN"
minus-signThe hyphen-minus character (#x2D)
+ +
+

An instance of xs:QName or xs:NOTATION is serialized +as a URI-qualified name (that is, in the form Q{uri}local).

+

An atomic value of any other type is serialized using the syntax of a constructor +function: xs:TYPE("VAL") where TYPE is the name of the primitive +type, and VAL is the result of applying the fn:string() function. + For example, xs:date("2015-07-17"). + The resulting string is then serialized using the Text output method +described in .

+
+
+ +

An array item or a +map item is serialized using the +JSON output method described in , with the +following amendments to the JSON serialization rules: + +

A sequence of length greater than one in the data model +instance will be serialized using the Adaptive output method rather +than raising a serialization error . +

+

The allow-duplicate-names serialization +parameter is set to the value yes by default to avoid a +serialization error if a map +contains keys with duplicate string values.

+

Attribute and namespace nodes are serialized using the +Adaptive output method.

+
+ +

Numeric values are serialized with their string value, whether they +can be represented in the JSON grammar or not.

+

The rule just given affects the serialization of values such +as Infinity or NaN, which cannot be represented in JSON and which +raise a serialization error in the JSON output method, +but which the Adaptive output method serializes as the tokens +INF and NaN.

+
+ +

Any item type not serializable by the JSON output method will be + serialized using the Adaptive output method rather than raising a + serialization error .

+

This is an amendment to the final 'catch-all' rule in the JSON + output method.

+
+
+

+ + +

+An array item is serialized using the syntax +of a square array constructor, +that is as [member,member, ... ]. The members, which in general are sequences, +are serialized in the form (item,item, ...) where the items are serialized by +applying these rules recursively. The items are separated by commas +(not by the item-separator character). The enclosing parentheses are optional if the sequence +has length one.

+

The serializer should avoid outputting the parentheses if it is able +to determine the length of the sequence before serializing the first item; but it is allowed +to output parentheses around a singleton if this avoids buffering data in memory.

+
+ +

+A map item is serialized using the syntax of a +map constructor, that is as map{key:value, key:value, ...}. +The order of entries is implementation-dependent. The key is serialized by applying the rules +for serializing an atomic value. The values are serialized in the same way as the members of an array (see above). +

+ +

A function item is +serialized to the representation function +fn:name#A where +fn:name is +a representation of +the function name +and A is the arity. +If the function name is in one of the namespaces +http://www.w3.org/2005/xpath-functions, +http://www.w3.org/2005/xpath-functions/math, +http://www.w3.org/2005/xpath-functions/map, +http://www.w3.org/2005/xpath-functions/array or +http://www.w3.org/2001/XMLSchema, +then the name is output as a lexical QName using the conventional prefix +fn, math, map, array, or xs +as appropriate; if it is in any other namespace or in no namespace, then the name is +output as a URI-qualified name (that is, Q{uri}local). + +If the function is anonymous, +fn:name is replaced by the string +(anonymous-function).

+

+The following examples illustrate this rule:

+ +

exists#1 is serialized as function exists#1

+

fn:exists#1 is serialized as function fn:exists#1

+

Q{http://www.w3.org/2005/xpath-functions}exists#1 is serialized as function fn:exists#1

+

function($a) { $a } is serialized as function (anonymous-function)#1

+

exists(?) is serialized as function (anonymous-function)#1

+

exists#1(?) is serialized as function (anonymous-function)#1

+

math:pi#0 is serialized as math:pi#0

+
+
+
+ + + +

Character maps are applied (a) when nodes are serialized using the XML +output method, and (b) to any value represented as a string enclosed in quotation marks.

+ +

Optionally, in all the above constructs, characters whose visual +representation is ambiguous (for example tab or non-breaking-space) may be represented in the +form of an XML numeric character reference (for example &#x9; or &#xa0;)

+ +

+In many cases the serialization of an item conforms to the syntax of an XQuery expression whose result +is that item. There are exceptions, however: for example the syntax will not be valid XQuery in the +case of free-standing attribute or namespace nodes, or QName values, or anonymous functions; and where +it is valid XQuery, the result of evaluating the expression will not necessarily be identical to the +original: for example, the distinction between strings and untypedAtomic values is lost. +

+ +

+If any value cannot be output because doing so would cause a +serialization error, the processor SHOULD attempt +to recover by inserting an implementation-defined error indicator into the +output, and serializing as much of the input as can be serialized +without error. +the behavior is implementation-defined. +

+It is implementation-defined whether the +serialization process recovers from serialization errors when the +Adaptive output method is used. If it does, it +is implementation-defined what error +indicator is used. + +

+If the output is sent to a destination that allows hyperlinks to be included in the +generated text, then the serializer MAY include +implementation-dependent +hyperlinks to provide additional information, for example:

+ +

+to allow the type of atomic values +to be ascertained. +

+

+to allow the namespace binding of prefixes to be ascertained. +

+

+to provide further information about the cause of error indicators. +

+
+
+ +It is implementation-defined whether, when the +Adaptive output method is used, a serializer includes hyperlinks in +its output to record the types of atomic values, the bindings of +namespace prefixes, the causes of error indicators, and other +information. + + +If, when the +Adaptive output method is used, a serializer includes hyperlinks in +its output to record the types of atomic values, the bindings of +namespace prefixes, the causes of error indicators, and other +information, then it is implementation-dependent what hyperlinks are +used and how they convey the information. + + + + +The Influence of Serialization Parameters upon the Adaptive Output Method +

Only the item-separator parameter is directly applicable to the Adaptive output method. +All serialization parameters, if set, will be passed down to the serialization method that is applied to each item in the supplied sequence. +

+

+For some item types the Adaptive output method will delegate serialization to other output methods. +With the exception of the byte-order-mark serialization parameter, all serialization parameters, +if set, will be passed down to the serialization method that is applied to each item in the supplied sequence. +Only the item-separator and byte-order-mark parameters are directly applicable to the Adaptive output method. +

+Adaptive Output Method: the version Parameter +

The version parameter is not directly +applicable to the Adaptive output method. +

+ + +Adaptive Output Method: the html-version Parameter +

The html-version parameter is not directly applicable to the +Adaptive output method. +

+ + +Adaptive Output Method: the encoding Parameter +

The encoding parameter is not directly applicable to the +Adaptive output method.

+
+ + +Adaptive Output Method: the indent and suppress-indentation Parameters +

The indent and +suppress-indentation parameters are not directly applicable to the +Adaptive output method.

+
+ + +Adaptive Output Method: the cdata-section-elements Parameter +

The cdata-section-elements parameter is not directly +applicable to the Adaptive output method.

+
+ + +Adaptive Output Method: the omit-xml-declaration and standalone Parameters +

The omit-xml-declaration and standalone +parameters are not directly applicable to the Adaptive output +method.

+ +

+If these parameters call for an XML declaration to be serialized, then +an XML declaration is to be output each time the Adaptive output +method delegates the serialization of a node to the XML output method. +If several node items appear in the sequence to be serialized or as +values in maps or arrays to be serialized, then the output will +contain several XML declarations. + +

+
+
+ + +Adaptive Output Method: the doctype-system and doctype-public Parameters +

The doctype-system and doctype-public +parameters are not directly applicable to the Adaptive output +method.

+ + +Adaptive Output Method: the undeclare-prefixes Parameter +

The undeclare-prefixes parameter is not directly applicable to the +Adaptive output method.

+ + +Adaptive Output Method: the normalization-form Parameter +

The normalization-form parameter is not directly applicable to the +Adaptive output method. +

+ + +Adaptive Output Method: the media-type Parameter +

The media-type parameter is not directly applicable to the Adaptive output +method. +

+ + +Adaptive Output Method: the use-character-maps Parameter +

The use-character-maps parameter is applicable to the +Adaptive output method only as elsewhere specified. +

+ + +Adaptive Output Method: the byte-order-mark Parameter +

The byte-order-mark parameter is not directly applicable to the +Adaptive output method. + See + for more information. +

+

A byte order mark can appear only once in the serialized output. +Therefore, this parameter does not get passed down to any delegated output method.

+
+ + +Adaptive Output Method: the escape-uri-attributes Parameter +

The escape-uri-attributes parameter is not directly applicable to the +Adaptive output method. +

+ + +Adaptive Output Method: the include-content-type Parameter +

The include-content-type parameter is not directly applicable to the +Adaptive output method. +

+ + +Adaptive Output Method: the item-separator Parameter +

The item-separator serialization parameter is directly applicable to the +Adaptive output method. It specifies the string to be inserted between +adjacent serialized items. + + If the item-separator parameter is absent, the string + "&#x000A;" + (a single newline character) is used by the Adaptive output method as the + item-separator value. + +

+
+ + +Adaptive Output Method: the allow-duplicate-names Parameter +

The allow-duplicate-names parameter is not directly applicable to the +Adaptive output method. +

+ + +Adaptive Output Method: the json-node-output-method Parameter +

+The json-node-output-method parameter is not directly applicable to the +Adaptive output method. +

+ +
+
+ +Character Maps

The use-character-maps parameter is a list of characters +and corresponding string substitutions.

+

Character maps allow a specific character appearing in a text or +attribute node or a string +in the instance of the data model to be replaced with a specified +string of characters during serialization. The string that is +substituted is output "as is," and the serializer performs no checks +that the resulting document is well-formed. This mechanism can +therefore be used to introduce arbitrary markup in the serialized +output. +See +of for examples of using character mapping in +XSLT.

+

Character mapping is applied to the characters that actually appear +in a text or attribute node or a string + in the instance of the data model, before any other +serialization operations such as escaping or Unicode Normalization are +applied. If a character is mapped, then it is not subjected to XML or +HTML escaping, nor to Unicode Normalization. The string that is +substituted for a character is not validated or processed in any way +by the serializer, except for translation into the target encoding. In +particular, it is not subjected to XML or HTML escaping, it is not +subjected to Unicode Normalization, and it is not subjected to further +character mapping.

+

Character mapping is not applied to characters in text nodes whose +parent elements are listed in the cdata-section-elements +parameter, + +nor to characters for which output escaping has +been disabled (disabling output escaping is an +feature), + +nor to characters in attribute +values that are subject to URI escaping defined for the HTML and +XHTML output methods, unless URI escaping has been disabled using the +escape-uri-attributes parameter in the output +definition.

+

On serialization, occurrences of a character specified in the +use-character-maps in text nodes, +and attribute values +and strings +are replaced by the corresponding string from the use-character-maps +parameter.

Using a character map can result in non-well-formed documents +if the string contains XML-significant +characters. For example, it is possible to create documents containing +unmatched start and end tags, references to entities that are not +declared, or attributes that contain tags or unescaped quotation +marks.

If a character is mapped, then it is not subjected to XML or HTML escaping.

+

A serialization error occurs if character mapping causes the output +of a string containing a character that cannot be represented in the +encoding that the serializer +is using for output. The serializer +MUST signal the error.

+ +Conformance +

Serialization is intended primarily as a component +of a host language. + +A host language is another +specification that includes, by reference, this specification and all of +its requirements. A host language might be a programming language +such as or , or it +might be an application programming interface (API) intended to be used by +programs written in some other high-level programming language. The use of +the term language is not intended to preclude the possibility that +this specification might be referenced outside the context of a +programming language specification. +This document +relies on specifications that use it to specify conformance criteria +for Serialization in their respective environments. +Specifications that set conformance criteria for their use of +Serialization MUST NOT change the semantic definitions of +Serialization as given in this specification, except by +subsetting and/or compatible extensions. It is the responsibility of the host language to specify how serialization errors +are to +be handled.

+

Certain facilities in this specification are described as producing +implementation-defined results. A +claim that asserts conformance with this specification MUST be accompanied by documentation +stating the effect of each implementation-defined feature. For convenience, a non-normative +checklist of implementation-defined features is provided at +.

+ + + +&bibl; + +Schema for Serialization Parameters +

The following schema describes the structure of a Data Model instance +that can be used to specify the settings of serialization parameters using +the mechanism described in .

+

A copy of this schema is available at +http://www.w3.org/2017/01/xslt-xquery-serialization/schema-for-serialization-parameters.xsd.

+ +
+ +&errors; + + List of URI Attributes + +

The following list of attributes are declared as type %URI or + %UriList for a given HTML or XHTML element, with the exception of the + name attribute for element A which is not a URI type. + The name attribute for element A SHOULD be escaped as is + recommended by the HTML Recommendation in Appendix B.2.1.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AttributesElements
actionFORM
archiveOBJECT
backgroundBODY
citeBLOCKQUOTE, DEL, INS, Q
classidOBJECT
codebaseAPPLET, OBJECT
dataOBJECT
datasrcBUTTON, DIV, INPUT, OBJECT, SELECT, SPAN, TABLE, TEXTAREA
forSCRIPT
formactionBUTTON, INPUT
hrefA, AREA, BASE, LINK
iconCOMMAND
longdescFRAME, IFRAME, IMG
manifestHTML
nameA
posterVIDEO
profileHEAD
srcAUDIO, EMBED, FRAME, IFRAME, IMG, INPUT, SCRIPT, SOURCE, TRACK, VIDEO
usemapIMG, INPUT, OBJECT
valueINPUT
+
+ + + Checklist of Implementation-Defined and Implementation-Dependent Features + +

This appendix provides a summary of Serialization features whose + effect is explicitly + implementation-defined or implementation-dependent. The + conformance rules (see ) require vendors + to provide documentation that explains how these choices have been + exercised.

+ + + Checklist of Implementation-Defined Features + +

The following list describes + Serialization features whose effect is explicitly implementation-defined. The conformance + rules (see ) require vendors to provide + documentation that explains how these choices have been + exercised.

+ + + +
+ + + Checklist of Implementation-Dependent Features +

The following list describes Serialization features whose effect is + explicitly implementation-dependent. + The conformance rules (see ) do not require vendors + or specifications which define conformance criteria for serialization + to provide documentation that explains how these choices have been exercised.

+ + + + +
+ +
+ + + + +Change Log +

This appendix details the changes that have been made since the publication of +the first public working draft of this version (3.1) of this specification +(). For differences between +that working draft and earlier versions of this specification, +see the +Change Log in that draft.

+ + + Changes introduced in the Proposed Recommendation +

None.

+
+ + + Changes introduced in the second Candidate Recommendation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Bugzilla bug (if applicable)Erratum (if applicable)CategoryDescription of changeAffected sections
Bugzilla bug 29435NoneSubstantiveAllow numbers to be serialized using any legal JSON lexical representation in the JSON output method + +

allow implementors to choose which legal lexical form to serialize JSON numbers. + This allows an xs:double containing a whole number to be serialized as an integer.

+
Bugzilla bug 29703NoneSubstantiveAddition of true, false, 1 and 0 as +allowable values of Boolean serialization parameters. + +

update table with allowable values of Boolean parameters.

+
+

update description with new allowable values.

+

update description with new allowable values.

+

update description with new allowable values.

+

update description with new allowable values.

+

update description with new allowable values.

+

update description with new allowable values.

+
Bugzilla bug 29704NoneEditorialMinor changes in static context table. + +

change prefix for xslt namespace and define fn:local-name in static context table.

+
+
Bugzilla bug 29296NoneEditorialAdaptive Method, Error Recovery. + +

change how errors SHOULD recover to implementation-defined.

+
+
Bugzilla bug 29664NoneSubstantiveJSON Output Method: the json-node-output-method Parameter. + +

and clarify how serialization parameters are passed down to delegated json-node-output-method.

+
+
Bugzilla bug 29665NoneSubstantiveJSON escaping + +

add solidus and reverse solidus to the list of characters that are escaped when serializing to a JSON string.

+
+
Bugzilla bug 29824NoneSubstantiveAdaptive method: decimal format + +

specify the default decimal format properties.

+
+
Bugzilla bug 29822NoneSubstantiveAdaptive method: xs:anyURI + +

serialize xs:anyURI in the same way as an xs:string.

+
+
Bugzilla bug 29890NoneSubstantiveSpecified sequence normalization for arrays + +

sequence normalization rules.

+
+
+
+ + + + Changes introduced in the first Candidate Recommendation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Bugzilla bug (if applicable)Erratum (if applicable)CategoryDescription of changeAffected sections
Bugzilla bug 28811NoneEditorialClarified the serialization of function items in the Adaptive output method + +

clarify the definition of serialization of function items and give more relevant examples

+
Bugzilla bug 28457NoneSubstantiveSpecified the namespace of the generated meta element in the XHTML output method + +

add missing information on the namespace of the meta element when added by the +include-content-type parameter.

+
Bugzilla bug 28476NoneSubstantiveClarified the escaping rules for the JSON output method. + +

removed the JSON output method rules from this section.

+ +

added the escaping and encoding rules for serialization as a JSON string.

+
Bugzilla bug 28947NoneSubstantiveChanged the serialization rules for the Adaptive output method based on implementation feedback. + +

modified the serialization logic to produce a more consistent representation +of values from top level sequences and values embedded in maps and arrays.

+
Bugzilla bug 29272NoneSubstantiveChanged the serialization rules for the Adaptive output method based on implementation feedback. + +

modified the serialization rule for +xs:string and xs:untypedAtomic values to +produce a result consistent with text nodes. byte-order-mark serialization parameter is now +handled by the Adaptive output method and not passed down to any delegated output methods.

+
+
+ + +Changes applied to previous Public Working Drafts +

The following changes have been applied to previous versions of this document since the publication of the +first Public Working Draft.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Bugzilla bug (if applicable)Erratum (if applicable)CategoryDescription of changeAffected sections
Bugzilla +bug 26862NoneSubstantiveCorrection of expressions used to describe +validation of serialization parameters supplied +by means of an XDM data model instance. +

+reword XQuery and XSLT fragments involving +validation to apply validation to the children +of output:serialization-parameters +and not to output:serialization-parameters +itself. (Validation against the schema defined in this +specification is required only for parameters defined +in this specification; it is not required for +implementation-defined +serialization parameters.)

+
NoneNoneEditorialEditorial changes to wording regarding names with +null namespace URIs. +

+reword definition of +null namespace URI +to avoid the suggestion that unqualified names are +names in no namespace

+

+describe names of serialization parameters as having +null namespace URI +rather than as being "in no namespace"

+
NoneNoneEditorialEditorial improvements to presentation of JSON output method. +

(say explicitly that sequence normalization +must not be performed)

+

rephrase description of method +parameter for clarity; add mentions of json method in prose

+

specify how the phases of serialization +work for JSON output; change references to "normalized sequence" to +refer to "sequence"; specify that character expansion applies to +strings; specify that character escaping applies to JSON output

+

add section

+

add section

+

add section

+

add section

+

add section

+

add section

+

add section

+

add section

+

refer to RFC 7159; +rephrase rules for different kinds of items, to +connect with ; +specify that empty sequences are serialized as null +

+

add warning on non-standard encodings

+

add warning about BOM use in JSON

+

add section

+

add section

+
Bugzilla bug 26784NoneSubstantiveChanges to JSON output method +

add allow-duplicate-names +and json-node-output-method +parameters

+

+specify that duplicate names are an error unless +allow-duplicate-names has the value +yes; +specify that NaN and Infinity raise an error +in the JSON output method; +specify that sequences with length greater than one +are serialized as JSON arrays; +specify that nodes are serialized using the output method defined by +the json-node-output-method parameter +

+

+change description of indent +and suppress-indentation +parameters +

+

+specify that this is not applicable

+
Bugzilla bug 27498NoneSubstantiveDefinition of Adaptive output method + +

add mention of Adaptive output method as not +performing sequence normalization

+

add adaptive to list of +values for the method parameter

+

add mention of Adaptive output method as not +performing sequence normalization

+

add new section

+
NoneNoneEditorialAddition of appendix with checklist of +implementation-dependent features + +

add +section as wrapper for the two checklists

+ +

add checklist of +implementation-dependent features

+ +

push down a level +and change introductory wording accordingly

+
+
+ +
+ + + + + +
+
diff --git a/specifications/xslt-xquery-serialization-31/style/ns-xslt-xquery-serialization.xsl b/specifications/xslt-xquery-serialization-31/style/ns-xslt-xquery-serialization.xsl new file mode 100644 index 0000000..05c9c4d --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/style/ns-xslt-xquery-serialization.xsl @@ -0,0 +1,65 @@ + + + + + + + + + + + + + XSLT Processor: + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + Serialization parameter output: + + + + http://www.w3.org/2010/xslt-xquery-serialization/schema-for-parameters-for-xslt-xquery-serialization.xsd# + + + + + http://www.w3.org/2010/xslt-xquery-serialization/schema-for-parameters-for-xslt-xquery-serialization.xsd# + + output: + + + +
+
+ +
diff --git a/specifications/xslt-xquery-serialization-31/style/serialization-diff.xsl b/specifications/xslt-xquery-serialization-31/style/serialization-diff.xsl new file mode 100644 index 0000000..4111267 --- /dev/null +++ b/specifications/xslt-xquery-serialization-31/style/serialization-diff.xsl @@ -0,0 +1,36 @@ + + + + + + + + + +dd.indent { margin-left: 2em; } +p.element-syntax { border: solid thin; background-color: #ffccff } +div.proto { border: solid thin; background-color: #ffccff } +div.example { border: solid thin; background-color: blue; padding: 1em } +span.verb { font: small-caps 100% sans-serif } +span.error { font-size: small } + + + + + + + + + + + +
+      
+    
+
+ +
+ From 382987c8b1fc6cce692d7a72398ffd0fe2109e35 Mon Sep 17 00:00:00 2001 From: Norman Walsh Date: Fri, 4 Dec 2020 17:13:00 +0000 Subject: [PATCH 2/3] Fix spec references --- .../xpath-functions-40/style/ns-xpath-functions.xsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specifications/xpath-functions-40/style/ns-xpath-functions.xsl b/specifications/xpath-functions-40/style/ns-xpath-functions.xsl index f6c4cf1..9b393cc 100644 --- a/specifications/xpath-functions-40/style/ns-xpath-functions.xsl +++ b/specifications/xpath-functions-40/style/ns-xpath-functions.xsl @@ -20,8 +20,8 @@ - - + + From a33ba2d8712b258bcaac75da525e76d462520784 Mon Sep 17 00:00:00 2001 From: Norman Walsh Date: Fri, 4 Dec 2020 17:13:11 +0000 Subject: [PATCH 3/3] Fix classpath for 3.0 stylesheet --- specifications/xslt-40/build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specifications/xslt-40/build.xml b/specifications/xslt-40/build.xml index 8fd2d31..3d0236c 100644 --- a/specifications/xslt-40/build.xml +++ b/specifications/xslt-40/build.xml @@ -142,7 +142,7 @@ out="${merged-spec0.xml}" style="${merge.functions.style}" force="yes" - classpathref="saxon9.classpath"> + classpathref="saxon10he.classpath">