001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package dev.metaschema.databind.io.xml;
007
import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLStreamWriter2;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import edu.umd.cs.findbugs.annotations.NonNull;
032
033/**
034 * Utility methods for converting between StAX events and W3C DOM elements.
035 *
036 * <p>
037 * These methods support the {@code any} content feature by converting unmodeled
038 * XML content between the StAX event stream used during parsing and the DOM
039 * representation stored in {@link XmlAnyContent}.
040 */
041public final class XmlDomUtil {
042
043  private XmlDomUtil() {
044    // disable construction
045  }
046
047  /**
048   * Read an XML element from a StAX event reader and return it as a DOM
049   * {@link Element}.
050   *
051   * <p>
052   * The reader must be positioned so that the next event is a
053   * {@link XMLStreamConstants#START_ELEMENT}. After this method returns, the
054   * reader will be positioned just past the matching
055   * {@link XMLStreamConstants#END_ELEMENT}.
056   *
057   * @param reader
058   *          the StAX event reader, positioned before a start element
059   * @return the DOM element containing the full subtree
060   * @throws XMLStreamException
061   *           if an error occurs while reading XML events
062   */
063  @NonNull
064  public static Element staxToElement(@NonNull XMLEventReader2 reader)
065      throws XMLStreamException {
066    Document doc = newDocument();
067    XMLEvent event = reader.nextEvent();
068    if (!event.isStartElement()) {
069      throw new XMLStreamException("Expected START_ELEMENT but found " + event.getEventType());
070    }
071    StartElement startElement = event.asStartElement();
072    Element root = createDomElement(doc, startElement);
073    doc.appendChild(root);
074
075    readChildren(reader, doc, root);
076    return root;
077  }
078
079  /**
080   * Write a DOM {@link Element} to a StAX stream writer.
081   *
082   * <p>
083   * This writes the complete element subtree including attributes, namespace
084   * declarations, child elements, and text content.
085   *
086   * @param element
087   *          the DOM element to write
088   * @param writer
089   *          the StAX stream writer to write to
090   * @throws XMLStreamException
091   *           if an error occurs while writing to the stream
092   */
093  public static void elementToStax(
094      @NonNull Element element,
095      @NonNull XMLStreamWriter2 writer)
096      throws XMLStreamException {
097    String namespaceUri = element.getNamespaceURI();
098    String localName = element.getLocalName();
099    String prefix = element.getPrefix();
100
101    if (namespaceUri != null && !namespaceUri.isEmpty()) {
102      if (prefix != null && !prefix.isEmpty()) {
103        writer.writeStartElement(prefix, localName, namespaceUri);
104        // Declare the namespace if the writer doesn't know about it
105        String existingPrefix = writer.getNamespaceContext().getPrefix(namespaceUri);
106        if (existingPrefix == null || !existingPrefix.equals(prefix)) {
107          writer.writeNamespace(prefix, namespaceUri);
108        }
109      } else {
110        writer.writeStartElement(namespaceUri, localName);
111      }
112    } else {
113      writer.writeStartElement(localName);
114    }
115
116    // Write attributes
117    writeAttributes(element, writer);
118
119    // Write child nodes
120    writeChildren(element, writer);
121
122    writer.writeEndElement();
123  }
124
125  @NonNull
126  private static Document newDocument() throws XMLStreamException {
127    try {
128      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
129      dbf.setNamespaceAware(true);
130      // Harden against XXE: deny external DTD and schema access
131      dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
132      dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
133      DocumentBuilder builder = dbf.newDocumentBuilder();
134      return builder.newDocument();
135    } catch (ParserConfigurationException ex) {
136      throw new XMLStreamException("Failed to create DOM DocumentBuilder", ex);
137    }
138  }
139
140  @NonNull
141  private static Element createDomElement(
142      @NonNull Document doc,
143      @NonNull StartElement startElement) {
144    QName name = startElement.getName();
145    String namespaceUri = name.getNamespaceURI();
146    String localName = name.getLocalPart();
147    String prefix = name.getPrefix();
148
149    Element element;
150    if (namespaceUri != null && !namespaceUri.isEmpty()) {
151      String qualifiedName = (prefix != null && !prefix.isEmpty())
152          ? prefix + ":" + localName
153          : localName;
154      element = doc.createElementNS(namespaceUri, qualifiedName);
155    } else {
156      element = doc.createElement(localName);
157    }
158
159    // Copy attributes
160    @SuppressWarnings("unchecked")
161    Iterator<Attribute> attrs = startElement.getAttributes();
162    while (attrs.hasNext()) {
163      Attribute attr = attrs.next();
164      QName attrName = attr.getName();
165      String attrNs = attrName.getNamespaceURI();
166      String attrLocal = attrName.getLocalPart();
167      String attrPrefix = attrName.getPrefix();
168
169      if (attrNs != null && !attrNs.isEmpty()) {
170        String attrQualified = (attrPrefix != null && !attrPrefix.isEmpty())
171            ? attrPrefix + ":" + attrLocal
172            : attrLocal;
173        element.setAttributeNS(attrNs, attrQualified, attr.getValue());
174      } else {
175        element.setAttribute(attrLocal, attr.getValue());
176      }
177    }
178
179    // Copy namespace declarations as xmlns attributes
180    @SuppressWarnings("unchecked")
181    Iterator<Namespace> namespaces = startElement.getNamespaces();
182    while (namespaces.hasNext()) {
183      Namespace ns = namespaces.next();
184      String nsPrefix = ns.getPrefix();
185      if (nsPrefix != null && !nsPrefix.isEmpty()) {
186        element.setAttributeNS(
187            "http://www.w3.org/2000/xmlns/",
188            "xmlns:" + nsPrefix,
189            ns.getNamespaceURI());
190      }
191      // default namespace is handled by createElementNS
192    }
193
194    return element;
195  }
196
197  private static void readChildren(
198      @NonNull XMLEventReader2 reader,
199      @NonNull Document doc,
200      @NonNull Element parent) throws XMLStreamException {
201    while (reader.hasNext()) {
202      XMLEvent event = reader.peek();
203      if (event.isEndElement()) {
204        // Consume the end element and return
205        reader.nextEvent();
206        return;
207      } else if (event.isStartElement()) {
208        StartElement childStart = reader.nextEvent().asStartElement();
209        Element child = createDomElement(doc, childStart);
210        parent.appendChild(child);
211        readChildren(reader, doc, child);
212      } else if (event.isCharacters()) {
213        Characters chars = reader.nextEvent().asCharacters();
214        parent.appendChild(doc.createTextNode(chars.getData()));
215      } else {
216        // Skip other event types (comments, processing instructions, etc.)
217        reader.nextEvent();
218      }
219    }
220  }
221
222  private static void writeAttributes(
223      @NonNull Element element,
224      @NonNull XMLStreamWriter2 writer) throws XMLStreamException {
225    NamedNodeMap attrs = element.getAttributes();
226    for (int i = 0; i < attrs.getLength(); i++) {
227      Node attr = attrs.item(i);
228      String attrNs = attr.getNamespaceURI();
229      // getLocalName() may return null for attributes created without
230      // namespace awareness; fall back to getNodeName()
231      String attrName = attr.getLocalName();
232      if (attrName == null) {
233        attrName = attr.getNodeName();
234      }
235      String attrValue = attr.getNodeValue();
236
237      // Skip xmlns declarations - they are handled by
238      // writeStartElement/writeNamespace
239      if ("http://www.w3.org/2000/xmlns/".equals(attrNs)) {
240        continue;
241      }
242
243      if (attrNs != null && !attrNs.isEmpty()) {
244        String attrPrefix = attr.getPrefix();
245        if (attrPrefix != null && !attrPrefix.isEmpty()) {
246          writer.writeAttribute(attrPrefix, attrNs, attrName, attrValue);
247        } else {
248          writer.writeAttribute(attrNs, attrName, attrValue);
249        }
250      } else {
251        writer.writeAttribute(attrName, attrValue);
252      }
253    }
254  }
255
256  private static void writeChildren(
257      @NonNull Element element,
258      @NonNull XMLStreamWriter2 writer) throws XMLStreamException {
259    NodeList children = element.getChildNodes();
260    for (int i = 0; i < children.getLength(); i++) {
261      Node child = children.item(i);
262      switch (child.getNodeType()) {
263      case Node.ELEMENT_NODE:
264        elementToStax((Element) child, writer);
265        break;
266      case Node.TEXT_NODE:
267        writer.writeCharacters(child.getTextContent());
268        break;
269      case Node.CDATA_SECTION_NODE:
270        writer.writeCData(child.getTextContent());
271        break;
272      default:
273        // Skip other node types
274        break;
275      }
276    }
277  }
278}