1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.databind.io.xml;
7   
import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLStreamWriter2;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import edu.umd.cs.findbugs.annotations.NonNull;
32  
33  /**
34   * Utility methods for converting between StAX events and W3C DOM elements.
35   *
36   * <p>
37   * These methods support the {@code any} content feature by converting unmodeled
38   * XML content between the StAX event stream used during parsing and the DOM
39   * representation stored in {@link XmlAnyContent}.
40   */
41  public final class XmlDomUtil {
42  
43    private XmlDomUtil() {
44      // disable construction
45    }
46  
47    /**
48     * Read an XML element from a StAX event reader and return it as a DOM
49     * {@link Element}.
50     *
51     * <p>
52     * The reader must be positioned so that the next event is a
53     * {@link XMLStreamConstants#START_ELEMENT}. After this method returns, the
54     * reader will be positioned just past the matching
55     * {@link XMLStreamConstants#END_ELEMENT}.
56     *
57     * @param reader
58     *          the StAX event reader, positioned before a start element
59     * @return the DOM element containing the full subtree
60     * @throws XMLStreamException
61     *           if an error occurs while reading XML events
62     */
63    @NonNull
64    public static Element staxToElement(@NonNull XMLEventReader2 reader)
65        throws XMLStreamException {
66      Document doc = newDocument();
67      XMLEvent event = reader.nextEvent();
68      if (!event.isStartElement()) {
69        throw new XMLStreamException("Expected START_ELEMENT but found " + event.getEventType());
70      }
71      StartElement startElement = event.asStartElement();
72      Element root = createDomElement(doc, startElement);
73      doc.appendChild(root);
74  
75      readChildren(reader, doc, root);
76      return root;
77    }
78  
79    /**
80     * Write a DOM {@link Element} to a StAX stream writer.
81     *
82     * <p>
83     * This writes the complete element subtree including attributes, namespace
84     * declarations, child elements, and text content.
85     *
86     * @param element
87     *          the DOM element to write
88     * @param writer
89     *          the StAX stream writer to write to
90     * @throws XMLStreamException
91     *           if an error occurs while writing to the stream
92     */
93    public static void elementToStax(
94        @NonNull Element element,
95        @NonNull XMLStreamWriter2 writer)
96        throws XMLStreamException {
97      String namespaceUri = element.getNamespaceURI();
98      String localName = element.getLocalName();
99      String prefix = element.getPrefix();
100 
101     if (namespaceUri != null && !namespaceUri.isEmpty()) {
102       if (prefix != null && !prefix.isEmpty()) {
103         writer.writeStartElement(prefix, localName, namespaceUri);
104         // Declare the namespace if the writer doesn't know about it
105         String existingPrefix = writer.getNamespaceContext().getPrefix(namespaceUri);
106         if (existingPrefix == null || !existingPrefix.equals(prefix)) {
107           writer.writeNamespace(prefix, namespaceUri);
108         }
109       } else {
110         writer.writeStartElement(namespaceUri, localName);
111       }
112     } else {
113       writer.writeStartElement(localName);
114     }
115 
116     // Write attributes
117     writeAttributes(element, writer);
118 
119     // Write child nodes
120     writeChildren(element, writer);
121 
122     writer.writeEndElement();
123   }
124 
125   @NonNull
126   private static Document newDocument() throws XMLStreamException {
127     try {
128       DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
129       dbf.setNamespaceAware(true);
130       // Harden against XXE: deny external DTD and schema access
131       dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
132       dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
133       DocumentBuilder builder = dbf.newDocumentBuilder();
134       return builder.newDocument();
135     } catch (ParserConfigurationException ex) {
136       throw new XMLStreamException("Failed to create DOM DocumentBuilder", ex);
137     }
138   }
139 
140   @NonNull
141   private static Element createDomElement(
142       @NonNull Document doc,
143       @NonNull StartElement startElement) {
144     QName name = startElement.getName();
145     String namespaceUri = name.getNamespaceURI();
146     String localName = name.getLocalPart();
147     String prefix = name.getPrefix();
148 
149     Element element;
150     if (namespaceUri != null && !namespaceUri.isEmpty()) {
151       String qualifiedName = (prefix != null && !prefix.isEmpty())
152           ? prefix + ":" + localName
153           : localName;
154       element = doc.createElementNS(namespaceUri, qualifiedName);
155     } else {
156       element = doc.createElement(localName);
157     }
158 
159     // Copy attributes
160     @SuppressWarnings("unchecked")
161     Iterator<Attribute> attrs = startElement.getAttributes();
162     while (attrs.hasNext()) {
163       Attribute attr = attrs.next();
164       QName attrName = attr.getName();
165       String attrNs = attrName.getNamespaceURI();
166       String attrLocal = attrName.getLocalPart();
167       String attrPrefix = attrName.getPrefix();
168 
169       if (attrNs != null && !attrNs.isEmpty()) {
170         String attrQualified = (attrPrefix != null && !attrPrefix.isEmpty())
171             ? attrPrefix + ":" + attrLocal
172             : attrLocal;
173         element.setAttributeNS(attrNs, attrQualified, attr.getValue());
174       } else {
175         element.setAttribute(attrLocal, attr.getValue());
176       }
177     }
178 
179     // Copy namespace declarations as xmlns attributes
180     @SuppressWarnings("unchecked")
181     Iterator<Namespace> namespaces = startElement.getNamespaces();
182     while (namespaces.hasNext()) {
183       Namespace ns = namespaces.next();
184       String nsPrefix = ns.getPrefix();
185       if (nsPrefix != null && !nsPrefix.isEmpty()) {
186         element.setAttributeNS(
187             "http://www.w3.org/2000/xmlns/",
188             "xmlns:" + nsPrefix,
189             ns.getNamespaceURI());
190       }
191       // default namespace is handled by createElementNS
192     }
193 
194     return element;
195   }
196 
197   private static void readChildren(
198       @NonNull XMLEventReader2 reader,
199       @NonNull Document doc,
200       @NonNull Element parent) throws XMLStreamException {
201     while (reader.hasNext()) {
202       XMLEvent event = reader.peek();
203       if (event.isEndElement()) {
204         // Consume the end element and return
205         reader.nextEvent();
206         return;
207       } else if (event.isStartElement()) {
208         StartElement childStart = reader.nextEvent().asStartElement();
209         Element child = createDomElement(doc, childStart);
210         parent.appendChild(child);
211         readChildren(reader, doc, child);
212       } else if (event.isCharacters()) {
213         Characters chars = reader.nextEvent().asCharacters();
214         parent.appendChild(doc.createTextNode(chars.getData()));
215       } else {
216         // Skip other event types (comments, processing instructions, etc.)
217         reader.nextEvent();
218       }
219     }
220   }
221 
222   private static void writeAttributes(
223       @NonNull Element element,
224       @NonNull XMLStreamWriter2 writer) throws XMLStreamException {
225     NamedNodeMap attrs = element.getAttributes();
226     for (int i = 0; i < attrs.getLength(); i++) {
227       Node attr = attrs.item(i);
228       String attrNs = attr.getNamespaceURI();
229       // getLocalName() may return null for attributes created without
230       // namespace awareness; fall back to getNodeName()
231       String attrName = attr.getLocalName();
232       if (attrName == null) {
233         attrName = attr.getNodeName();
234       }
235       String attrValue = attr.getNodeValue();
236 
237       // Skip xmlns declarations - they are handled by
238       // writeStartElement/writeNamespace
239       if ("http://www.w3.org/2000/xmlns/".equals(attrNs)) {
240         continue;
241       }
242 
243       if (attrNs != null && !attrNs.isEmpty()) {
244         String attrPrefix = attr.getPrefix();
245         if (attrPrefix != null && !attrPrefix.isEmpty()) {
246           writer.writeAttribute(attrPrefix, attrNs, attrName, attrValue);
247         } else {
248           writer.writeAttribute(attrNs, attrName, attrValue);
249         }
250       } else {
251         writer.writeAttribute(attrName, attrValue);
252       }
253     }
254   }
255 
256   private static void writeChildren(
257       @NonNull Element element,
258       @NonNull XMLStreamWriter2 writer) throws XMLStreamException {
259     NodeList children = element.getChildNodes();
260     for (int i = 0; i < children.getLength(); i++) {
261       Node child = children.item(i);
262       switch (child.getNodeType()) {
263       case Node.ELEMENT_NODE:
264         elementToStax((Element) child, writer);
265         break;
266       case Node.TEXT_NODE:
267         writer.writeCharacters(child.getTextContent());
268         break;
269       case Node.CDATA_SECTION_NODE:
270         writer.writeCData(child.getTextContent());
271         break;
272       default:
273         // Skip other node types
274         break;
275       }
276     }
277   }
278 }