1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package gov.nist.secauto.metaschema.core.model.xml;
7   
8   import gov.nist.secauto.metaschema.core.model.AbstractModuleLoader;
9   import gov.nist.secauto.metaschema.core.model.IModuleLoader;
10  import gov.nist.secauto.metaschema.core.model.MetaschemaException;
11  import gov.nist.secauto.metaschema.core.model.xml.impl.XmlModule;
12  import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.METASCHEMADocument;
13  import gov.nist.secauto.metaschema.core.util.CollectionUtil;
14  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
15  
16  import org.apache.xmlbeans.XmlException;
17  import org.apache.xmlbeans.XmlOptions;
18  import org.xml.sax.EntityResolver;
19  import org.xml.sax.InputSource;
20  import org.xml.sax.SAXException;
21  import org.xml.sax.XMLReader;
22  
23  import java.io.IOException;
24  import java.net.URI;
25  import java.util.List;
26  import java.util.stream.Collectors;
27  
28  import javax.xml.XMLConstants;
29  import javax.xml.parsers.ParserConfigurationException;
30  import javax.xml.parsers.SAXParser;
31  import javax.xml.parsers.SAXParserFactory;
32  
33  import edu.umd.cs.findbugs.annotations.NonNull;
34  
35  /**
36   * Provides methods to load a Metaschema expressed in XML.
37   * <p>
38   * Loaded Metaschema instances are cached to avoid the need to load them for
39   * every use. Any Metaschema imported is also loaded and cached automatically.
40   */
41  public class ModuleLoader
42      extends AbstractModuleLoader<METASCHEMADocument, IXmlMetaschemaModule> {
43    private boolean resolveEntities; // = false;
44  
45    /**
46     * Construct a new Metaschema loader.
47     */
48    public ModuleLoader() {
49      this(CollectionUtil.emptyList());
50    }
51  
52    /**
53     * Construct a new Metaschema loader, which use the provided module post
54     * processors when loading a module.
55     *
56     * @param modulePostProcessors
57     *          post processors to perform additional module customization when
58     *          loading
59     */
60    public ModuleLoader(@NonNull List<IModuleLoader.IModulePostProcessor> modulePostProcessors) {
61      super(modulePostProcessors);
62    }
63  
64    /**
65     * Enable a mode that allows XML entity resolution. This may be needed to parse
66     * some resource files that contain entities. Enabling entity resolution is a
67     * less secure, which requires trust in the resource content being parsed.
68     */
69    public void allowEntityResolution() {
70      resolveEntities = true;
71    }
72  
73    @Override
74    protected IXmlMetaschemaModule newModule(URI resource, METASCHEMADocument binding,
75        List<? extends IXmlMetaschemaModule> importedModules)
76        throws MetaschemaException {
77      return new XmlModule(resource, binding, importedModules);
78    }
79  
80    @Override
81    protected List<URI> getImports(METASCHEMADocument binding) {
82      return ObjectUtils.notNull(binding.getMETASCHEMA().getImportList().stream()
83          .map(imported -> URI.create(imported.getHref()))
84          .collect(Collectors.toList()));
85    }
86  
87    /**
88     * Parse the provided XML resource as a Metaschema module.
89     *
90     * @param resource
91     *          the resource to parse
92     * @return the XMLBeans representation of the Metaschema module
93     * @throws IOException
94     *           if a parsing error occurred
95     */
96    @Override
97    protected METASCHEMADocument parseModule(@NonNull URI resource) throws IOException {
98      METASCHEMADocument metaschemaXml;
99      try {
100       XmlOptions options = new XmlOptions();
101       if (resolveEntities) {
102         SAXParserFactory factory = SAXParserFactory.newInstance();
103 
104         try {
105           // factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
106           factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
107           factory.setFeature("http://xml.org/sax/features/external-general-entities", true);
108           factory.setFeature("http://xml.org/sax/features/external-parameter-entities", true);
109           SAXParser parser = factory.newSAXParser();
110           parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "file"); // ,jar:file
111           XMLReader reader = parser.getXMLReader();
112           reader.setEntityResolver(new EntityResolver() {
113 
114             @Override
115             public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
116               return null;
117             }
118 
119           });
120           options.setLoadUseXMLReader(reader);
121         } catch (SAXException | ParserConfigurationException ex) {
122           throw new IOException(ex);
123         }
124         // options.setLoadEntityBytesLimit(204800);
125         // options.setLoadUseDefaultResolver();
126         options.setEntityResolver(new EntityResolver() {
127 
128           @Override
129           public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
130             String effectiveSystemId = systemId;
131             // TODO: It's very odd that the system id looks like this. Need to investigate.
132             if (effectiveSystemId.startsWith("file://file://")) {
133               effectiveSystemId = effectiveSystemId.substring(14);
134             }
135             URI resolvedSystemId = resource.resolve(effectiveSystemId);
136             return new InputSource(resolvedSystemId.toString());
137           }
138 
139         });
140         options.setLoadDTDGrammar(true);
141       }
142       options.setBaseURI(resource);
143       options.setLoadLineNumbers();
144       metaschemaXml = ObjectUtils.notNull(METASCHEMADocument.Factory.parse(resource.toURL(), options));
145     } catch (XmlException ex) {
146       throw new IOException(ex);
147     }
148     return metaschemaXml;
149   }
150 
151 }