1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package gov.nist.secauto.metaschema.core.model.xml;
7   
8   import gov.nist.secauto.metaschema.core.model.AbstractModuleLoader;
9   import gov.nist.secauto.metaschema.core.model.IModuleLoader;
10  import gov.nist.secauto.metaschema.core.model.MetaschemaException;
11  import gov.nist.secauto.metaschema.core.model.constraint.ExternalConstraintsModulePostProcessor;
12  import gov.nist.secauto.metaschema.core.model.constraint.IConstraintSet;
13  import gov.nist.secauto.metaschema.core.model.xml.impl.XmlModule;
14  import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.METASCHEMADocument;
15  import gov.nist.secauto.metaschema.core.util.CollectionUtil;
16  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
17  
18  import org.apache.xmlbeans.XmlException;
19  import org.apache.xmlbeans.XmlOptions;
20  import org.xml.sax.InputSource;
21  import org.xml.sax.SAXException;
22  import org.xml.sax.XMLReader;
23  
24  import java.io.IOException;
25  import java.net.URI;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.stream.Collectors;
29  
30  import javax.xml.XMLConstants;
31  import javax.xml.parsers.ParserConfigurationException;
32  import javax.xml.parsers.SAXParser;
33  import javax.xml.parsers.SAXParserFactory;
34  
35  import edu.umd.cs.findbugs.annotations.NonNull;
36  
37  /**
38   * Provides methods to load a Metaschema expressed in XML.
39   * <p>
40   * Loaded Metaschema instances are cached to avoid the need to load them for
41   * every use. Any Metaschema imported is also loaded and cached automatically.
42   */
43  public class ModuleLoader
44      extends AbstractModuleLoader<METASCHEMADocument, IXmlMetaschemaModule> {
45    private boolean resolveEntities; // = false;
46    @NonNull
47    private final List<IModuleLoader.IModulePostProcessor> modulePostProcessors;
48  
49    /**
50     * Construct a new Metaschema loader, which applies the provided constraints to
51     * loaded modules.
52     *
53     * @param constraints
54     *          a set of Metaschema module constraints to be applied during loading
55     * @return the loader instance configured with the specified constraints
56     */
57    public static ModuleLoader newInstanceUsingConstraints(@NonNull Collection<IConstraintSet> constraints) {
58      return new ModuleLoader(CollectionUtil.singletonList(new ExternalConstraintsModulePostProcessor(constraints)));
59    }
60  
61    /**
62     * Construct a new Metaschema loader with no constraints.
63     *
64     * @see #newInstanceUsingConstraints(Collection) for creating an instance with
65     *      constraints
66     */
67    public ModuleLoader() {
68      this(CollectionUtil.<IModuleLoader.IModulePostProcessor>emptyList());
69    }
70  
71    /**
72     * Construct a new Metaschema loader, which use the provided module post
73     * processors when loading a module.
74     *
75     * @param modulePostProcessors
76     *          post processors to perform additional module customization when
77     *          loading
78     */
79    public ModuleLoader(@NonNull List<IModuleLoader.IModulePostProcessor> modulePostProcessors) {
80      this.modulePostProcessors = modulePostProcessors;
81    }
82  
83    /**
84     * Enable a mode that allows XML entity resolution. This may be needed to parse
85     * some resource files that contain entities. Enabling entity resolution is a
86     * less secure, which requires trust in the resource content being parsed.
87     */
88    public void allowEntityResolution() {
89      resolveEntities = true;
90    }
91  
92    @Override
93    protected IXmlMetaschemaModule newModule(
94        URI resource,
95        METASCHEMADocument binding,
96        List<? extends IXmlMetaschemaModule> importedModules)
97        throws MetaschemaException {
98      IXmlMetaschemaModule module = new XmlModule(resource, binding, importedModules);
99  
100     for (IModuleLoader.IModulePostProcessor postProcessor : modulePostProcessors) {
101       postProcessor.processModule(module);
102     }
103     return module;
104   }
105 
106   @Override
107   protected List<URI> getImports(METASCHEMADocument binding) {
108     return ObjectUtils.notNull(binding.getMETASCHEMA().getImportList().stream()
109         .map(imported -> URI.create(imported.getHref()))
110         .collect(Collectors.toList()));
111   }
112 
113   /**
114    * Parse the provided XML resource as a Metaschema module.
115    *
116    * @param resource
117    *          the resource to parse
118    * @return the XMLBeans representation of the Metaschema module
119    * @throws IOException
120    *           if a parsing error occurred
121    */
122   @Override
123   protected METASCHEMADocument parseModule(@NonNull URI resource) throws IOException {
124     METASCHEMADocument metaschemaXml;
125     try {
126       XmlOptions options = new XmlOptions();
127       if (resolveEntities) {
128         SAXParserFactory factory = SAXParserFactory.newInstance();
129 
130         try {
131           // factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
132           factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
133           factory.setFeature("http://xml.org/sax/features/external-general-entities", true);
134           factory.setFeature("http://xml.org/sax/features/external-parameter-entities", true);
135           SAXParser parser = factory.newSAXParser();
136           parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "file"); // ,jar:file
137           XMLReader reader = parser.getXMLReader();
138           reader.setEntityResolver((publicId, systemId) -> null);
139           options.setLoadUseXMLReader(reader);
140         } catch (SAXException | ParserConfigurationException ex) {
141           throw new IOException(ex);
142         }
143         // options.setLoadEntityBytesLimit(204800);
144         // options.setLoadUseDefaultResolver();
145         options.setEntityResolver((publicId, systemId) -> {
146           String effectiveSystemId = systemId;
147           // TODO: It's very odd that the system id looks like this. Need to investigate.
148           if (effectiveSystemId.startsWith("file://file://")) {
149             effectiveSystemId = effectiveSystemId.substring(14);
150           }
151           URI resolvedSystemId = resource.resolve(effectiveSystemId);
152           return new InputSource(resolvedSystemId.toString());
153         });
154         options.setLoadDTDGrammar(true);
155       }
156       options.setBaseURI(resource);
157       options.setLoadLineNumbers();
158       metaschemaXml = ObjectUtils.notNull(METASCHEMADocument.Factory.parse(resource.toURL(), options));
159     } catch (XmlException ex) {
160       throw new IOException(ex);
161     }
162     return metaschemaXml;
163   }
164 
165 }