1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package gov.nist.secauto.metaschema.core.model.xml;
7   
8   import gov.nist.secauto.metaschema.core.model.AbstractModuleLoader;
9   import gov.nist.secauto.metaschema.core.model.IModuleLoader;
10  import gov.nist.secauto.metaschema.core.model.MetaschemaException;
11  import gov.nist.secauto.metaschema.core.model.constraint.ExternalConstraintsModulePostProcessor;
12  import gov.nist.secauto.metaschema.core.model.constraint.IConstraintSet;
13  import gov.nist.secauto.metaschema.core.model.xml.impl.XmlModule;
14  import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.METASCHEMADocument;
15  import gov.nist.secauto.metaschema.core.util.CollectionUtil;
16  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
17  
18  import org.apache.xmlbeans.XmlException;
19  import org.apache.xmlbeans.XmlOptions;
20  import org.xml.sax.InputSource;
21  import org.xml.sax.SAXException;
22  import org.xml.sax.XMLReader;
23  
24  import java.io.IOException;
25  import java.net.URI;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.stream.Collectors;
29  
30  import javax.xml.XMLConstants;
31  import javax.xml.parsers.ParserConfigurationException;
32  import javax.xml.parsers.SAXParser;
33  import javax.xml.parsers.SAXParserFactory;
34  
35  import edu.umd.cs.findbugs.annotations.NonNull;
36  
37  /**
38   * Provides methods to load a Metaschema expressed in XML.
39   * <p>
40   * Loaded Metaschema instances are cached to avoid the need to load them for
41   * every use. Any Metaschema imported is also loaded and cached automatically.
42   */
43  public class ModuleLoader
44      extends AbstractModuleLoader<METASCHEMADocument, IXmlMetaschemaModule> {
45    private boolean resolveEntities; // = false;
46    @NonNull
47    private final List<IModuleLoader.IModulePostProcessor> modulePostProcessors;
48  
49    /**
50     * Construct a new Metaschema loader.
51     */
52    public ModuleLoader() {
53      this(CollectionUtil.<IModuleLoader.IModulePostProcessor>emptyList());
54    }
55  
56    /**
57     * Construct a new Metaschema loader, which applies the provided constraints to
58     * loaded modules.
59     *
60     * @param constraints
61     *          a set of Metaschema module constraints
62     */
63    public ModuleLoader(@NonNull Collection<IConstraintSet> constraints) {
64      this(CollectionUtil.singletonList(new ExternalConstraintsModulePostProcessor(constraints)));
65    }
66  
67    /**
68     * Construct a new Metaschema loader, which use the provided module post
69     * processors when loading a module.
70     *
71     * @param modulePostProcessors
72     *          post processors to perform additional module customization when
73     *          loading
74     */
75    public ModuleLoader(@NonNull List<IModuleLoader.IModulePostProcessor> modulePostProcessors) {
76      this.modulePostProcessors = modulePostProcessors;
77    }
78  
79    /**
80     * Enable a mode that allows XML entity resolution. This may be needed to parse
81     * some resource files that contain entities. Enabling entity resolution is a
82     * less secure, which requires trust in the resource content being parsed.
83     */
84    public void allowEntityResolution() {
85      resolveEntities = true;
86    }
87  
88    @Override
89    protected IXmlMetaschemaModule newModule(
90        URI resource,
91        METASCHEMADocument binding,
92        List<? extends IXmlMetaschemaModule> importedModules)
93        throws MetaschemaException {
94      IXmlMetaschemaModule module = new XmlModule(resource, binding, importedModules);
95  
96      for (IModuleLoader.IModulePostProcessor postProcessor : modulePostProcessors) {
97        postProcessor.processModule(module);
98      }
99      return module;
100   }
101 
102   @Override
103   protected List<URI> getImports(METASCHEMADocument binding) {
104     return ObjectUtils.notNull(binding.getMETASCHEMA().getImportList().stream()
105         .map(imported -> URI.create(imported.getHref()))
106         .collect(Collectors.toList()));
107   }
108 
109   /**
110    * Parse the provided XML resource as a Metaschema module.
111    *
112    * @param resource
113    *          the resource to parse
114    * @return the XMLBeans representation of the Metaschema module
115    * @throws IOException
116    *           if a parsing error occurred
117    */
118   @Override
119   protected METASCHEMADocument parseModule(@NonNull URI resource) throws IOException {
120     METASCHEMADocument metaschemaXml;
121     try {
122       XmlOptions options = new XmlOptions();
123       if (resolveEntities) {
124         SAXParserFactory factory = SAXParserFactory.newInstance();
125 
126         try {
127           // factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
128           factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
129           factory.setFeature("http://xml.org/sax/features/external-general-entities", true);
130           factory.setFeature("http://xml.org/sax/features/external-parameter-entities", true);
131           SAXParser parser = factory.newSAXParser();
132           parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "file"); // ,jar:file
133           XMLReader reader = parser.getXMLReader();
134           reader.setEntityResolver((publicId, systemId) -> null);
135           options.setLoadUseXMLReader(reader);
136         } catch (SAXException | ParserConfigurationException ex) {
137           throw new IOException(ex);
138         }
139         // options.setLoadEntityBytesLimit(204800);
140         // options.setLoadUseDefaultResolver();
141         options.setEntityResolver((publicId, systemId) -> {
142           String effectiveSystemId = systemId;
143           // TODO: It's very odd that the system id looks like this. Need to investigate.
144           if (effectiveSystemId.startsWith("file://file://")) {
145             effectiveSystemId = effectiveSystemId.substring(14);
146           }
147           URI resolvedSystemId = resource.resolve(effectiveSystemId);
148           return new InputSource(resolvedSystemId.toString());
149         });
150         options.setLoadDTDGrammar(true);
151       }
152       options.setBaseURI(resource);
153       options.setLoadLineNumbers();
154       metaschemaXml = ObjectUtils.notNull(METASCHEMADocument.Factory.parse(resource.toURL(), options));
155     } catch (XmlException ex) {
156       throw new IOException(ex);
157     }
158     return metaschemaXml;
159   }
160 
161 }