ModuleLoader.java

/*
 * SPDX-FileCopyrightText: none
 * SPDX-License-Identifier: CC0-1.0
 */

package gov.nist.secauto.metaschema.core.model.xml;

import gov.nist.secauto.metaschema.core.model.AbstractModuleLoader;
import gov.nist.secauto.metaschema.core.model.IModuleLoader;
import gov.nist.secauto.metaschema.core.model.MetaschemaException;
import gov.nist.secauto.metaschema.core.model.constraint.ExternalConstraintsModulePostProcessor;
import gov.nist.secauto.metaschema.core.model.constraint.IConstraintSet;
import gov.nist.secauto.metaschema.core.model.xml.impl.XmlModule;
import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.METASCHEMADocument;
import gov.nist.secauto.metaschema.core.util.CollectionUtil;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;

import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import edu.umd.cs.findbugs.annotations.NonNull;

/**
 * Provides methods to load a Metaschema expressed in XML.
 * <p>
 * Loaded Metaschema instances are cached to avoid the need to load them for
 * every use. Any Metaschema imported is also loaded and cached automatically.
 */
public class ModuleLoader
    extends AbstractModuleLoader<METASCHEMADocument, IXmlMetaschemaModule> {
  private boolean resolveEntities; // = false;
  @NonNull
  private final List<IModuleLoader.IModulePostProcessor> modulePostProcessors;

  /**
   * Construct a new Metaschema loader.
   */
  public ModuleLoader() {
    this(CollectionUtil.<IModuleLoader.IModulePostProcessor>emptyList());
  }

  /**
   * Construct a new Metaschema loader, which applies the provided constraints to
   * loaded modules.
   *
   * @param constraints
   *          a set of Metaschema module constraints
   */
  public ModuleLoader(@NonNull Collection<IConstraintSet> constraints) {
    this(CollectionUtil.singletonList(new ExternalConstraintsModulePostProcessor(constraints)));
  }

  /**
   * Construct a new Metaschema loader, which use the provided module post
   * processors when loading a module.
   *
   * @param modulePostProcessors
   *          post processors to perform additional module customization when
   *          loading
   */
  public ModuleLoader(@NonNull List<IModuleLoader.IModulePostProcessor> modulePostProcessors) {
    this.modulePostProcessors = modulePostProcessors;
  }

  /**
   * Enable a mode that allows XML entity resolution. This may be needed to parse
   * some resource files that contain entities. Enabling entity resolution is a
   * less secure, which requires trust in the resource content being parsed.
   */
  public void allowEntityResolution() {
    resolveEntities = true;
  }

  @Override
  protected IXmlMetaschemaModule newModule(
      URI resource,
      METASCHEMADocument binding,
      List<? extends IXmlMetaschemaModule> importedModules)
      throws MetaschemaException {
    IXmlMetaschemaModule module = new XmlModule(resource, binding, importedModules);

    for (IModuleLoader.IModulePostProcessor postProcessor : modulePostProcessors) {
      postProcessor.processModule(module);
    }
    return module;
  }

  @Override
  protected List<URI> getImports(METASCHEMADocument binding) {
    return ObjectUtils.notNull(binding.getMETASCHEMA().getImportList().stream()
        .map(imported -> URI.create(imported.getHref()))
        .collect(Collectors.toList()));
  }

  /**
   * Parse the provided XML resource as a Metaschema module.
   *
   * @param resource
   *          the resource to parse
   * @return the XMLBeans representation of the Metaschema module
   * @throws IOException
   *           if a parsing error occurred
   */
  @Override
  protected METASCHEMADocument parseModule(@NonNull URI resource) throws IOException {
    METASCHEMADocument metaschemaXml;
    try {
      XmlOptions options = new XmlOptions();
      if (resolveEntities) {
        SAXParserFactory factory = SAXParserFactory.newInstance();

        try {
          // factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
          factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
          factory.setFeature("http://xml.org/sax/features/external-general-entities", true);
          factory.setFeature("http://xml.org/sax/features/external-parameter-entities", true);
          SAXParser parser = factory.newSAXParser();
          parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "file"); // ,jar:file
          XMLReader reader = parser.getXMLReader();
          reader.setEntityResolver((publicId, systemId) -> null);
          options.setLoadUseXMLReader(reader);
        } catch (SAXException | ParserConfigurationException ex) {
          throw new IOException(ex);
        }
        // options.setLoadEntityBytesLimit(204800);
        // options.setLoadUseDefaultResolver();
        options.setEntityResolver((publicId, systemId) -> {
          String effectiveSystemId = systemId;
          // TODO: It's very odd that the system id looks like this. Need to investigate.
          if (effectiveSystemId.startsWith("file://file://")) {
            effectiveSystemId = effectiveSystemId.substring(14);
          }
          URI resolvedSystemId = resource.resolve(effectiveSystemId);
          return new InputSource(resolvedSystemId.toString());
        });
        options.setLoadDTDGrammar(true);
      }
      options.setBaseURI(resource);
      options.setLoadLineNumbers();
      metaschemaXml = ObjectUtils.notNull(METASCHEMADocument.Factory.parse(resource.toURL(), options));
    } catch (XmlException ex) {
      throw new IOException(ex);
    }
    return metaschemaXml;
  }

}