001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package gov.nist.secauto.metaschema.databind.io;
007
008import com.ctc.wstx.stax.WstxInputFactory;
009import com.fasterxml.jackson.core.JsonParser;
010import com.fasterxml.jackson.core.JsonToken;
011import com.fasterxml.jackson.core.io.MergedStream;
012import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
013
014import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
015import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
016import gov.nist.secauto.metaschema.core.model.IBoundObject;
017import gov.nist.secauto.metaschema.core.model.util.JsonUtil;
018import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil;
019import gov.nist.secauto.metaschema.core.util.ObjectUtils;
020import gov.nist.secauto.metaschema.databind.IBindingContext;
021import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
022import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
023
024import org.codehaus.stax2.XMLEventReader2;
025import org.codehaus.stax2.XMLInputFactory2;
026import org.eclipse.jdt.annotation.NotOwning;
027import org.eclipse.jdt.annotation.Owning;
028
029import java.io.ByteArrayInputStream;
030import java.io.Closeable;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.InputStreamReader;
034import java.io.Reader;
035import java.net.URI;
036import java.nio.charset.Charset;
037
038import javax.xml.namespace.QName;
039import javax.xml.stream.XMLInputFactory;
040import javax.xml.stream.XMLStreamException;
041import javax.xml.stream.events.StartElement;
042import javax.xml.stream.events.XMLEvent;
043
044import edu.umd.cs.findbugs.annotations.NonNull;
045import edu.umd.cs.findbugs.annotations.Nullable;
046
047/**
048 * Provides a means to analyze content to determine what type of bound data it
049 * contains.
050 */
051public class ModelDetector {
052  @NonNull
053  private final IBindingContext bindingContext;
054  @NonNull
055  private final IConfiguration<DeserializationFeature<?>> configuration;
056
057  /**
058   * Construct a new format detector using the default configuration.
059   *
060   * @param bindingContext
061   *          information about how Java classes are bound to Module definitions
062   */
063  public ModelDetector(
064      @NonNull IBindingContext bindingContext) {
065    this(bindingContext, new DefaultConfiguration<>());
066  }
067
068  /**
069   * Construct a new format detector using the provided {@code configuration}.
070   *
071   * @param bindingContext
072   *          information about how Java classes are bound to Module definitions
073   * @param configuration
074   *          the deserialization configuration
075   */
076  public ModelDetector(
077      @NonNull IBindingContext bindingContext,
078      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
079    this.bindingContext = bindingContext;
080    this.configuration = configuration;
081  }
082
083  private int getLookaheadLimit() {
084    return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
085  }
086
087  @NonNull
088  private IBindingContext getBindingContext() {
089    return bindingContext;
090  }
091
092  @NonNull
093  private IConfiguration<DeserializationFeature<?>> getConfiguration() {
094    return configuration;
095  }
096
097  /**
098   * Analyzes the data from the provided {@code inputStream} to determine it's
099   * model.
100   *
101   * @param inputStream
102   *          the resource stream to analyze
103   * @param resource
104   *          the resource being parsed
105   * @param format
106   *          the expected format of the data to read
107   * @return the analysis result
108   * @throws IOException
109   *           if an error occurred while reading the resource
110   */
111  @NonNull
112  @Owning
113  public Result detect(
114      @NonNull @NotOwning InputStream inputStream,
115      @NonNull URI resource,
116      @NonNull Format format)
117      throws IOException {
118    byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
119
120    Class<? extends IBoundObject> clazz;
121    try (InputStream bis = new ByteArrayInputStream(buf)) {
122      assert bis != null;
123      switch (format) {
124      case JSON:
125        try (JsonParser parser = JsonFactoryFactory.instance().createParser(bis)) {
126          assert parser != null;
127          clazz = detectModelJsonClass(parser, resource);
128        }
129        break;
130      case YAML:
131        YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
132        try (JsonParser parser = factory.createParser(bis)) {
133          assert parser != null;
134          clazz = detectModelJsonClass(parser, resource);
135        }
136        break;
137      case XML:
138        clazz = detectModelXmlClass(bis, resource);
139        break;
140      default:
141        throw new UnsupportedOperationException(
142            String.format("The format '%s' dataStream not supported", format));
143      }
144    }
145
146    if (clazz == null) {
147      throw new IllegalStateException(
148          String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
149    }
150
151    return new Result(clazz, inputStream, buf);
152  }
153
154  @NonNull
155  private Class<? extends IBoundObject> detectModelXmlClass(
156      @NonNull InputStream is,
157      @NonNull URI resource) throws IOException {
158    StartElement start;
159    try {
160      XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
161      assert xmlInputFactory instanceof WstxInputFactory;
162      xmlInputFactory.configureForXmlConformance();
163      xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
164
165      Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
166      XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
167
168      while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
169        eventReader.nextEvent();
170      }
171
172      XMLEvent nextEvent = eventReader.peek();
173      if (!nextEvent.isStartElement()) {
174        throw new IOException(String.format("Unable to detect a start element%s.",
175            XmlEventUtil.generateLocationMessage(nextEvent, resource)));
176      }
177
178      start = eventReader.nextEvent().asStartElement();
179    } catch (XMLStreamException ex) {
180      throw new IOException(ex);
181    }
182
183    QName startElementQName = ObjectUtils.notNull(start.getName());
184    Class<? extends IBoundObject> clazz = getBindingContext().getBoundClassForRootXmlQName(startElementQName);
185    if (clazz == null) {
186      throw new IOException(String.format(
187          "Unrecognized element name: %s%s.",
188          startElementQName.toString(),
189          XmlEventUtil.generateLocationMessage(start, resource)));
190    }
191    return clazz;
192  }
193
194  @Nullable
195  private Class<? extends IBoundObject> detectModelJsonClass(
196      @NotOwning @NonNull JsonParser parser,
197      @NonNull URI resource) throws IOException {
198    Class<? extends IBoundObject> retval = null;
199    JsonUtil.advanceAndAssert(parser, resource, JsonToken.START_OBJECT);
200    outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
201      String name = ObjectUtils.notNull(parser.currentName());
202      if (!"$schema".equals(name)) {
203        IBindingContext bindingContext = getBindingContext();
204        retval = bindingContext.getBoundClassForRootJsonName(name);
205        if (retval == null) {
206          throw new IOException("Unrecognized JSON field name: " + name);
207        }
208        break outer;
209      }
210      // do nothing
211      parser.nextToken();
212      // JsonUtil.skipNextValue(parser);
213    }
214    return retval;
215  }
216
217  /**
218   * Describes the result of detecting which model a resource is described by.
219   * <p>
220   * The method {@link #getBoundClass()} can be used to get class binding for the
221   * identified node in a Metaschema-based model.
222   * <p>
223   * The method {@link #getDataStream()} can be used to get a stream to read the
224   * content used for detection. This will replay any content used for detection.
225   */
226  public static final class Result implements Closeable {
227    @NonNull
228    private final Class<? extends IBoundObject> boundClass;
229    @Owning
230    private InputStream dataStream;
231
232    private Result(
233        @NonNull Class<? extends IBoundObject> clazz,
234        @NonNull InputStream is,
235        @NonNull byte[] buf) {
236      this.boundClass = clazz;
237      this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
238    }
239
240    /**
241     * Get the Java class representing the detected bound object.
242     *
243     * @return the Java class
244     */
245    @NonNull
246    public Class<? extends IBoundObject> getBoundClass() {
247      return boundClass;
248    }
249
250    /**
251     * Get an {@link InputStream} that can be used to read the analyzed data from
252     * the start.
253     *
254     * @return the stream
255     */
256    @NonNull
257    @Owning
258    public InputStream getDataStream() {
259      return ObjectUtils.requireNonNull(dataStream, "data stream already closed");
260    }
261
262    @SuppressWarnings("PMD.NullAssignment")
263    @Override
264    public void close() throws IOException {
265      this.dataStream.close();
266      this.dataStream = null;
267    }
268  }
269}