001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package dev.metaschema.databind.io;
007
008import com.ctc.wstx.stax.WstxInputFactory;
009import com.fasterxml.jackson.core.JsonParser;
010import com.fasterxml.jackson.core.JsonToken;
011import com.fasterxml.jackson.core.io.MergedStream;
012import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
013
014import org.codehaus.stax2.XMLEventReader2;
015import org.codehaus.stax2.XMLInputFactory2;
016import org.eclipse.jdt.annotation.NotOwning;
017import org.eclipse.jdt.annotation.Owning;
018
019import java.io.ByteArrayInputStream;
020import java.io.Closeable;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.InputStreamReader;
024import java.io.Reader;
025import java.net.URI;
026import java.nio.charset.Charset;
027
028import javax.xml.namespace.QName;
029import javax.xml.stream.XMLInputFactory;
030import javax.xml.stream.XMLStreamException;
031import javax.xml.stream.events.StartElement;
032import javax.xml.stream.events.XMLEvent;
033
034import dev.metaschema.core.configuration.DefaultConfiguration;
035import dev.metaschema.core.configuration.IConfiguration;
036import dev.metaschema.core.model.IBoundObject;
037import dev.metaschema.core.model.util.JsonUtil;
038import dev.metaschema.core.model.util.XmlEventUtil;
039import dev.metaschema.core.util.ObjectUtils;
040import dev.metaschema.databind.IBindingContext;
041import dev.metaschema.databind.io.json.JsonFactoryFactory;
042import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
043import edu.umd.cs.findbugs.annotations.NonNull;
044import edu.umd.cs.findbugs.annotations.Nullable;
045
046/**
047 * Provides a means to analyze content to determine what type of bound data it
048 * contains.
049 */
050public class ModelDetector {
051  @NonNull
052  private final IBindingContext bindingContext;
053  @NonNull
054  private final IConfiguration<DeserializationFeature<?>> configuration;
055
056  /**
057   * Construct a new format detector using the default configuration.
058   *
059   * @param bindingContext
060   *          information about how Java classes are bound to Module definitions
061   */
062  public ModelDetector(
063      @NonNull IBindingContext bindingContext) {
064    this(bindingContext, new DefaultConfiguration<>());
065  }
066
067  /**
068   * Construct a new format detector using the provided {@code configuration}.
069   *
070   * @param bindingContext
071   *          information about how Java classes are bound to Module definitions
072   * @param configuration
073   *          the deserialization configuration
074   */
075  public ModelDetector(
076      @NonNull IBindingContext bindingContext,
077      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
078    this.bindingContext = bindingContext;
079    this.configuration = configuration;
080  }
081
082  private int getLookaheadLimit() {
083    return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
084  }
085
086  @NonNull
087  private IBindingContext getBindingContext() {
088    return bindingContext;
089  }
090
091  @NonNull
092  private IConfiguration<DeserializationFeature<?>> getConfiguration() {
093    return configuration;
094  }
095
096  /**
097   * Analyzes the data from the provided {@code inputStream} to determine it's
098   * model.
099   * <p>
100   * <b>Ownership semantics:</b> This method transfers ownership of the input
101   * stream to the returned {@link Result} object. The stream is wrapped in a
102   * {@code MergedStream} that replays the buffered detection data followed by the
103   * remaining stream content. The caller should NOT close the original stream;
104   * instead, close the Result object which will close the underlying stream.
105   *
106   * @param inputStream
107   *          the resource stream to analyze. Ownership is transferred to the
108   *          returned Result; the caller should not close this stream directly.
109   * @param resource
110   *          the resource being parsed
111   * @param format
112   *          the expected format of the data to read
113   * @return the analysis result. The caller owns this result and is responsible
114   *         for closing it, which will close the underlying stream.
115   * @throws IOException
116   *           if an error occurred while reading the resource
117   */
118  @NonNull
119  @Owning
120  public Result detect(
121      @NonNull @Owning InputStream inputStream,
122      @NonNull URI resource,
123      @NonNull Format format)
124      throws IOException {
125    byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
126
127    Class<? extends IBoundObject> clazz;
128    try (InputStream bis = new ByteArrayInputStream(buf)) {
129      assert bis != null;
130      switch (format) {
131      case JSON:
132        try (JsonParser parser = JsonFactoryFactory.instance().createParser(bis)) {
133          assert parser != null;
134          clazz = detectModelJsonClass(parser, resource);
135        }
136        break;
137      case YAML:
138        YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
139        try (JsonParser parser = factory.createParser(bis)) {
140          assert parser != null;
141          clazz = detectModelJsonClass(parser, resource);
142        }
143        break;
144      case XML:
145        clazz = detectModelXmlClass(bis, resource);
146        break;
147      default:
148        throw new UnsupportedOperationException(
149            String.format("The format '%s' dataStream not supported", format));
150      }
151    }
152
153    if (clazz == null) {
154      throw new IllegalStateException(
155          String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
156    }
157
158    return new Result(clazz, inputStream, buf);
159  }
160
161  @NonNull
162  private Class<? extends IBoundObject> detectModelXmlClass(
163      @NonNull InputStream is,
164      @NonNull URI resource) throws IOException {
165    StartElement start;
166    try {
167      XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
168      assert xmlInputFactory instanceof WstxInputFactory;
169      xmlInputFactory.configureForXmlConformance();
170      xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
171
172      Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
173      XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
174
175      while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
176        eventReader.nextEvent();
177      }
178
179      XMLEvent nextEvent = eventReader.peek();
180      if (!nextEvent.isStartElement()) {
181        throw new IOException(String.format("Unable to detect a start element%s.",
182            XmlEventUtil.generateLocationMessage(nextEvent, resource)));
183      }
184
185      start = eventReader.nextEvent().asStartElement();
186    } catch (XMLStreamException ex) {
187      throw new IOException(ex);
188    }
189
190    QName startElementQName = ObjectUtils.notNull(start.getName());
191    Class<? extends IBoundObject> clazz = getBindingContext().getBoundClassForRootXmlQName(startElementQName);
192    if (clazz == null) {
193      throw new IOException(String.format(
194          "Unrecognized element name: %s%s.",
195          startElementQName.toString(),
196          XmlEventUtil.generateLocationMessage(start, resource)));
197    }
198    return clazz;
199  }
200
201  @Nullable
202  private Class<? extends IBoundObject> detectModelJsonClass(
203      @NotOwning @NonNull JsonParser parser,
204      @NonNull URI resource) throws IOException {
205    Class<? extends IBoundObject> retval = null;
206    JsonUtil.advanceAndAssert(parser, resource, JsonToken.START_OBJECT);
207    outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
208      String name = ObjectUtils.notNull(parser.currentName());
209      if (!"$schema".equals(name)) {
210        IBindingContext bindingContext = getBindingContext();
211        retval = bindingContext.getBoundClassForRootJsonName(name);
212        if (retval == null) {
213          throw new IOException("Unrecognized JSON field name: " + name);
214        }
215        break outer;
216      }
217      // do nothing
218      parser.nextToken();
219      // JsonUtil.skipNextValue(parser);
220    }
221    return retval;
222  }
223
224  /**
225   * Describes the result of detecting which model a resource is described by.
226   * <p>
227   * The method {@link #getBoundClass()} can be used to get class binding for the
228   * identified node in a Metaschema-based model.
229   * <p>
230   * The method {@link #getDataStream()} can be used to get a stream to read the
231   * content used for detection. This will replay any content used for detection.
232   */
233  public static final class Result implements Closeable {
234    @NonNull
235    private final Class<? extends IBoundObject> boundClass;
236    @Owning
237    private InputStream dataStream;
238
239    private Result(
240        @NonNull Class<? extends IBoundObject> clazz,
241        @NonNull InputStream is,
242        @NonNull byte[] buf) {
243      this.boundClass = clazz;
244      this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
245    }
246
247    /**
248     * Get the Java class representing the detected bound object.
249     *
250     * @return the Java class
251     */
252    @NonNull
253    public Class<? extends IBoundObject> getBoundClass() {
254      return boundClass;
255    }
256
257    /**
258     * Get an {@link InputStream} that can be used to read the analyzed data from
259     * the start.
260     * <p>
261     * The caller owns this stream and is responsible for closing it.
262     *
263     * @return the stream
264     */
265    @NonNull
266    @Owning
267    public InputStream getDataStream() {
268      return ObjectUtils.requireNonNull(dataStream, "data stream already closed");
269    }
270
271    @SuppressWarnings("PMD.NullAssignment")
272    @Override
273    public void close() throws IOException {
274      this.dataStream.close();
275      this.dataStream = null;
276    }
277  }
278}