1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.databind.io;
7   
8   import com.ctc.wstx.stax.WstxInputFactory;
9   import com.fasterxml.jackson.core.JsonParser;
10  import com.fasterxml.jackson.core.JsonToken;
11  import com.fasterxml.jackson.core.io.MergedStream;
12  import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
13  
14  import org.codehaus.stax2.XMLEventReader2;
15  import org.codehaus.stax2.XMLInputFactory2;
16  import org.eclipse.jdt.annotation.NotOwning;
17  import org.eclipse.jdt.annotation.Owning;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.Closeable;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.InputStreamReader;
24  import java.io.Reader;
25  import java.net.URI;
26  import java.nio.charset.Charset;
27  
28  import javax.xml.namespace.QName;
29  import javax.xml.stream.XMLInputFactory;
30  import javax.xml.stream.XMLStreamException;
31  import javax.xml.stream.events.StartElement;
32  import javax.xml.stream.events.XMLEvent;
33  
34  import dev.metaschema.core.configuration.DefaultConfiguration;
35  import dev.metaschema.core.configuration.IConfiguration;
36  import dev.metaschema.core.model.IBoundObject;
37  import dev.metaschema.core.model.util.JsonUtil;
38  import dev.metaschema.core.model.util.XmlEventUtil;
39  import dev.metaschema.core.util.ObjectUtils;
40  import dev.metaschema.databind.IBindingContext;
41  import dev.metaschema.databind.io.json.JsonFactoryFactory;
42  import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
43  import edu.umd.cs.findbugs.annotations.NonNull;
44  import edu.umd.cs.findbugs.annotations.Nullable;
45  
46  /**
47   * Provides a means to analyze content to determine what type of bound data it
48   * contains.
49   */
50  public class ModelDetector {
51    @NonNull
52    private final IBindingContext bindingContext;
53    @NonNull
54    private final IConfiguration<DeserializationFeature<?>> configuration;
55  
56    /**
57     * Construct a new format detector using the default configuration.
58     *
59     * @param bindingContext
60     *          information about how Java classes are bound to Module definitions
61     */
62    public ModelDetector(
63        @NonNull IBindingContext bindingContext) {
64      this(bindingContext, new DefaultConfiguration<>());
65    }
66  
67    /**
68     * Construct a new format detector using the provided {@code configuration}.
69     *
70     * @param bindingContext
71     *          information about how Java classes are bound to Module definitions
72     * @param configuration
73     *          the deserialization configuration
74     */
75    public ModelDetector(
76        @NonNull IBindingContext bindingContext,
77        @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
78      this.bindingContext = bindingContext;
79      this.configuration = configuration;
80    }
81  
82    private int getLookaheadLimit() {
83      return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
84    }
85  
86    @NonNull
87    private IBindingContext getBindingContext() {
88      return bindingContext;
89    }
90  
91    @NonNull
92    private IConfiguration<DeserializationFeature<?>> getConfiguration() {
93      return configuration;
94    }
95  
96    /**
97     * Analyzes the data from the provided {@code inputStream} to determine it's
98     * model.
99     * <p>
100    * <b>Ownership semantics:</b> This method transfers ownership of the input
101    * stream to the returned {@link Result} object. The stream is wrapped in a
102    * {@code MergedStream} that replays the buffered detection data followed by the
103    * remaining stream content. The caller should NOT close the original stream;
104    * instead, close the Result object which will close the underlying stream.
105    *
106    * @param inputStream
107    *          the resource stream to analyze. Ownership is transferred to the
108    *          returned Result; the caller should not close this stream directly.
109    * @param resource
110    *          the resource being parsed
111    * @param format
112    *          the expected format of the data to read
113    * @return the analysis result. The caller owns this result and is responsible
114    *         for closing it, which will close the underlying stream.
115    * @throws IOException
116    *           if an error occurred while reading the resource
117    */
118   @NonNull
119   @Owning
120   public Result detect(
121       @NonNull @Owning InputStream inputStream,
122       @NonNull URI resource,
123       @NonNull Format format)
124       throws IOException {
125     byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
126 
127     Class<? extends IBoundObject> clazz;
128     try (InputStream bis = new ByteArrayInputStream(buf)) {
129       assert bis != null;
130       switch (format) {
131       case JSON:
132         try (JsonParser parser = JsonFactoryFactory.instance().createParser(bis)) {
133           assert parser != null;
134           clazz = detectModelJsonClass(parser, resource);
135         }
136         break;
137       case YAML:
138         YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
139         try (JsonParser parser = factory.createParser(bis)) {
140           assert parser != null;
141           clazz = detectModelJsonClass(parser, resource);
142         }
143         break;
144       case XML:
145         clazz = detectModelXmlClass(bis, resource);
146         break;
147       default:
148         throw new UnsupportedOperationException(
149             String.format("The format '%s' dataStream not supported", format));
150       }
151     }
152 
153     if (clazz == null) {
154       throw new IllegalStateException(
155           String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
156     }
157 
158     return new Result(clazz, inputStream, buf);
159   }
160 
161   @NonNull
162   private Class<? extends IBoundObject> detectModelXmlClass(
163       @NonNull InputStream is,
164       @NonNull URI resource) throws IOException {
165     StartElement start;
166     try {
167       XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
168       assert xmlInputFactory instanceof WstxInputFactory;
169       xmlInputFactory.configureForXmlConformance();
170       xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
171 
172       Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
173       XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
174 
175       while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
176         eventReader.nextEvent();
177       }
178 
179       XMLEvent nextEvent = eventReader.peek();
180       if (!nextEvent.isStartElement()) {
181         throw new IOException(String.format("Unable to detect a start element%s.",
182             XmlEventUtil.generateLocationMessage(nextEvent, resource)));
183       }
184 
185       start = eventReader.nextEvent().asStartElement();
186     } catch (XMLStreamException ex) {
187       throw new IOException(ex);
188     }
189 
190     QName startElementQName = ObjectUtils.notNull(start.getName());
191     Class<? extends IBoundObject> clazz = getBindingContext().getBoundClassForRootXmlQName(startElementQName);
192     if (clazz == null) {
193       throw new IOException(String.format(
194           "Unrecognized element name: %s%s.",
195           startElementQName.toString(),
196           XmlEventUtil.generateLocationMessage(start, resource)));
197     }
198     return clazz;
199   }
200 
201   @Nullable
202   private Class<? extends IBoundObject> detectModelJsonClass(
203       @NotOwning @NonNull JsonParser parser,
204       @NonNull URI resource) throws IOException {
205     Class<? extends IBoundObject> retval = null;
206     JsonUtil.advanceAndAssert(parser, resource, JsonToken.START_OBJECT);
207     outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
208       String name = ObjectUtils.notNull(parser.currentName());
209       if (!"$schema".equals(name)) {
210         IBindingContext bindingContext = getBindingContext();
211         retval = bindingContext.getBoundClassForRootJsonName(name);
212         if (retval == null) {
213           throw new IOException("Unrecognized JSON field name: " + name);
214         }
215         break outer;
216       }
217       // do nothing
218       parser.nextToken();
219       // JsonUtil.skipNextValue(parser);
220     }
221     return retval;
222   }
223 
224   /**
225    * Describes the result of detecting which model a resource is described by.
226    * <p>
227    * The method {@link #getBoundClass()} can be used to get class binding for the
228    * identified node in a Metaschema-based model.
229    * <p>
230    * The method {@link #getDataStream()} can be used to get a stream to read the
231    * content used for detection. This will replay any content used for detection.
232    */
233   public static final class Result implements Closeable {
234     @NonNull
235     private final Class<? extends IBoundObject> boundClass;
236     @Owning
237     private InputStream dataStream;
238 
239     private Result(
240         @NonNull Class<? extends IBoundObject> clazz,
241         @NonNull InputStream is,
242         @NonNull byte[] buf) {
243       this.boundClass = clazz;
244       this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
245     }
246 
247     /**
248      * Get the Java class representing the detected bound object.
249      *
250      * @return the Java class
251      */
252     @NonNull
253     public Class<? extends IBoundObject> getBoundClass() {
254       return boundClass;
255     }
256 
257     /**
258      * Get an {@link InputStream} that can be used to read the analyzed data from
259      * the start.
260      * <p>
261      * The caller owns this stream and is responsible for closing it.
262      *
263      * @return the stream
264      */
265     @NonNull
266     @Owning
267     public InputStream getDataStream() {
268       return ObjectUtils.requireNonNull(dataStream, "data stream already closed");
269     }
270 
271     @SuppressWarnings("PMD.NullAssignment")
272     @Override
273     public void close() throws IOException {
274       this.dataStream.close();
275       this.dataStream = null;
276     }
277   }
278 }