1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package gov.nist.secauto.metaschema.databind.io;
7   
8   import com.ctc.wstx.stax.WstxInputFactory;
9   import com.fasterxml.jackson.core.JsonParser;
10  import com.fasterxml.jackson.core.JsonToken;
11  import com.fasterxml.jackson.core.io.MergedStream;
12  import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
13  
14  import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
15  import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
16  import gov.nist.secauto.metaschema.core.model.IBoundObject;
17  import gov.nist.secauto.metaschema.core.model.util.JsonUtil;
18  import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil;
19  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
20  import gov.nist.secauto.metaschema.databind.IBindingContext;
21  import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
22  import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
23  
24  import org.codehaus.stax2.XMLEventReader2;
25  import org.codehaus.stax2.XMLInputFactory2;
26  import org.eclipse.jdt.annotation.NotOwning;
27  import org.eclipse.jdt.annotation.Owning;
28  
29  import java.io.ByteArrayInputStream;
30  import java.io.Closeable;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.io.InputStreamReader;
34  import java.io.Reader;
35  import java.net.URI;
36  import java.nio.charset.Charset;
37  
38  import javax.xml.namespace.QName;
39  import javax.xml.stream.XMLInputFactory;
40  import javax.xml.stream.XMLStreamException;
41  import javax.xml.stream.events.StartElement;
42  import javax.xml.stream.events.XMLEvent;
43  
44  import edu.umd.cs.findbugs.annotations.NonNull;
45  import edu.umd.cs.findbugs.annotations.Nullable;
46  
47  /**
48   * Provides a means to analyze content to determine what type of bound data it
49   * contains.
50   */
51  public class ModelDetector {
52    @NonNull
53    private final IBindingContext bindingContext;
54    @NonNull
55    private final IConfiguration<DeserializationFeature<?>> configuration;
56  
57    /**
58     * Construct a new format detector using the default configuration.
59     *
60     * @param bindingContext
61     *          information about how Java classes are bound to Module definitions
62     */
63    public ModelDetector(
64        @NonNull IBindingContext bindingContext) {
65      this(bindingContext, new DefaultConfiguration<>());
66    }
67  
68    /**
69     * Construct a new format detector using the provided {@code configuration}.
70     *
71     * @param bindingContext
72     *          information about how Java classes are bound to Module definitions
73     * @param configuration
74     *          the deserialization configuration
75     */
76    public ModelDetector(
77        @NonNull IBindingContext bindingContext,
78        @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
79      this.bindingContext = bindingContext;
80      this.configuration = configuration;
81    }
82  
83    private int getLookaheadLimit() {
84      return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
85    }
86  
87    @NonNull
88    private IBindingContext getBindingContext() {
89      return bindingContext;
90    }
91  
92    @NonNull
93    private IConfiguration<DeserializationFeature<?>> getConfiguration() {
94      return configuration;
95    }
96  
97    /**
98     * Analyzes the data from the provided {@code inputStream} to determine it's
99     * model.
100    *
101    * @param inputStream
102    *          the resource stream to analyze
103    * @param resource
104    *          the resource being parsed
105    * @param format
106    *          the expected format of the data to read
107    * @return the analysis result
108    * @throws IOException
109    *           if an error occurred while reading the resource
110    */
111   @NonNull
112   @Owning
113   public Result detect(
114       @NonNull @NotOwning InputStream inputStream,
115       @NonNull URI resource,
116       @NonNull Format format)
117       throws IOException {
118     byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
119 
120     Class<? extends IBoundObject> clazz;
121     try (InputStream bis = new ByteArrayInputStream(buf)) {
122       assert bis != null;
123       switch (format) {
124       case JSON:
125         try (JsonParser parser = JsonFactoryFactory.instance().createParser(bis)) {
126           assert parser != null;
127           clazz = detectModelJsonClass(parser, resource);
128         }
129         break;
130       case YAML:
131         YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
132         try (JsonParser parser = factory.createParser(bis)) {
133           assert parser != null;
134           clazz = detectModelJsonClass(parser, resource);
135         }
136         break;
137       case XML:
138         clazz = detectModelXmlClass(bis, resource);
139         break;
140       default:
141         throw new UnsupportedOperationException(
142             String.format("The format '%s' dataStream not supported", format));
143       }
144     }
145 
146     if (clazz == null) {
147       throw new IllegalStateException(
148           String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
149     }
150 
151     return new Result(clazz, inputStream, buf);
152   }
153 
154   @NonNull
155   private Class<? extends IBoundObject> detectModelXmlClass(
156       @NonNull InputStream is,
157       @NonNull URI resource) throws IOException {
158     StartElement start;
159     try {
160       XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
161       assert xmlInputFactory instanceof WstxInputFactory;
162       xmlInputFactory.configureForXmlConformance();
163       xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
164 
165       Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
166       XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
167 
168       while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
169         eventReader.nextEvent();
170       }
171 
172       XMLEvent nextEvent = eventReader.peek();
173       if (!nextEvent.isStartElement()) {
174         throw new IOException(String.format("Unable to detect a start element%s.",
175             XmlEventUtil.generateLocationMessage(nextEvent, resource)));
176       }
177 
178       start = eventReader.nextEvent().asStartElement();
179     } catch (XMLStreamException ex) {
180       throw new IOException(ex);
181     }
182 
183     QName startElementQName = ObjectUtils.notNull(start.getName());
184     Class<? extends IBoundObject> clazz = getBindingContext().getBoundClassForRootXmlQName(startElementQName);
185     if (clazz == null) {
186       throw new IOException(String.format(
187           "Unrecognized element name: %s%s.",
188           startElementQName.toString(),
189           XmlEventUtil.generateLocationMessage(start, resource)));
190     }
191     return clazz;
192   }
193 
194   @Nullable
195   private Class<? extends IBoundObject> detectModelJsonClass(
196       @NotOwning @NonNull JsonParser parser,
197       @NonNull URI resource) throws IOException {
198     Class<? extends IBoundObject> retval = null;
199     JsonUtil.advanceAndAssert(parser, resource, JsonToken.START_OBJECT);
200     outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
201       String name = ObjectUtils.notNull(parser.currentName());
202       if (!"$schema".equals(name)) {
203         IBindingContext bindingContext = getBindingContext();
204         retval = bindingContext.getBoundClassForRootJsonName(name);
205         if (retval == null) {
206           throw new IOException("Unrecognized JSON field name: " + name);
207         }
208         break outer;
209       }
210       // do nothing
211       parser.nextToken();
212       // JsonUtil.skipNextValue(parser);
213     }
214     return retval;
215   }
216 
217   /**
218    * Describes the result of detecting which model a resource is described by.
219    * <p>
220    * The method {@link #getBoundClass()} can be used to get class binding for the
221    * identified node in a Metaschema-based model.
222    * <p>
223    * The method {@link #getDataStream()} can be used to get a stream to read the
224    * content used for detection. This will replay any content used for detection.
225    */
226   public static final class Result implements Closeable {
227     @NonNull
228     private final Class<? extends IBoundObject> boundClass;
229     @Owning
230     private InputStream dataStream;
231 
232     private Result(
233         @NonNull Class<? extends IBoundObject> clazz,
234         @NonNull InputStream is,
235         @NonNull byte[] buf) {
236       this.boundClass = clazz;
237       this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
238     }
239 
240     /**
241      * Get the Java class representing the detected bound object.
242      *
243      * @return the Java class
244      */
245     @NonNull
246     public Class<? extends IBoundObject> getBoundClass() {
247       return boundClass;
248     }
249 
250     /**
251      * Get an {@link InputStream} that can be used to read the analyzed data from
252      * the start.
253      *
254      * @return the stream
255      */
256     @NonNull
257     @Owning
258     public InputStream getDataStream() {
259       return ObjectUtils.requireNonNull(dataStream, "data stream already closed");
260     }
261 
262     @SuppressWarnings("PMD.NullAssignment")
263     @Override
264     public void close() throws IOException {
265       this.dataStream.close();
266       this.dataStream = null;
267     }
268   }
269 }