/*
 * SPDX-FileCopyrightText: none
 * SPDX-License-Identifier: CC0-1.0
 */

package dev.metaschema.databind.io;

import com.ctc.wstx.stax.WstxInputFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.io.MergedStream;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;

import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLInputFactory2;
import org.eclipse.jdt.annotation.NotOwning;
import org.eclipse.jdt.annotation.Owning;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import dev.metaschema.core.configuration.DefaultConfiguration;
import dev.metaschema.core.configuration.IConfiguration;
import dev.metaschema.core.model.IBoundObject;
import dev.metaschema.core.model.util.JsonUtil;
import dev.metaschema.core.model.util.XmlEventUtil;
import dev.metaschema.core.util.ObjectUtils;
import dev.metaschema.databind.IBindingContext;
import dev.metaschema.databind.io.json.JsonFactoryFactory;
import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * Provides a means to analyze content to determine what type of bound data it
 * contains.
 */
public class ModelDetector {
  /**
   * The character set used to decode the buffered bytes when detecting the root
   * element of XML content.
   * <p>
   * NOTE(review): decoding is always UTF-8, regardless of any encoding declared
   * by the XML content itself; confirm this is intended.
   */
  @NonNull
  private static final Charset XML_DETECTION_CHARSET = ObjectUtils.notNull(StandardCharsets.UTF_8);

  @NonNull
  private final IBindingContext bindingContext;
  @NonNull
  private final IConfiguration<DeserializationFeature<?>> configuration;

  /**
   * Construct a new format detector using the default configuration.
   *
   * @param bindingContext
   *          information about how Java classes are bound to Module definitions
   */
  public ModelDetector(
      @NonNull IBindingContext bindingContext) {
    this(bindingContext, new DefaultConfiguration<>());
  }

  /**
   * Construct a new format detector using the provided {@code configuration}.
   *
   * @param bindingContext
   *          information about how Java classes are bound to Module definitions
   * @param configuration
   *          the deserialization configuration
   */
  public ModelDetector(
      @NonNull IBindingContext bindingContext,
      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
    this.bindingContext = bindingContext;
    this.configuration = configuration;
  }

  /**
   * Get the maximum number of bytes to read from the input when detecting the
   * model, as provided by the
   * {@link DeserializationFeature#FORMAT_DETECTION_LOOKAHEAD_LIMIT} setting.
   *
   * @return the lookahead limit
   */
  private int getLookaheadLimit() {
    return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
  }

  /**
   * Get the binding context used to map root names to bound classes.
   *
   * @return the binding context
   */
  @NonNull
  private IBindingContext getBindingContext() {
    return bindingContext;
  }

  /**
   * Get the deserialization configuration.
   *
   * @return the configuration
   */
  @NonNull
  private IConfiguration<DeserializationFeature<?>> getConfiguration() {
    return configuration;
  }

  /**
   * Analyzes the data from the provided {@code inputStream} to determine its
   * model.
   * <p>
   * <b>Ownership semantics:</b> This method transfers ownership of the input
   * stream to the returned {@link Result} object. The stream is wrapped in a
   * {@code MergedStream} that replays the buffered detection data followed by the
   * remaining stream content. The caller should NOT close the original stream;
   * instead, close the Result object which will close the underlying stream.
   * <p>
   * If detection fails, no {@link Result} is produced, so this method closes the
   * input stream itself before propagating the failure.
   *
   * @param inputStream
   *          the resource stream to analyze. Ownership is transferred to the
   *          returned Result; the caller should not close this stream directly.
   * @param resource
   *          the resource being parsed
   * @param format
   *          the expected format of the data to read
   * @return the analysis result. The caller owns this result and is responsible
   *         for closing it, which will close the underlying stream.
   * @throws IOException
   *           if an error occurred while reading the resource
   * @throws UnsupportedOperationException
   *           if the provided format is not supported
   * @throws IllegalStateException
   *           if no bound class could be determined from the content
   */
  @NonNull
  @Owning
  public Result detect(
      @NonNull @Owning InputStream inputStream,
      @NonNull URI resource,
      @NonNull Format format)
      throws IOException {
    try {
      // buffer the leading bytes so they can be replayed to the eventual reader
      byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));

      Class<? extends IBoundObject> clazz = detectModelClass(buf, resource, format);
      if (clazz == null) {
        throw new IllegalStateException(
            String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
      }

      return new Result(clazz, inputStream, buf);
    } catch (IOException | RuntimeException ex) {
      // this method owns the stream; without a Result nothing else will close it
      closeAfterFailure(inputStream, ex);
      throw ex;
    }
  }

  /**
   * Close the provided stream after a detection failure, recording any error
   * raised while closing as a suppressed exception on the original failure.
   *
   * @param inputStream
   *          the stream to close
   * @param failure
   *          the failure that is about to be propagated
   */
  private static void closeAfterFailure(@NonNull InputStream inputStream, @NonNull Exception failure) {
    try {
      inputStream.close();
    } catch (IOException closeEx) {
      failure.addSuppressed(closeEx);
    }
  }

  /**
   * Determine the bound class for the buffered content by parsing it using the
   * parser matching the provided format.
   *
   * @param buf
   *          the buffered leading bytes of the resource
   * @param resource
   *          the resource being parsed
   * @param format
   *          the expected format of the data
   * @return the detected bound class, or {@code null} if JSON or YAML content
   *         did not contain a root field other than {@code $schema}
   * @throws IOException
   *           if an error occurred while parsing the buffered content
   * @throws UnsupportedOperationException
   *           if the provided format is not supported
   */
  @Nullable
  private Class<? extends IBoundObject> detectModelClass(
      @NonNull byte[] buf,
      @NonNull URI resource,
      @NonNull Format format) throws IOException {
    Class<? extends IBoundObject> clazz;
    try (InputStream bis = new ByteArrayInputStream(buf)) {
      assert bis != null;
      switch (format) {
      case JSON:
        try (JsonParser parser = JsonFactoryFactory.instance().createParser(bis)) {
          assert parser != null;
          clazz = detectModelJsonClass(parser, resource);
        }
        break;
      case YAML:
        YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
        try (JsonParser parser = factory.createParser(bis)) {
          assert parser != null;
          clazz = detectModelJsonClass(parser, resource);
        }
        break;
      case XML:
        clazz = detectModelXmlClass(bis, resource);
        break;
      default:
        throw new UnsupportedOperationException(
            String.format("The format '%s' is not supported", format));
      }
    }
    return clazz;
  }

  /**
   * Determine the bound class for XML content by locating the first start
   * element and looking up its qualified name in the binding context.
   *
   * @param is
   *          the stream providing the buffered content to analyze
   * @param resource
   *          the resource being parsed, used for error reporting
   * @return the bound class associated with the root element
   * @throws IOException
   *           if the content cannot be parsed, no start element is found, or
   *           the root element name is not recognized
   */
  @NonNull
  private Class<? extends IBoundObject> detectModelXmlClass(
      @NonNull InputStream is,
      @NonNull URI resource) throws IOException {
    StartElement start;
    try {
      XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
      assert xmlInputFactory instanceof WstxInputFactory;
      xmlInputFactory.configureForXmlConformance();
      xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);

      Reader reader = new InputStreamReader(is, XML_DETECTION_CHARSET);
      XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);

      // skip everything that precedes the first start element
      while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
        eventReader.nextEvent();
      }

      // peek() may yield null when no events remain
      XMLEvent nextEvent = eventReader.peek();
      if (nextEvent == null) {
        throw new IOException(String.format("Unable to detect a start element in resource '%s'.", resource));
      }
      if (!nextEvent.isStartElement()) {
        throw new IOException(String.format("Unable to detect a start element%s.",
            XmlEventUtil.generateLocationMessage(nextEvent, resource)));
      }

      start = eventReader.nextEvent().asStartElement();
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }

    QName startElementQName = ObjectUtils.notNull(start.getName());
    Class<? extends IBoundObject> clazz = getBindingContext().getBoundClassForRootXmlQName(startElementQName);
    if (clazz == null) {
      throw new IOException(String.format(
          "Unrecognized element name: %s%s.",
          startElementQName.toString(),
          XmlEventUtil.generateLocationMessage(start, resource)));
    }
    return clazz;
  }

  /**
   * Determine the bound class for JSON or YAML content by looking up the first
   * field of the top-level object, other than {@code $schema}, in the binding
   * context.
   *
   * @param parser
   *          the parser to read the content from; this method does not close it
   * @param resource
   *          the resource being parsed, used for error reporting
   * @return the bound class associated with the root field, or {@code null} if
   *         the top-level object has no field other than {@code $schema}
   * @throws IOException
   *           if the content cannot be parsed or the root field name is not
   *           recognized
   */
  @Nullable
  private Class<? extends IBoundObject> detectModelJsonClass(
      @NotOwning @NonNull JsonParser parser,
      @NonNull URI resource) throws IOException {
    JsonUtil.advanceAndAssert(parser, resource, JsonToken.START_OBJECT);
    while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
      String name = ObjectUtils.notNull(parser.currentName());
      if ("$schema".equals(name)) {
        // move to the value, then skip it entirely; skipChildren() only has an
        // effect when the value is an object or array
        parser.nextToken();
        parser.skipChildren();
        continue;
      }

      Class<? extends IBoundObject> retval = getBindingContext().getBoundClassForRootJsonName(name);
      if (retval == null) {
        throw new IOException("Unrecognized JSON field name: " + name);
      }
      return retval;
    }
    return null;
  }

  /**
   * Describes the result of detecting which model a resource is described by.
   * <p>
   * The method {@link #getBoundClass()} can be used to get class binding for the
   * identified node in a Metaschema-based model.
   * <p>
   * The method {@link #getDataStream()} can be used to get a stream to read the
   * content used for detection. This will replay any content used for detection.
   */
  public static final class Result implements Closeable {
    @NonNull
    private final Class<? extends IBoundObject> boundClass;
    // set to null once this result has been closed
    @Owning
    private InputStream dataStream;

    private Result(
        @NonNull Class<? extends IBoundObject> clazz,
        @NonNull InputStream is,
        @NonNull byte[] buf) {
      this.boundClass = clazz;
      // replay the buffered bytes first, then continue with the rest of the stream
      this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
    }

    /**
     * Get the Java class representing the detected bound object.
     *
     * @return the Java class
     */
    @NonNull
    public Class<? extends IBoundObject> getBoundClass() {
      return boundClass;
    }

    /**
     * Get an {@link InputStream} that can be used to read the analyzed data from
     * the start.
     * <p>
     * The caller owns this stream and is responsible for closing it. This is the
     * same stream that {@link #close()} closes.
     *
     * @return the stream
     * @throws NullPointerException
     *           if this result has already been closed
     */
    @NonNull
    @Owning
    public InputStream getDataStream() {
      return ObjectUtils.requireNonNull(dataStream, "data stream already closed");
    }

    /**
     * Close the underlying data stream.
     * <p>
     * Calling this method on a result that is already closed has no effect.
     *
     * @throws IOException
     *           if an error occurred while closing the data stream
     */
    @SuppressWarnings("PMD.NullAssignment")
    @Override
    public void close() throws IOException {
      InputStream stream = this.dataStream;
      if (stream != null) {
        this.dataStream = null;
        stream.close();
      }
    }
  }
}