001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package gov.nist.secauto.metaschema.databind.io;
007
008import com.fasterxml.jackson.core.JsonFactory;
009import com.fasterxml.jackson.core.format.DataFormatDetector;
010import com.fasterxml.jackson.core.format.DataFormatMatcher;
011import com.fasterxml.jackson.core.format.MatchStrength;
012import com.fasterxml.jackson.dataformat.xml.XmlFactory;
013import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
014
015import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
016import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
017import gov.nist.secauto.metaschema.core.util.ObjectUtils;
018import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
019import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.net.URL;
024
025import edu.umd.cs.findbugs.annotations.NonNull;
026
027/**
028 * Provides a means to analyze content to determine what {@link Format} the data
029 * is represented as.
030 */
031public class FormatDetector {
032
033  private final DataFormatDetector detector;
034
035  /**
036   * Construct a new format detector using the default configuration.
037   */
038  public FormatDetector() {
039    this(new DefaultConfiguration<>());
040  }
041
042  /**
043   * Construct a new format detector using the provided {@code configuration}.
044   *
045   * @param configuration
046   *          the deserialization configuration to use for detection
047   */
048  public FormatDetector(
049      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
050    this(configuration, newDetectorFactory(configuration));
051  }
052
053  /**
054   * Construct a new format detector using the provided {@code configuration}.
055   *
056   * @param configuration
057   *          the deserialization configuration to use for detection
058   * @param detectors
059   *          the JSON parser instances to use for format detection
060   */
061  protected FormatDetector(
062      @NonNull IConfiguration<DeserializationFeature<?>> configuration,
063      @NonNull JsonFactory... detectors) {
064    int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
065    this.detector = new DataFormatDetector(detectors)
066        .withMinimalMatch(MatchStrength.INCONCLUSIVE)
067        .withOptimalMatch(MatchStrength.SOLID_MATCH)
068        .withMaxInputLookahead(lookaheadBytes - 1);
069
070  }
071
072  @NonNull
073  private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
074    JsonFactory[] detectorFactory = new JsonFactory[3];
075    detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config);
076    detectorFactory[1] = JsonFactoryFactory.instance();
077    detectorFactory[2] = new XmlFactory();
078    return detectorFactory;
079  }
080
081  /**
082   * Analyzes the provided {@code resource} to determine it's format.
083   *
084   * @param resource
085   *          the resource to analyze
086   * @return the analysis result
087   * @throws IOException
088   *           if an error occurred while reading the resource
089   */
090  @NonNull
091  public Result detect(@NonNull URL resource) throws IOException {
092    try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
093      return detect(is);
094    }
095  }
096
097  /**
098   * Analyzes the data from the provided {@code inputStream} to determine it's
099   * format.
100   *
101   * @param inputStream
102   *          the resource stream to analyze
103   * @return the analysis result
104   * @throws IOException
105   *           if an error occurred while reading the resource
106   */
107  @NonNull
108  public Result detect(@NonNull InputStream inputStream) throws IOException {
109    DataFormatMatcher matcher = detector.findFormat(inputStream);
110    switch (matcher.getMatchStrength()) {
111    case FULL_MATCH:
112    case SOLID_MATCH:
113    case WEAK_MATCH:
114    case INCONCLUSIVE:
115      return new Result(matcher);
116    case NO_MATCH:
117    default:
118      throw new IOException("Unable to identify format");
119    }
120  }
121
122  public static final class Result {
123    @NonNull
124    private final DataFormatMatcher matcher;
125
126    private Result(@NonNull DataFormatMatcher matcher) {
127      this.matcher = matcher;
128    }
129
130    /**
131     * Get the detected format.
132     *
133     * @return the format
134     */
135    @NonNull
136    public Format getFormat() {
137      Format retval;
138      String formatName = matcher.getMatchedFormatName();
139      if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
140        retval = Format.YAML;
141      } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
142        retval = Format.JSON;
143      } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
144        retval = Format.XML;
145      } else {
146        throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
147      }
148      return retval;
149    }
150
151    /**
152     * Get an {@link InputStream} that can be used to read the analyzed data from
153     * the start.
154     *
155     * @return the stream
156     */
157    @SuppressWarnings("resource")
158    @NonNull
159    public InputStream getDataStream() {
160      return ObjectUtils.notNull(matcher.getDataStream());
161    }
162
163    // @SuppressWarnings("resource")
164    // @NonNull
165    // public JsonParser getParser() throws IOException {
166    // return ObjectUtils.notNull(matcher.createParserWithMatch());
167    // }
168
169    /**
170     * Get the strength of the match.
171     *
172     * @return the strength
173     */
174    @NonNull
175    public MatchStrength getMatchStrength() {
176      return ObjectUtils.notNull(matcher.getMatchStrength());
177    }
178  }
179}