001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package dev.metaschema.databind.io;
007
008import com.fasterxml.jackson.core.JsonFactory;
009import com.fasterxml.jackson.core.format.DataFormatDetector;
010import com.fasterxml.jackson.core.format.DataFormatMatcher;
011import com.fasterxml.jackson.core.format.MatchStrength;
012import com.fasterxml.jackson.dataformat.xml.XmlFactory;
013import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
014
015import java.io.IOException;
016import java.io.InputStream;
017import java.net.URL;
018
019import dev.metaschema.core.configuration.DefaultConfiguration;
020import dev.metaschema.core.configuration.IConfiguration;
021import dev.metaschema.core.util.ObjectUtils;
022import dev.metaschema.databind.io.json.JsonFactoryFactory;
023import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
024import edu.umd.cs.findbugs.annotations.NonNull;
025
026/**
027 * Provides a means to analyze content to determine what {@link Format} the data
028 * is represented as.
029 */
030public class FormatDetector {
031
032  private final DataFormatDetector detector;
033
034  /**
035   * Construct a new format detector using the default configuration.
036   */
037  public FormatDetector() {
038    this(new DefaultConfiguration<>());
039  }
040
041  /**
042   * Construct a new format detector using the provided {@code configuration}.
043   *
044   * @param configuration
045   *          the deserialization configuration to use for detection
046   */
047  public FormatDetector(
048      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
049    this(configuration, newDetectorFactory(configuration));
050  }
051
052  /**
053   * Construct a new format detector using the provided {@code configuration}.
054   *
055   * @param configuration
056   *          the deserialization configuration to use for detection
057   * @param detectors
058   *          the JSON parser instances to use for format detection
059   */
060  protected FormatDetector(
061      @NonNull IConfiguration<DeserializationFeature<?>> configuration,
062      @NonNull JsonFactory... detectors) {
063    int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
064    this.detector = new DataFormatDetector(detectors)
065        .withMinimalMatch(MatchStrength.INCONCLUSIVE)
066        .withOptimalMatch(MatchStrength.SOLID_MATCH)
067        .withMaxInputLookahead(lookaheadBytes - 1);
068
069  }
070
071  @NonNull
072  private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
073    JsonFactory[] detectorFactory = new JsonFactory[3];
074    detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config);
075    detectorFactory[1] = JsonFactoryFactory.instance();
076    detectorFactory[2] = new XmlFactory();
077    return detectorFactory;
078  }
079
080  /**
081   * Analyzes the provided {@code resource} to determine it's format.
082   *
083   * @param resource
084   *          the resource to analyze
085   * @return the analysis result
086   * @throws IOException
087   *           if an error occurred while reading the resource
088   */
089  @NonNull
090  public Result detect(@NonNull URL resource) throws IOException {
091    try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
092      return detect(is);
093    }
094  }
095
096  /**
097   * Analyzes the data from the provided {@code inputStream} to determine it's
098   * format.
099   *
100   * @param inputStream
101   *          the resource stream to analyze
102   * @return the analysis result
103   * @throws IOException
104   *           if an error occurred while reading the resource
105   */
106  @NonNull
107  public Result detect(@NonNull InputStream inputStream) throws IOException {
108    DataFormatMatcher matcher = detector.findFormat(inputStream);
109    switch (matcher.getMatchStrength()) {
110    case FULL_MATCH:
111    case SOLID_MATCH:
112    case WEAK_MATCH:
113    case INCONCLUSIVE:
114      return new Result(matcher);
115    case NO_MATCH:
116    default:
117      throw new IOException("Unable to identify format");
118    }
119  }
120
121  /**
122   * Represents the result of format detection, providing access to the detected
123   * format and the data stream for further processing.
124   */
125  public static final class Result {
126    @NonNull
127    private final DataFormatMatcher matcher;
128
129    private Result(@NonNull DataFormatMatcher matcher) {
130      this.matcher = matcher;
131    }
132
133    /**
134     * Get the detected format.
135     *
136     * @return the format
137     */
138    @NonNull
139    public Format getFormat() {
140      Format retval;
141      String formatName = matcher.getMatchedFormatName();
142      if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
143        retval = Format.YAML;
144      } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
145        retval = Format.JSON;
146      } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
147        retval = Format.XML;
148      } else {
149        throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
150      }
151      return retval;
152    }
153
154    /**
155     * Get an {@link InputStream} that can be used to read the analyzed data from
156     * the start.
157     *
158     * @return the stream
159     */
160    @SuppressWarnings("resource")
161    @NonNull
162    public InputStream getDataStream() {
163      return ObjectUtils.notNull(matcher.getDataStream());
164    }
165
166    // @SuppressWarnings("resource")
167    // @NonNull
168    // public JsonParser getParser() throws IOException {
169    // return ObjectUtils.notNull(matcher.createParserWithMatch());
170    // }
171
172    /**
173     * Get the strength of the match.
174     *
175     * @return the strength
176     */
177    @NonNull
178    public MatchStrength getMatchStrength() {
179      return ObjectUtils.notNull(matcher.getMatchStrength());
180    }
181  }
182}