1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.databind.io;
7   
8   import com.fasterxml.jackson.core.JsonFactory;
9   import com.fasterxml.jackson.core.format.DataFormatDetector;
10  import com.fasterxml.jackson.core.format.DataFormatMatcher;
11  import com.fasterxml.jackson.core.format.MatchStrength;
12  import com.fasterxml.jackson.dataformat.xml.XmlFactory;
13  import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
14  
15  import java.io.IOException;
16  import java.io.InputStream;
17  import java.net.URL;
18  
19  import dev.metaschema.core.configuration.DefaultConfiguration;
20  import dev.metaschema.core.configuration.IConfiguration;
21  import dev.metaschema.core.util.ObjectUtils;
22  import dev.metaschema.databind.io.json.JsonFactoryFactory;
23  import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
24  import edu.umd.cs.findbugs.annotations.NonNull;
25  
26  /**
27   * Provides a means to analyze content to determine what {@link Format} the data
28   * is represented as.
29   */
30  public class FormatDetector {
31  
32    private final DataFormatDetector detector;
33  
34    /**
35     * Construct a new format detector using the default configuration.
36     */
37    public FormatDetector() {
38      this(new DefaultConfiguration<>());
39    }
40  
41    /**
42     * Construct a new format detector using the provided {@code configuration}.
43     *
44     * @param configuration
45     *          the deserialization configuration to use for detection
46     */
47    public FormatDetector(
48        @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
49      this(configuration, newDetectorFactory(configuration));
50    }
51  
52    /**
53     * Construct a new format detector using the provided {@code configuration}.
54     *
55     * @param configuration
56     *          the deserialization configuration to use for detection
57     * @param detectors
58     *          the JSON parser instances to use for format detection
59     */
60    protected FormatDetector(
61        @NonNull IConfiguration<DeserializationFeature<?>> configuration,
62        @NonNull JsonFactory... detectors) {
63      int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
64      this.detector = new DataFormatDetector(detectors)
65          .withMinimalMatch(MatchStrength.INCONCLUSIVE)
66          .withOptimalMatch(MatchStrength.SOLID_MATCH)
67          .withMaxInputLookahead(lookaheadBytes - 1);
68  
69    }
70  
71    @NonNull
72    private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
73      JsonFactory[] detectorFactory = new JsonFactory[3];
74      detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config);
75      detectorFactory[1] = JsonFactoryFactory.instance();
76      detectorFactory[2] = new XmlFactory();
77      return detectorFactory;
78    }
79  
80    /**
81     * Analyzes the provided {@code resource} to determine it's format.
82     *
83     * @param resource
84     *          the resource to analyze
85     * @return the analysis result
86     * @throws IOException
87     *           if an error occurred while reading the resource
88     */
89    @NonNull
90    public Result detect(@NonNull URL resource) throws IOException {
91      try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
92        return detect(is);
93      }
94    }
95  
96    /**
97     * Analyzes the data from the provided {@code inputStream} to determine it's
98     * format.
99     *
100    * @param inputStream
101    *          the resource stream to analyze
102    * @return the analysis result
103    * @throws IOException
104    *           if an error occurred while reading the resource
105    */
106   @NonNull
107   public Result detect(@NonNull InputStream inputStream) throws IOException {
108     DataFormatMatcher matcher = detector.findFormat(inputStream);
109     switch (matcher.getMatchStrength()) {
110     case FULL_MATCH:
111     case SOLID_MATCH:
112     case WEAK_MATCH:
113     case INCONCLUSIVE:
114       return new Result(matcher);
115     case NO_MATCH:
116     default:
117       throw new IOException("Unable to identify format");
118     }
119   }
120 
121   /**
122    * Represents the result of format detection, providing access to the detected
123    * format and the data stream for further processing.
124    */
125   public static final class Result {
126     @NonNull
127     private final DataFormatMatcher matcher;
128 
129     private Result(@NonNull DataFormatMatcher matcher) {
130       this.matcher = matcher;
131     }
132 
133     /**
134      * Get the detected format.
135      *
136      * @return the format
137      */
138     @NonNull
139     public Format getFormat() {
140       Format retval;
141       String formatName = matcher.getMatchedFormatName();
142       if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
143         retval = Format.YAML;
144       } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
145         retval = Format.JSON;
146       } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
147         retval = Format.XML;
148       } else {
149         throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
150       }
151       return retval;
152     }
153 
154     /**
155      * Get an {@link InputStream} that can be used to read the analyzed data from
156      * the start.
157      *
158      * @return the stream
159      */
160     @SuppressWarnings("resource")
161     @NonNull
162     public InputStream getDataStream() {
163       return ObjectUtils.notNull(matcher.getDataStream());
164     }
165 
166     // @SuppressWarnings("resource")
167     // @NonNull
168     // public JsonParser getParser() throws IOException {
169     // return ObjectUtils.notNull(matcher.createParserWithMatch());
170     // }
171 
172     /**
173      * Get the strength of the match.
174      *
175      * @return the strength
176      */
177     @NonNull
178     public MatchStrength getMatchStrength() {
179       return ObjectUtils.notNull(matcher.getMatchStrength());
180     }
181   }
182 }