/*
 * SPDX-FileCopyrightText: none
 * SPDX-License-Identifier: CC0-1.0
 */

package dev.metaschema.databind.io;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.format.DataFormatDetector;
import com.fasterxml.jackson.core.format.DataFormatMatcher;
import com.fasterxml.jackson.core.format.MatchStrength;
import com.fasterxml.jackson.dataformat.xml.XmlFactory;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import dev.metaschema.core.configuration.DefaultConfiguration;
import dev.metaschema.core.configuration.IConfiguration;
import dev.metaschema.core.util.ObjectUtils;
import dev.metaschema.databind.io.json.JsonFactoryFactory;
import dev.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
import edu.umd.cs.findbugs.annotations.NonNull;

/**
 * Provides a means to analyze content to determine what {@link Format} the data
 * is represented as.
 * <p>
 * Detection is delegated to a Jackson {@link DataFormatDetector} that is
 * configured once, when the detector is constructed.
 */
public class FormatDetector {

  private final DataFormatDetector detector;

  /**
   * Construct a new format detector using the default configuration.
   */
  public FormatDetector() {
    this(new DefaultConfiguration<>());
  }

  /**
   * Construct a new format detector using the provided {@code configuration}.
   * <p>
   * The YAML, JSON, and XML parser factories are registered for detection.
   *
   * @param configuration
   *          the deserialization configuration to use for detection
   */
  public FormatDetector(
      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
    this(configuration, newDetectorFactory(configuration));
  }

  /**
   * Construct a new format detector using the provided {@code configuration}
   * and parser factories.
   *
   * @param configuration
   *          the deserialization configuration to use for detection
   * @param detectors
   *          the parser factories to use for format detection
   */
  protected FormatDetector(
      @NonNull IConfiguration<DeserializationFeature<?>> configuration,
      @NonNull JsonFactory... detectors) {
    int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
    // NOTE(review): the configured limit is reduced by one before it is handed
    // to Jackson; the reason is not visible here -- confirm this is intended.
    this.detector = new DataFormatDetector(detectors)
        .withMinimalMatch(MatchStrength.INCONCLUSIVE)
        .withOptimalMatch(MatchStrength.SOLID_MATCH)
        .withMaxInputLookahead(lookaheadBytes - 1);
  }

  /**
   * Build the default set of parser factories used for detection: YAML first,
   * then JSON, then XML.
   *
   * @param config
   *          the deserialization configuration used to create the YAML factory
   * @return the parser factories, in the order listed above
   */
  @NonNull
  private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
    return new JsonFactory[] {
        YamlFactoryFactory.newParserFactoryInstance(config),
        JsonFactoryFactory.instance(),
        new XmlFactory()
    };
  }

  /**
   * Analyzes the provided {@code resource} to determine its format.
   * <p>
   * The stream opened on the resource is closed before this method returns.
   * NOTE(review): confirm whether {@link Result#getDataStream()} is expected to
   * remain usable on a result produced by this method.
   *
   * @param resource
   *          the resource to analyze
   * @return the analysis result
   * @throws IOException
   *           if an error occurred while reading the resource or the format
   *           could not be identified
   */
  @NonNull
  public Result detect(@NonNull URL resource) throws IOException {
    try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
      return detect(is);
    }
  }

  /**
   * Analyzes the data from the provided {@code inputStream} to determine its
   * format.
   *
   * @param inputStream
   *          the resource stream to analyze
   * @return the analysis result
   * @throws IOException
   *           if an error occurred while reading the resource or the format
   *           could not be identified
   */
  @NonNull
  public Result detect(@NonNull InputStream inputStream) throws IOException {
    DataFormatMatcher matcher = detector.findFormat(inputStream);
    // The matcher reports INCONCLUSIVE rather than NO_MATCH when no factory
    // matched at all, so the strength alone cannot identify a failed detection;
    // the presence of a matched factory has to be checked as well.
    if (!matcher.hasMatch() || MatchStrength.NO_MATCH.equals(matcher.getMatchStrength())) {
      throw new IOException("Unable to identify format");
    }
    return new Result(matcher);
  }

  /**
   * Represents the result of format detection, providing access to the detected
   * format and the data stream for further processing.
   */
  public static final class Result {
    @NonNull
    private final DataFormatMatcher matcher;

    private Result(@NonNull DataFormatMatcher matcher) {
      this.matcher = matcher;
    }

    /**
     * Get the detected format.
     *
     * @return the format
     * @throws UnsupportedOperationException
     *           if the matched format name is not one of YAML, JSON, or XML
     */
    @NonNull
    public Format getFormat() {
      String formatName = matcher.getMatchedFormatName();
      if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
        return Format.YAML;
      }
      if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
        return Format.JSON;
      }
      if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
        return Format.XML;
      }
      throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
    }

    /**
     * Get an {@link InputStream} that can be used to read the analyzed data from
     * the start.
     *
     * @return the stream
     */
    @SuppressWarnings("resource")
    @NonNull
    public InputStream getDataStream() {
      return ObjectUtils.notNull(matcher.getDataStream());
    }

    /**
     * Get the strength of the match.
     *
     * @return the strength
     */
    @NonNull
    public MatchStrength getMatchStrength() {
      return ObjectUtils.notNull(matcher.getMatchStrength());
    }
  }
}