1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.cli.commands;
7   
8   import org.apache.commons.cli.CommandLine;
9   import org.apache.commons.cli.Option;
10  import org.apache.logging.log4j.LogManager;
11  import org.apache.logging.log4j.Logger;
12  
13  import java.io.File;
14  import java.io.FileNotFoundException;
15  import java.io.IOException;
16  import java.net.URI;
17  import java.net.UnknownHostException;
18  import java.nio.file.Path;
19  import java.nio.file.Paths;
20  import java.util.Collection;
21  import java.util.List;
22  import java.util.Locale;
23  import java.util.Set;
24  
25  import dev.metaschema.cli.processor.CLIProcessor;
26  import dev.metaschema.cli.processor.CallingContext;
27  import dev.metaschema.cli.processor.ExitCode;
28  import dev.metaschema.cli.processor.command.AbstractCommandExecutor;
29  import dev.metaschema.cli.processor.command.AbstractTerminalCommand;
30  import dev.metaschema.cli.processor.command.CommandExecutionException;
31  import dev.metaschema.cli.processor.command.ExtraArgument;
32  import dev.metaschema.cli.util.LoggingValidationHandler;
33  import dev.metaschema.core.configuration.DefaultConfiguration;
34  import dev.metaschema.core.configuration.IMutableConfiguration;
35  import dev.metaschema.core.metapath.MetapathException;
36  import dev.metaschema.core.metapath.format.IPathFormatter;
37  import dev.metaschema.core.metapath.format.PathFormatSelection;
38  import dev.metaschema.core.model.IModule;
39  import dev.metaschema.core.model.MetaschemaException;
40  import dev.metaschema.core.model.constraint.CompositeValidationEventListener;
41  import dev.metaschema.core.model.constraint.ConstraintValidationException;
42  import dev.metaschema.core.model.constraint.IConstraintSet;
43  import dev.metaschema.core.model.constraint.TimingCollector;
44  import dev.metaschema.core.model.constraint.ValidationFeature;
45  import dev.metaschema.core.model.validation.AggregateValidationResult;
46  import dev.metaschema.core.model.validation.IValidationResult;
47  import dev.metaschema.core.util.IVersionInfo;
48  import dev.metaschema.core.util.ObjectUtils;
49  import dev.metaschema.databind.IBindingContext;
50  import dev.metaschema.databind.IBindingContext.ISchemaValidationProvider;
51  import dev.metaschema.databind.io.Format;
52  import dev.metaschema.databind.io.IBoundLoader;
53  import dev.metaschema.modules.sarif.SarifValidationHandler;
54  import edu.umd.cs.findbugs.annotations.NonNull;
55  import edu.umd.cs.findbugs.annotations.Nullable;
56  
57  /**
58   * Used by implementing classes to provide a content validation command.
59   */
60  public abstract class AbstractValidateContentCommand
61      extends AbstractTerminalCommand {
62    private static final Logger LOGGER = LogManager.getLogger(AbstractValidateContentCommand.class);
63    @NonNull
64    private static final String COMMAND = "validate";
65    @NonNull
66    private static final List<ExtraArgument> EXTRA_ARGUMENTS = ObjectUtils.notNull(List.of(
67        ExtraArgument.newInstance("file-or-URI-to-validate", true, URI.class)));
68  
69    @NonNull
70    private static final Option CONSTRAINTS_OPTION = ObjectUtils.notNull(
71        Option.builder("c")
72            .hasArgs()
73            .argName("URL")
74            .type(URI.class)
75            .desc("additional constraint definitions")
76            .get());
77    @NonNull
78    private static final Option SARIF_OUTPUT_FILE_OPTION = ObjectUtils.notNull(
79        Option.builder("o")
80            .hasArg()
81            .argName("FILE")
82            .type(File.class)
83            .desc("write SARIF results to the provided FILE")
84            .numberOfArgs(1)
85            .get());
86    @NonNull
87    private static final Option SARIF_INCLUDE_PASS_OPTION = ObjectUtils.notNull(
88        Option.builder()
89            .longOpt("sarif-include-pass")
90            .desc("include pass results in SARIF")
91            .get());
92    @NonNull
93    private static final Option NO_SCHEMA_VALIDATION_OPTION = ObjectUtils.notNull(
94        Option.builder()
95            .longOpt("disable-schema-validation")
96            .desc("do not perform schema validation")
97            .get());
98    @NonNull
99    private static final Option NO_CONSTRAINT_VALIDATION_OPTION = ObjectUtils.notNull(
100       Option.builder()
101           .longOpt("disable-constraint-validation")
102           .desc("do not perform constraint validation")
103           .get());
104   @NonNull
105   private static final Option PATH_FORMAT_OPTION = ObjectUtils.notNull(
106       Option.builder()
107           .longOpt("path-format")
108           .hasArg()
109           .argName("FORMAT")
110           .type(PathFormatSelection.class)
111           .desc("path format in validation output: auto (default, selects based on document format), "
112               + "metapath, xpath, jsonpointer")
113           .get());
114   @NonNull
115   private static final Option PARALLEL_THREADS_OPTION = ObjectUtils.notNull(
116       Option.builder()
117           .longOpt("threads")
118           .hasArg()
119           .argName("count")
120           .type(Number.class)
121           .desc("number of threads for parallel constraint validation (default: 1, experimental)")
122           .get());
123   @NonNull
124   private static final Option SARIF_TIMING_OPTION = ObjectUtils.notNull(
125       Option.builder()
126           .longOpt("sarif-timing")
127           .desc("include per-constraint and per-phase timing data in SARIF output (requires -o, experimental)")
128           .get());
129 
130   @Override
131   public String getName() {
132     return COMMAND;
133   }
134 
135   @SuppressWarnings("null")
136   @Override
137   public Collection<? extends Option> gatherOptions() {
138     return List.of(
139         MetaschemaCommands.AS_FORMAT_OPTION,
140         CONSTRAINTS_OPTION,
141         SARIF_OUTPUT_FILE_OPTION,
142         SARIF_INCLUDE_PASS_OPTION,
143         SARIF_TIMING_OPTION,
144         NO_SCHEMA_VALIDATION_OPTION,
145         NO_CONSTRAINT_VALIDATION_OPTION,
146         PATH_FORMAT_OPTION,
147         PARALLEL_THREADS_OPTION);
148   }
149 
150   @Override
151   public List<ExtraArgument> getExtraArguments() {
152     return EXTRA_ARGUMENTS;
153   }
154 
155   /**
156    * Drives the validation execution.
157    */
158   protected abstract class AbstractValidationCommandExecutor
159       extends AbstractCommandExecutor {
160 
161     @Nullable
162     private TimingCollector timingCollector;
163     @Nullable
164     private SarifValidationHandler sarifHandler;
165 
166     /**
167      * Construct a new command executor.
168      *
169      * @param callingContext
170      *          the context of the command execution
171      * @param commandLine
172      *          the parsed command line details
173      */
174     public AbstractValidationCommandExecutor(
175         @NonNull CallingContext callingContext,
176         @NonNull CommandLine commandLine) {
177       super(callingContext, commandLine);
178     }
179 
180     /**
181      * Get the binding context to use for data processing.
182      *
183      * @param constraintSets
184      *          the constraints to configure in the resulting binding context
185      * @return the context
186      * @throws CommandExecutionException
187      *           if a error occurred while getting the binding context
188      */
189     @NonNull
190     protected abstract IBindingContext getBindingContext(@NonNull Set<IConstraintSet> constraintSets)
191         throws CommandExecutionException;
192 
193     /**
194      * Get the module to use for validation.
195      * <p>
196      * This module is used to generate schemas and as a source of built-in
197      * constraints.
198      *
199      * @param commandLine
200      *          the provided command line argument information
201      * @param bindingContext
202      *          the context used to access Metaschema module information based on
203      *          Java class bindings
204      * @return the loaded Metaschema module
205      * @throws CommandExecutionException
206      *           if an error occurred while loading the module
207      */
208     @NonNull
209     protected abstract IModule getModule(
210         @NonNull CommandLine commandLine,
211         @NonNull IBindingContext bindingContext)
212         throws CommandExecutionException;
213 
214     /**
215      * Get the schema validation implementation requested based on the provided
216      * command line arguments.
217      * <p>
218      * It is typical for this call to result in the dynamic generation of a schema
219      * to use for validation.
220      *
221      * @param module
222      *          the Metaschema module to generate the schema from
223      * @param commandLine
224      *          the provided command line argument information
225      * @param bindingContext
226      *          the context used to access Metaschema module information based on
227      *          Java class bindings
228      * @return the provider
229      */
230     @NonNull
231     protected abstract ISchemaValidationProvider getSchemaValidationProvider(
232         @NonNull IModule module,
233         @NonNull CommandLine commandLine,
234         @NonNull IBindingContext bindingContext);
235 
236     /**
237      * Execute the validation operation.
238      */
239     @Override
240     public void execute() throws CommandExecutionException {
241       CommandLine cmdLine = getCommandLine();
242       @SuppressWarnings("synthetic-access")
243       URI currentWorkingDirectory = ObjectUtils.notNull(getCurrentWorkingDirectory().toUri());
244 
245       Set<IConstraintSet> constraintSets = MetaschemaCommands.loadConstraintSets(
246           cmdLine,
247           CONSTRAINTS_OPTION,
248           currentWorkingDirectory);
249 
250       List<String> extraArgs = cmdLine.getArgList();
251 
252       URI source = MetaschemaCommands.handleSource(
253           ObjectUtils.requireNonNull(extraArgs.get(0)),
254           currentWorkingDirectory);
255 
256       IBindingContext bindingContext = getBindingContext(constraintSets);
257       IBoundLoader loader = bindingContext.newBoundLoader();
258       Format asFormat = MetaschemaCommands.determineSourceFormat(
259           cmdLine,
260           MetaschemaCommands.AS_FORMAT_OPTION,
261           loader,
262           source);
263 
264       IValidationResult validationResult = validate(source, asFormat, cmdLine, bindingContext);
265       handleOutput(source, validationResult, asFormat, cmdLine, bindingContext);
266 
267       if (validationResult == null || validationResult.isPassing()) {
268         if (LOGGER.isInfoEnabled()) {
269           LOGGER.info("The file '{}' is valid.", source);
270         }
271       } else if (LOGGER.isErrorEnabled()) {
272         LOGGER.error("The file '{}' is invalid.", source);
273       }
274 
275       if (validationResult != null && !validationResult.isPassing()) {
276         throw new CommandExecutionException(ExitCode.FAIL);
277       }
278     }
279 
280     @SuppressWarnings("PMD.CyclomaticComplexity")
281     @Nullable
282     private IValidationResult validate(
283         @NonNull URI source,
284         @NonNull Format asFormat,
285         @NonNull CommandLine commandLine,
286         @NonNull IBindingContext bindingContext) throws CommandExecutionException {
287 
288       if (LOGGER.isInfoEnabled()) {
289         LOGGER.info("Validating '{}' as {}.", source, asFormat.name());
290       }
291 
292       IValidationResult validationResult = null;
293       try {
294         // get the module, but don't register it
295         IModule module = getModule(commandLine, bindingContext);
296         if (!commandLine.hasOption(NO_SCHEMA_VALIDATION_OPTION)) {
297           // perform schema validation
298           validationResult = getSchemaValidationProvider(module, commandLine, bindingContext)
299               .validateWithSchema(source, asFormat, bindingContext);
300         }
301 
302         if (!commandLine.hasOption(NO_CONSTRAINT_VALIDATION_OPTION)) {
303           IMutableConfiguration<ValidationFeature<?>> configuration = new DefaultConfiguration<>();
304           if (commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION) && commandLine.hasOption(SARIF_INCLUDE_PASS_OPTION)) {
305             configuration.enableFeature(ValidationFeature.VALIDATE_GENERATE_PASS_FINDINGS);
306           }
307 
308           // Validate --sarif-timing requires -o
309           if (commandLine.hasOption(SARIF_TIMING_OPTION)
310               && !commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
311             throw new CommandExecutionException(
312                 ExitCode.INVALID_ARGUMENTS,
313                 "--sarif-timing requires -o <FILE> for SARIF output");
314           }
315 
316           // Configure timing collection if requested
317           if (commandLine.hasOption(SARIF_TIMING_OPTION) && commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
318             TimingCollector collector = new TimingCollector();
319             this.timingCollector = collector;
320 
321             // Create SARIF handler early for per-result timing event delivery
322             IVersionInfo version = getCallingContext().getCLIProcessor()
323                 .getVersionInfos().get(CLIProcessor.COMMAND_VERSION);
324             SarifValidationHandler handler
325                 = new SarifValidationHandler(source, version);
326             this.sarifHandler = handler;
327 
328             // Register both listeners as a composite
329             configuration.set(ValidationFeature.EVENT_LISTENER,
330                 new CompositeValidationEventListener(List.of(collector, handler)));
331           }
332 
333           // Configure parallel validation if requested
334           if (commandLine.hasOption(PARALLEL_THREADS_OPTION)) {
335             String threadValue = commandLine.getOptionValue(PARALLEL_THREADS_OPTION);
336             int threadCount;
337             try {
338               threadCount = Integer.parseInt(threadValue);
339             } catch (NumberFormatException ex) {
340               throw new CommandExecutionException(
341                   ExitCode.INVALID_ARGUMENTS,
342                   String.format("Invalid thread count '%s': must be a positive integer", threadValue),
343                   ex);
344             }
345             if (threadCount < 1) {
346               throw new CommandExecutionException(
347                   ExitCode.INVALID_ARGUMENTS,
348                   String.format("Thread count must be at least 1, got: %d", threadCount));
349             }
350             if (threadCount > 1) {
351               if (LOGGER.isWarnEnabled()) {
352                 LOGGER.warn("Parallel constraint validation is an experimental feature. "
353                     + "Using {} threads.", threadCount);
354               }
355               configuration.set(ValidationFeature.PARALLEL_THREADS, threadCount);
356             }
357           }
358 
359           // perform constraint validation
360           bindingContext.registerModule(module); // ensure the module is registered
361           TimingCollector collector = this.timingCollector;
362           if (collector != null) {
363             collector.beforeValidation(source);
364           }
365           IValidationResult constraintValidationResult;
366           try {
367             constraintValidationResult = bindingContext.validateWithConstraints(source, configuration);
368           } finally {
369             if (collector != null) {
370               collector.afterValidation(source);
371             }
372           }
373           validationResult = validationResult == null
374               ? constraintValidationResult
375               : AggregateValidationResult.aggregate(validationResult, constraintValidationResult);
376         }
377       } catch (FileNotFoundException ex) {
378         throw new CommandExecutionException(
379             ExitCode.IO_ERROR,
380             String.format("Resource not found at '%s'", source),
381             ex);
382       } catch (UnknownHostException ex) {
383         throw new CommandExecutionException(
384             ExitCode.IO_ERROR,
385             String.format("Unknown host for '%s'.", source),
386             ex);
387       } catch (IOException ex) {
388         throw new CommandExecutionException(ExitCode.IO_ERROR, ex.getLocalizedMessage(), ex);
389       } catch (MetapathException | MetaschemaException | ConstraintValidationException ex) {
390         throw new CommandExecutionException(ExitCode.PROCESSING_ERROR, ex.getLocalizedMessage(), ex);
391       }
392       return validationResult;
393     }
394 
395     private void handleOutput(
396         @NonNull URI source,
397         @Nullable IValidationResult validationResult,
398         @NonNull Format asFormat,
399         @NonNull CommandLine commandLine,
400         @NonNull IBindingContext bindingContext) throws CommandExecutionException {
401       if (commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
402         Path sarifFile = ObjectUtils.notNull(Paths.get(commandLine.getOptionValue(SARIF_OUTPUT_FILE_OPTION)));
403 
404         try {
405           // Use pre-created handler (for per-result timing) or create a new one
406           SarifValidationHandler handler = this.sarifHandler;
407           if (handler == null) {
408             IVersionInfo version = getCallingContext().getCLIProcessor()
409                 .getVersionInfos().get(CLIProcessor.COMMAND_VERSION);
410             handler = new SarifValidationHandler(source, version);
411           }
412           if (timingCollector != null) {
413             handler.setTimingCollector(timingCollector);
414           }
415           if (validationResult != null) {
416             handler.addFindings(validationResult.getFindings());
417           }
418           handler.write(sarifFile, bindingContext);
419         } catch (IOException ex) {
420           throw new CommandExecutionException(ExitCode.IO_ERROR, ex.getLocalizedMessage(), ex);
421         }
422       } else if (validationResult != null && !validationResult.getFindings().isEmpty()) {
423         LOGGER.info("Validation identified the following issues:");
424         IPathFormatter pathFormatter = resolvePathFormatter(commandLine, asFormat);
425         LoggingValidationHandler.withPathFormatter(pathFormatter).handleResults(validationResult);
426       }
427 
428     }
429 
430     /**
431      * Resolve the path formatter based on command line option and document format.
432      *
433      * @param commandLine
434      *          the parsed command line
435      * @param asFormat
436      *          the document format
437      * @return the resolved path formatter
438      */
439     @NonNull
440     private IPathFormatter resolvePathFormatter(
441         @NonNull CommandLine commandLine,
442         @NonNull Format asFormat) {
443       PathFormatSelection selection = PathFormatSelection.AUTO;
444 
445       if (commandLine.hasOption(PATH_FORMAT_OPTION)) {
446         String value = commandLine.getOptionValue(PATH_FORMAT_OPTION);
447         if (value != null) {
448           selection = parsePathFormatSelection(value);
449         }
450       }
451 
452       return Format.resolvePathFormatter(selection, asFormat);
453     }
454 
455     /**
456      * Parse the path format selection from a string value.
457      *
458      * @param value
459      *          the string value from the command line
460      * @return the parsed selection, defaults to AUTO if unrecognized
461      */
462     @NonNull
463     private PathFormatSelection parsePathFormatSelection(@NonNull String value) {
464       switch (value.toLowerCase(Locale.ROOT)) {
465       case "auto":
466         return PathFormatSelection.AUTO;
467       case "metapath":
468         return PathFormatSelection.METAPATH;
469       case "xpath":
470         return PathFormatSelection.XPATH;
471       case "jsonpointer":
472       case "json-pointer":
473         return PathFormatSelection.JSON_POINTER;
474       default:
475         LOGGER.warn("Unrecognized path format '{}', using auto", value);
476         return PathFormatSelection.AUTO;
477       }
478     }
479   }
480 }