001/*
002 * SPDX-FileCopyrightText: none
003 * SPDX-License-Identifier: CC0-1.0
004 */
005
006package dev.metaschema.cli.commands;
007
008import org.apache.commons.cli.CommandLine;
009import org.apache.commons.cli.Option;
010import org.apache.logging.log4j.LogManager;
011import org.apache.logging.log4j.Logger;
012
013import java.io.File;
014import java.io.FileNotFoundException;
015import java.io.IOException;
016import java.net.URI;
017import java.net.UnknownHostException;
018import java.nio.file.Path;
019import java.nio.file.Paths;
020import java.util.Collection;
021import java.util.List;
022import java.util.Locale;
023import java.util.Set;
024
025import dev.metaschema.cli.processor.CLIProcessor;
026import dev.metaschema.cli.processor.CallingContext;
027import dev.metaschema.cli.processor.ExitCode;
028import dev.metaschema.cli.processor.command.AbstractCommandExecutor;
029import dev.metaschema.cli.processor.command.AbstractTerminalCommand;
030import dev.metaschema.cli.processor.command.CommandExecutionException;
031import dev.metaschema.cli.processor.command.ExtraArgument;
032import dev.metaschema.cli.util.LoggingValidationHandler;
033import dev.metaschema.core.configuration.DefaultConfiguration;
034import dev.metaschema.core.configuration.IMutableConfiguration;
035import dev.metaschema.core.metapath.MetapathException;
036import dev.metaschema.core.metapath.format.IPathFormatter;
037import dev.metaschema.core.metapath.format.PathFormatSelection;
038import dev.metaschema.core.model.IModule;
039import dev.metaschema.core.model.MetaschemaException;
040import dev.metaschema.core.model.constraint.CompositeValidationEventListener;
041import dev.metaschema.core.model.constraint.ConstraintValidationException;
042import dev.metaschema.core.model.constraint.IConstraintSet;
043import dev.metaschema.core.model.constraint.TimingCollector;
044import dev.metaschema.core.model.constraint.ValidationFeature;
045import dev.metaschema.core.model.validation.AggregateValidationResult;
046import dev.metaschema.core.model.validation.IValidationResult;
047import dev.metaschema.core.util.IVersionInfo;
048import dev.metaschema.core.util.ObjectUtils;
049import dev.metaschema.databind.IBindingContext;
050import dev.metaschema.databind.IBindingContext.ISchemaValidationProvider;
051import dev.metaschema.databind.io.Format;
052import dev.metaschema.databind.io.IBoundLoader;
053import dev.metaschema.modules.sarif.SarifValidationHandler;
054import edu.umd.cs.findbugs.annotations.NonNull;
055import edu.umd.cs.findbugs.annotations.Nullable;
056
057/**
058 * Used by implementing classes to provide a content validation command.
059 */
060public abstract class AbstractValidateContentCommand
061    extends AbstractTerminalCommand {
062  private static final Logger LOGGER = LogManager.getLogger(AbstractValidateContentCommand.class);
063  @NonNull
064  private static final String COMMAND = "validate";
065  @NonNull
066  private static final List<ExtraArgument> EXTRA_ARGUMENTS = ObjectUtils.notNull(List.of(
067      ExtraArgument.newInstance("file-or-URI-to-validate", true, URI.class)));
068
069  @NonNull
070  private static final Option CONSTRAINTS_OPTION = ObjectUtils.notNull(
071      Option.builder("c")
072          .hasArgs()
073          .argName("URL")
074          .type(URI.class)
075          .desc("additional constraint definitions")
076          .get());
077  @NonNull
078  private static final Option SARIF_OUTPUT_FILE_OPTION = ObjectUtils.notNull(
079      Option.builder("o")
080          .hasArg()
081          .argName("FILE")
082          .type(File.class)
083          .desc("write SARIF results to the provided FILE")
084          .numberOfArgs(1)
085          .get());
086  @NonNull
087  private static final Option SARIF_INCLUDE_PASS_OPTION = ObjectUtils.notNull(
088      Option.builder()
089          .longOpt("sarif-include-pass")
090          .desc("include pass results in SARIF")
091          .get());
092  @NonNull
093  private static final Option NO_SCHEMA_VALIDATION_OPTION = ObjectUtils.notNull(
094      Option.builder()
095          .longOpt("disable-schema-validation")
096          .desc("do not perform schema validation")
097          .get());
098  @NonNull
099  private static final Option NO_CONSTRAINT_VALIDATION_OPTION = ObjectUtils.notNull(
100      Option.builder()
101          .longOpt("disable-constraint-validation")
102          .desc("do not perform constraint validation")
103          .get());
104  @NonNull
105  private static final Option PATH_FORMAT_OPTION = ObjectUtils.notNull(
106      Option.builder()
107          .longOpt("path-format")
108          .hasArg()
109          .argName("FORMAT")
110          .type(PathFormatSelection.class)
111          .desc("path format in validation output: auto (default, selects based on document format), "
112              + "metapath, xpath, jsonpointer")
113          .get());
114  @NonNull
115  private static final Option PARALLEL_THREADS_OPTION = ObjectUtils.notNull(
116      Option.builder()
117          .longOpt("threads")
118          .hasArg()
119          .argName("count")
120          .type(Number.class)
121          .desc("number of threads for parallel constraint validation (default: 1, experimental)")
122          .get());
123  @NonNull
124  private static final Option SARIF_TIMING_OPTION = ObjectUtils.notNull(
125      Option.builder()
126          .longOpt("sarif-timing")
127          .desc("include per-constraint and per-phase timing data in SARIF output (requires -o, experimental)")
128          .get());
129
130  @Override
131  public String getName() {
132    return COMMAND;
133  }
134
135  @SuppressWarnings("null")
136  @Override
137  public Collection<? extends Option> gatherOptions() {
138    return List.of(
139        MetaschemaCommands.AS_FORMAT_OPTION,
140        CONSTRAINTS_OPTION,
141        SARIF_OUTPUT_FILE_OPTION,
142        SARIF_INCLUDE_PASS_OPTION,
143        SARIF_TIMING_OPTION,
144        NO_SCHEMA_VALIDATION_OPTION,
145        NO_CONSTRAINT_VALIDATION_OPTION,
146        PATH_FORMAT_OPTION,
147        PARALLEL_THREADS_OPTION);
148  }
149
150  @Override
151  public List<ExtraArgument> getExtraArguments() {
152    return EXTRA_ARGUMENTS;
153  }
154
155  /**
156   * Drives the validation execution.
157   */
158  protected abstract class AbstractValidationCommandExecutor
159      extends AbstractCommandExecutor {
160
161    @Nullable
162    private TimingCollector timingCollector;
163    @Nullable
164    private SarifValidationHandler sarifHandler;
165
166    /**
167     * Construct a new command executor.
168     *
169     * @param callingContext
170     *          the context of the command execution
171     * @param commandLine
172     *          the parsed command line details
173     */
174    public AbstractValidationCommandExecutor(
175        @NonNull CallingContext callingContext,
176        @NonNull CommandLine commandLine) {
177      super(callingContext, commandLine);
178    }
179
180    /**
181     * Get the binding context to use for data processing.
182     *
183     * @param constraintSets
184     *          the constraints to configure in the resulting binding context
185     * @return the context
186     * @throws CommandExecutionException
187     *           if a error occurred while getting the binding context
188     */
189    @NonNull
190    protected abstract IBindingContext getBindingContext(@NonNull Set<IConstraintSet> constraintSets)
191        throws CommandExecutionException;
192
193    /**
194     * Get the module to use for validation.
195     * <p>
196     * This module is used to generate schemas and as a source of built-in
197     * constraints.
198     *
199     * @param commandLine
200     *          the provided command line argument information
201     * @param bindingContext
202     *          the context used to access Metaschema module information based on
203     *          Java class bindings
204     * @return the loaded Metaschema module
205     * @throws CommandExecutionException
206     *           if an error occurred while loading the module
207     */
208    @NonNull
209    protected abstract IModule getModule(
210        @NonNull CommandLine commandLine,
211        @NonNull IBindingContext bindingContext)
212        throws CommandExecutionException;
213
214    /**
215     * Get the schema validation implementation requested based on the provided
216     * command line arguments.
217     * <p>
218     * It is typical for this call to result in the dynamic generation of a schema
219     * to use for validation.
220     *
221     * @param module
222     *          the Metaschema module to generate the schema from
223     * @param commandLine
224     *          the provided command line argument information
225     * @param bindingContext
226     *          the context used to access Metaschema module information based on
227     *          Java class bindings
228     * @return the provider
229     */
230    @NonNull
231    protected abstract ISchemaValidationProvider getSchemaValidationProvider(
232        @NonNull IModule module,
233        @NonNull CommandLine commandLine,
234        @NonNull IBindingContext bindingContext);
235
236    /**
237     * Execute the validation operation.
238     */
239    @Override
240    public void execute() throws CommandExecutionException {
241      CommandLine cmdLine = getCommandLine();
242      @SuppressWarnings("synthetic-access")
243      URI currentWorkingDirectory = ObjectUtils.notNull(getCurrentWorkingDirectory().toUri());
244
245      Set<IConstraintSet> constraintSets = MetaschemaCommands.loadConstraintSets(
246          cmdLine,
247          CONSTRAINTS_OPTION,
248          currentWorkingDirectory);
249
250      List<String> extraArgs = cmdLine.getArgList();
251
252      URI source = MetaschemaCommands.handleSource(
253          ObjectUtils.requireNonNull(extraArgs.get(0)),
254          currentWorkingDirectory);
255
256      IBindingContext bindingContext = getBindingContext(constraintSets);
257      IBoundLoader loader = bindingContext.newBoundLoader();
258      Format asFormat = MetaschemaCommands.determineSourceFormat(
259          cmdLine,
260          MetaschemaCommands.AS_FORMAT_OPTION,
261          loader,
262          source);
263
264      IValidationResult validationResult = validate(source, asFormat, cmdLine, bindingContext);
265      handleOutput(source, validationResult, asFormat, cmdLine, bindingContext);
266
267      if (validationResult == null || validationResult.isPassing()) {
268        if (LOGGER.isInfoEnabled()) {
269          LOGGER.info("The file '{}' is valid.", source);
270        }
271      } else if (LOGGER.isErrorEnabled()) {
272        LOGGER.error("The file '{}' is invalid.", source);
273      }
274
275      if (validationResult != null && !validationResult.isPassing()) {
276        throw new CommandExecutionException(ExitCode.FAIL);
277      }
278    }
279
280    @SuppressWarnings("PMD.CyclomaticComplexity")
281    @Nullable
282    private IValidationResult validate(
283        @NonNull URI source,
284        @NonNull Format asFormat,
285        @NonNull CommandLine commandLine,
286        @NonNull IBindingContext bindingContext) throws CommandExecutionException {
287
288      if (LOGGER.isInfoEnabled()) {
289        LOGGER.info("Validating '{}' as {}.", source, asFormat.name());
290      }
291
292      IValidationResult validationResult = null;
293      try {
294        // get the module, but don't register it
295        IModule module = getModule(commandLine, bindingContext);
296        if (!commandLine.hasOption(NO_SCHEMA_VALIDATION_OPTION)) {
297          // perform schema validation
298          validationResult = getSchemaValidationProvider(module, commandLine, bindingContext)
299              .validateWithSchema(source, asFormat, bindingContext);
300        }
301
302        if (!commandLine.hasOption(NO_CONSTRAINT_VALIDATION_OPTION)) {
303          IMutableConfiguration<ValidationFeature<?>> configuration = new DefaultConfiguration<>();
304          if (commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION) && commandLine.hasOption(SARIF_INCLUDE_PASS_OPTION)) {
305            configuration.enableFeature(ValidationFeature.VALIDATE_GENERATE_PASS_FINDINGS);
306          }
307
308          // Validate --sarif-timing requires -o
309          if (commandLine.hasOption(SARIF_TIMING_OPTION)
310              && !commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
311            throw new CommandExecutionException(
312                ExitCode.INVALID_ARGUMENTS,
313                "--sarif-timing requires -o <FILE> for SARIF output");
314          }
315
316          // Configure timing collection if requested
317          if (commandLine.hasOption(SARIF_TIMING_OPTION) && commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
318            TimingCollector collector = new TimingCollector();
319            this.timingCollector = collector;
320
321            // Create SARIF handler early for per-result timing event delivery
322            IVersionInfo version = getCallingContext().getCLIProcessor()
323                .getVersionInfos().get(CLIProcessor.COMMAND_VERSION);
324            SarifValidationHandler handler
325                = new SarifValidationHandler(source, version);
326            this.sarifHandler = handler;
327
328            // Register both listeners as a composite
329            configuration.set(ValidationFeature.EVENT_LISTENER,
330                new CompositeValidationEventListener(List.of(collector, handler)));
331          }
332
333          // Configure parallel validation if requested
334          if (commandLine.hasOption(PARALLEL_THREADS_OPTION)) {
335            String threadValue = commandLine.getOptionValue(PARALLEL_THREADS_OPTION);
336            int threadCount;
337            try {
338              threadCount = Integer.parseInt(threadValue);
339            } catch (NumberFormatException ex) {
340              throw new CommandExecutionException(
341                  ExitCode.INVALID_ARGUMENTS,
342                  String.format("Invalid thread count '%s': must be a positive integer", threadValue),
343                  ex);
344            }
345            if (threadCount < 1) {
346              throw new CommandExecutionException(
347                  ExitCode.INVALID_ARGUMENTS,
348                  String.format("Thread count must be at least 1, got: %d", threadCount));
349            }
350            if (threadCount > 1) {
351              if (LOGGER.isWarnEnabled()) {
352                LOGGER.warn("Parallel constraint validation is an experimental feature. "
353                    + "Using {} threads.", threadCount);
354              }
355              configuration.set(ValidationFeature.PARALLEL_THREADS, threadCount);
356            }
357          }
358
359          // perform constraint validation
360          bindingContext.registerModule(module); // ensure the module is registered
361          TimingCollector collector = this.timingCollector;
362          if (collector != null) {
363            collector.beforeValidation(source);
364          }
365          IValidationResult constraintValidationResult;
366          try {
367            constraintValidationResult = bindingContext.validateWithConstraints(source, configuration);
368          } finally {
369            if (collector != null) {
370              collector.afterValidation(source);
371            }
372          }
373          validationResult = validationResult == null
374              ? constraintValidationResult
375              : AggregateValidationResult.aggregate(validationResult, constraintValidationResult);
376        }
377      } catch (FileNotFoundException ex) {
378        throw new CommandExecutionException(
379            ExitCode.IO_ERROR,
380            String.format("Resource not found at '%s'", source),
381            ex);
382      } catch (UnknownHostException ex) {
383        throw new CommandExecutionException(
384            ExitCode.IO_ERROR,
385            String.format("Unknown host for '%s'.", source),
386            ex);
387      } catch (IOException ex) {
388        throw new CommandExecutionException(ExitCode.IO_ERROR, ex.getLocalizedMessage(), ex);
389      } catch (MetapathException | MetaschemaException | ConstraintValidationException ex) {
390        throw new CommandExecutionException(ExitCode.PROCESSING_ERROR, ex.getLocalizedMessage(), ex);
391      }
392      return validationResult;
393    }
394
395    private void handleOutput(
396        @NonNull URI source,
397        @Nullable IValidationResult validationResult,
398        @NonNull Format asFormat,
399        @NonNull CommandLine commandLine,
400        @NonNull IBindingContext bindingContext) throws CommandExecutionException {
401      if (commandLine.hasOption(SARIF_OUTPUT_FILE_OPTION)) {
402        Path sarifFile = ObjectUtils.notNull(Paths.get(commandLine.getOptionValue(SARIF_OUTPUT_FILE_OPTION)));
403
404        try {
405          // Use pre-created handler (for per-result timing) or create a new one
406          SarifValidationHandler handler = this.sarifHandler;
407          if (handler == null) {
408            IVersionInfo version = getCallingContext().getCLIProcessor()
409                .getVersionInfos().get(CLIProcessor.COMMAND_VERSION);
410            handler = new SarifValidationHandler(source, version);
411          }
412          if (timingCollector != null) {
413            handler.setTimingCollector(timingCollector);
414          }
415          if (validationResult != null) {
416            handler.addFindings(validationResult.getFindings());
417          }
418          handler.write(sarifFile, bindingContext);
419        } catch (IOException ex) {
420          throw new CommandExecutionException(ExitCode.IO_ERROR, ex.getLocalizedMessage(), ex);
421        }
422      } else if (validationResult != null && !validationResult.getFindings().isEmpty()) {
423        LOGGER.info("Validation identified the following issues:");
424        IPathFormatter pathFormatter = resolvePathFormatter(commandLine, asFormat);
425        LoggingValidationHandler.withPathFormatter(pathFormatter).handleResults(validationResult);
426      }
427
428    }
429
430    /**
431     * Resolve the path formatter based on command line option and document format.
432     *
433     * @param commandLine
434     *          the parsed command line
435     * @param asFormat
436     *          the document format
437     * @return the resolved path formatter
438     */
439    @NonNull
440    private IPathFormatter resolvePathFormatter(
441        @NonNull CommandLine commandLine,
442        @NonNull Format asFormat) {
443      PathFormatSelection selection = PathFormatSelection.AUTO;
444
445      if (commandLine.hasOption(PATH_FORMAT_OPTION)) {
446        String value = commandLine.getOptionValue(PATH_FORMAT_OPTION);
447        if (value != null) {
448          selection = parsePathFormatSelection(value);
449        }
450      }
451
452      return Format.resolvePathFormatter(selection, asFormat);
453    }
454
455    /**
456     * Parse the path format selection from a string value.
457     *
458     * @param value
459     *          the string value from the command line
460     * @return the parsed selection, defaults to AUTO if unrecognized
461     */
462    @NonNull
463    private PathFormatSelection parsePathFormatSelection(@NonNull String value) {
464      switch (value.toLowerCase(Locale.ROOT)) {
465      case "auto":
466        return PathFormatSelection.AUTO;
467      case "metapath":
468        return PathFormatSelection.METAPATH;
469      case "xpath":
470        return PathFormatSelection.XPATH;
471      case "jsonpointer":
472      case "json-pointer":
473        return PathFormatSelection.JSON_POINTER;
474      default:
475        LOGGER.warn("Unrecognized path format '{}', using auto", value);
476        return PathFormatSelection.AUTO;
477      }
478    }
479  }
480}