FnTokenize.java

/*
 * SPDX-FileCopyrightText: none
 * SPDX-License-Identifier: CC0-1.0
 */

package gov.nist.secauto.metaschema.core.metapath.function.library;

import gov.nist.secauto.metaschema.core.metapath.DynamicContext;
import gov.nist.secauto.metaschema.core.metapath.ISequence;
import gov.nist.secauto.metaschema.core.metapath.MetapathConstants;
import gov.nist.secauto.metaschema.core.metapath.function.FunctionUtils;
import gov.nist.secauto.metaschema.core.metapath.function.IArgument;
import gov.nist.secauto.metaschema.core.metapath.function.IFunction;
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegexUtil;
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegularExpressionMetapathException;
import gov.nist.secauto.metaschema.core.metapath.item.IItem;
import gov.nist.secauto.metaschema.core.metapath.item.atomic.IStringItem;
import gov.nist.secauto.metaschema.core.util.CollectionUtil;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * Implements the XPath 3.1 <a href=
 * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>
 * function.
 * <p>
 * Three signatures are registered: a one-argument form that splits the
 * whitespace-normalized input on single spaces, a two-argument form that splits
 * on a regular expression, and a three-argument form that additionally accepts
 * matching flags.
 */
public final class FnTokenize {
  // CPD-OFF
  @NonNull
  private static final String NAME = "tokenize";
  @NonNull
  static final IFunction SIGNATURE_ONE_ARG = IFunction.builder()
      .name(NAME)
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeOneArg)
      .build();
  @NonNull
  static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
      .name(NAME)
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("pattern")
          .type(IStringItem.class)
          .one()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeTwoArg)
      .build();

  @NonNull
  static final IFunction SIGNATURE_THREE_ARG = IFunction.builder()
      .name(NAME)
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("pattern")
          .type(IStringItem.class)
          .one()
          .build())
      .argument(IArgument.builder()
          .name("flags")
          .type(IStringItem.class)
          .one()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeThreeArg)
      .build();
  // CPD-ON

  /**
   * Handler for the one-argument signature: the input is whitespace-normalized
   * and then split on single space characters.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input was provided
   */
  @SuppressWarnings({ "PMD.UnusedFormalParameter", "unused" })
  @NonNull
  private static ISequence<IStringItem> executeOneArg(
      @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @NonNull DynamicContext dynamicContext,
      IItem focus) {
    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));

    return input == null
        ? ISequence.empty()
        : toSequence(fnTokenize(input.normalizeSpace().asString(), " ", ""));
  }

  /**
   * Handler for the two-argument signature: tokenizes using the provided
   * pattern and an empty flags string.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string and
   *          index 1 the required pattern
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input was provided
   */
  @SuppressWarnings({ "PMD.UnusedFormalParameter", "unused" })
  @NonNull
  private static ISequence<IStringItem> executeTwoArg(
      @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @NonNull DynamicContext dynamicContext,
      IItem focus) {
    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));

    return execute(input, pattern, IStringItem.valueOf(""));
  }

  /**
   * Handler for the three-argument signature: tokenizes using the provided
   * pattern and flags.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string, index
   *          1 the required pattern, and index 2 the required flags
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input was provided
   */
  @SuppressWarnings({ "PMD.UnusedFormalParameter", "unused" })
  @NonNull
  private static ISequence<IStringItem> executeThreeArg(
      @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @NonNull DynamicContext dynamicContext,
      IItem focus) {

    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
    IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));

    return execute(input, pattern, flags);
  }

  /**
   * Shared tail of the two- and three-argument handlers: maps an absent input
   * to the empty sequence and otherwise delegates to
   * {@link #fnTokenize(IStringItem, IStringItem, IStringItem)}.
   *
   * @param input
   *          the string to tokenize, or {@code null} if none was provided
   * @param pattern
   *          the regular expression identifying token boundaries
   * @param flags
   *          matching options
   * @return the tokens, or an empty sequence if {@code input} is {@code null}
   */
  @NonNull
  private static ISequence<IStringItem> execute(
      @Nullable IStringItem input,
      @NonNull IStringItem pattern,
      @NonNull IStringItem flags) {
    return input == null
        ? ISequence.empty()
        : fnTokenize(input, pattern, flags);
  }

  /**
   * Implements <a href=
   * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
   *
   * @param input
   *          the string to tokenize
   * @param pattern
   *          the regular expression to use for identifying token boundaries
   * @param flags
   *          matching options
   * @return the sequence of tokens
   * @throws RegularExpressionMetapathException
   *           under the same conditions as
   *           {@link #fnTokenize(String, String, String)}
   */
  @NonNull
  public static ISequence<IStringItem> fnTokenize(
      @NonNull IStringItem input,
      @NonNull IStringItem pattern,
      @NonNull IStringItem flags) {
    return toSequence(fnTokenize(input.asString(), pattern.asString(), flags.asString()));
  }

  /**
   * Implements <a href=
   * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
   * <p>
   * The input is split at every match of the pattern. A match at the very start
   * of the input yields a leading zero-length token, a match at the very end
   * yields a trailing zero-length token, and adjacent matches yield a
   * zero-length token between them. A zero-length input yields an empty list
   * without the pattern or flags being examined.
   *
   * @param input
   *          the string to tokenize
   * @param pattern
   *          the regular expression to use for identifying token boundaries
   * @param flags
   *          matching options, interpreted by {@link RegexUtil#parseFlags}
   * @return the list of tokens, in input order
   * @throws RegularExpressionMetapathException
   *           with code
   *           {@link RegularExpressionMetapathException#MATCHES_ZERO_LENGTH_STRING}
   *           if the pattern matches a zero-length string, with code
   *           {@link RegularExpressionMetapathException#INVALID_EXPRESSION} if
   *           the pattern cannot be compiled, or with code
   *           {@link RegularExpressionMetapathException#INVALID_FLAG} if another
   *           {@link IllegalArgumentException} is raised while handling the
   *           flags or pattern
   */
  @SuppressWarnings({ "PMD.OnlyOneReturn", "PMD.CyclomaticComplexity" })
  @NonNull
  public static List<String> fnTokenize(@NonNull String input, @NonNull String pattern, @NonNull String flags) {
    if (input.isEmpty()) {
      return CollectionUtil.emptyList();
    }

    try {
      Pattern compiled = Pattern.compile(pattern, RegexUtil.parseFlags(flags));

      // The specification defines the error condition as the pattern matching
      // the zero-length string, independent of the actual input. Checking this
      // up front also rejects patterns such as "^$" that would otherwise never
      // match a non-empty input and so would slip past the per-match check.
      if (compiled.matcher("").find()) {
        throw zeroLengthMatch(pattern);
      }

      Matcher matcher = compiled.matcher(input);
      List<String> result = new LinkedList<>();
      int lastPosition = 0;
      while (matcher.find()) {
        int start = matcher.start();
        int end = matcher.end();
        if (start == end) {
          // Still needed for constructs that match empty only inside an input
          // (e.g. a lookahead); without it the tokens would be meaningless.
          throw zeroLengthMatch(pattern);
        }

        // yields "" when the separator is at the start or follows another one
        result.add(input.substring(lastPosition, start));
        lastPosition = end;
      }

      // the remainder after the last separator; "" if the input ends with one
      result.add(input.substring(lastPosition));

      return result;
    } catch (PatternSyntaxException ex) {
      throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, ex);
    } catch (IllegalArgumentException ex) {
      throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, ex);
    }
  }

  /**
   * Builds the exception reported when the pattern matches a zero-length
   * string.
   *
   * @param pattern
   *          the offending regular expression
   * @return the exception to throw
   */
  @NonNull
  private static RegularExpressionMetapathException zeroLengthMatch(@NonNull String pattern) {
    return new RegularExpressionMetapathException(RegularExpressionMetapathException.MATCHES_ZERO_LENGTH_STRING,
        String.format("Pattern '%s' will match a zero-length string.", pattern));
  }

  /**
   * Wraps each token in a string item and collects them into a sequence.
   *
   * @param tokens
   *          the tokens to wrap
   * @return the sequence of string items, in the same order as the tokens
   */
  @NonNull
  private static ISequence<IStringItem> toSequence(@NonNull List<String> tokens) {
    return ISequence.of(ObjectUtils.notNull(
        tokens.stream()
            .map(IStringItem::valueOf)));
  }

  private FnTokenize() {
    // disable construction
  }
}