1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.core.metapath.function.library;
7   
8   import java.util.LinkedList;
9   import java.util.List;
10  import java.util.regex.Matcher;
11  import java.util.regex.Pattern;
12  import java.util.regex.PatternSyntaxException;
13  
14  import dev.metaschema.core.metapath.DynamicContext;
15  import dev.metaschema.core.metapath.MetapathConstants;
16  import dev.metaschema.core.metapath.function.FunctionUtils;
17  import dev.metaschema.core.metapath.function.IArgument;
18  import dev.metaschema.core.metapath.function.IFunction;
19  import dev.metaschema.core.metapath.function.regex.RegexUtil;
20  import dev.metaschema.core.metapath.function.regex.RegularExpressionMetapathException;
21  import dev.metaschema.core.metapath.item.IItem;
22  import dev.metaschema.core.metapath.item.ISequence;
23  import dev.metaschema.core.metapath.item.atomic.IStringItem;
24  import dev.metaschema.core.util.CollectionUtil;
25  import dev.metaschema.core.util.ObjectUtils;
26  import edu.umd.cs.findbugs.annotations.NonNull;
27  import edu.umd.cs.findbugs.annotations.Nullable;
28  
29  /**
30   * Implements the XPath 3.1 <a href=
31   * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>
32   * function.
33   */
34  public final class FnTokenize {
35    // CPD-OFF
36    @NonNull
37    private static final String NAME = "tokenize";
38    @NonNull
39    static final IFunction SIGNATURE_ONE_ARG = IFunction.builder()
40        .name(NAME)
41        .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
42        .deterministic()
43        .contextIndependent()
44        .focusIndependent()
45        .argument(IArgument.builder()
46            .name("input")
47            .type(IStringItem.type())
48            .zeroOrOne()
49            .build())
50        .returnType(IStringItem.type())
51        .returnZeroOrMore()
52        .functionHandler(FnTokenize::executeOneArg)
53        .build();
54    @NonNull
55    static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
56        .name(NAME)
57        .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
58        .deterministic()
59        .contextIndependent()
60        .focusIndependent()
61        .argument(IArgument.builder()
62            .name("input")
63            .type(IStringItem.type())
64            .zeroOrOne()
65            .build())
66        .argument(IArgument.builder()
67            .name("pattern")
68            .type(IStringItem.type())
69            .one()
70            .build())
71        .returnType(IStringItem.type())
72        .returnZeroOrMore()
73        .functionHandler(FnTokenize::executeTwoArg)
74        .build();
75  
76    @NonNull
77    static final IFunction SIGNATURE_THREE_ARG = IFunction.builder()
78        .name(NAME)
79        .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
80        .deterministic()
81        .contextIndependent()
82        .focusIndependent()
83        .argument(IArgument.builder()
84            .name("input")
85            .type(IStringItem.type())
86            .zeroOrOne()
87            .build())
88        .argument(IArgument.builder()
89            .name("pattern")
90            .type(IStringItem.type())
91            .one()
92            .build())
93        .argument(IArgument.builder()
94            .name("flags")
95            .type(IStringItem.type())
96            .one()
97            .build())
98        .returnType(IStringItem.type())
99        .returnZeroOrMore()
100       .functionHandler(FnTokenize::executeThreeArg)
101       .build();
102   // CPD-ON
103 
104   @SuppressWarnings("unused")
105   @NonNull
106   private static ISequence<IStringItem> executeOneArg(
107       @NonNull IFunction function,
108       @NonNull List<ISequence<?>> arguments,
109       @NonNull DynamicContext dynamicContext,
110       IItem focus) {
111     IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
112 
113     return input == null
114         ? ISequence.empty()
115         : ISequence.of(ObjectUtils.notNull(
116             fnTokenize(input.normalizeSpace().asString(), " ", "").stream()
117                 .map(IStringItem::valueOf)));
118   }
119 
120   @SuppressWarnings("unused")
121   @NonNull
122   private static ISequence<IStringItem> executeTwoArg(
123       @NonNull IFunction function,
124       @NonNull List<ISequence<?>> arguments,
125       @NonNull DynamicContext dynamicContext,
126       IItem focus) {
127     IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
128     IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
129 
130     return execute(input, pattern, IStringItem.valueOf(""));
131   }
132 
133   @SuppressWarnings("unused")
134   @NonNull
135   private static ISequence<IStringItem> executeThreeArg(
136       @NonNull IFunction function,
137       @NonNull List<ISequence<?>> arguments,
138       @NonNull DynamicContext dynamicContext,
139       IItem focus) {
140 
141     IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
142     IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
143     IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));
144 
145     return execute(input, pattern, flags);
146   }
147 
148   @NonNull
149   private static ISequence<IStringItem> execute(
150       @Nullable IStringItem input,
151       @NonNull IStringItem pattern,
152       @NonNull IStringItem flags) {
153     return input == null
154         ? ISequence.empty()
155         : fnTokenize(input, pattern, flags);
156   }
157 
158   /**
159    * Implements <a href=
160    * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
161    *
162    * @param input
163    *          the string to tokenize
164    * @param pattern
165    *          the regular expression to use for identifying token boundaries
166    * @param flags
167    *          matching options
168    * @return the sequence of tokens
169    */
170   @NonNull
171   public static ISequence<IStringItem> fnTokenize(
172       @NonNull IStringItem input,
173       @NonNull IStringItem pattern,
174       @NonNull IStringItem flags) {
175     return ISequence.of(ObjectUtils.notNull(
176         fnTokenize(input.asString(), pattern.asString(), flags.asString()).stream()
177             .map(IStringItem::valueOf)));
178   }
179 
180   /**
181    * Implements <a href=
182    * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
183    *
184    * @param input
185    *          the string to match against
186    * @param pattern
187    *          the regular expression to use for matching
188    * @param flags
189    *          matching options
190    * @return the stream of tokens
191    */
192   @SuppressWarnings("PMD.CyclomaticComplexity")
193   @NonNull
194   public static List<String> fnTokenize(@NonNull String input, @NonNull String pattern, @NonNull String flags) {
195     if (input.isEmpty()) {
196       return CollectionUtil.emptyList();
197     }
198 
199     try {
200       Matcher matcher = Pattern.compile(pattern, RegexUtil.parseFlags(flags)).matcher(input);
201 
202       int lastPosition = 0;
203       int length = input.length();
204 
205       List<String> result = new LinkedList<>();
206       while (matcher.find()) {
207         String group = matcher.group();
208         if (group.isEmpty()) {
209           throw new RegularExpressionMetapathException(RegularExpressionMetapathException.MATCHES_ZERO_LENGTH_STRING,
210               String.format("Pattern '%s' will match a zero-length string.", pattern));
211         }
212 
213         int start = matcher.start();
214         if (start == 0) {
215           result.add("");
216         } else {
217           result.add(input.substring(lastPosition, start));
218         }
219 
220         lastPosition = matcher.end();
221       }
222 
223       if (lastPosition == length) {
224         result.add("");
225       } else {
226         result.add(input.substring(lastPosition, length));
227       }
228 
229       return result;
230     } catch (PatternSyntaxException ex) {
231       throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, ex);
232     } catch (IllegalArgumentException ex) {
233       throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, ex);
234     }
235   }
236 
237   private FnTokenize() {
238     // disable construction
239   }
240 }