1
2
3
4
5
6 package dev.metaschema.core.metapath.function.library;
7
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import dev.metaschema.core.metapath.DynamicContext;
import dev.metaschema.core.metapath.MetapathConstants;
import dev.metaschema.core.metapath.function.FunctionUtils;
import dev.metaschema.core.metapath.function.IArgument;
import dev.metaschema.core.metapath.function.IFunction;
import dev.metaschema.core.metapath.function.regex.RegexUtil;
import dev.metaschema.core.metapath.function.regex.RegularExpressionMetapathException;
import dev.metaschema.core.metapath.item.IItem;
import dev.metaschema.core.metapath.item.ISequence;
import dev.metaschema.core.metapath.item.atomic.IStringItem;
import dev.metaschema.core.util.CollectionUtil;
import dev.metaschema.core.util.ObjectUtils;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
28
29
30
31
32
33
34 public final class FnTokenize {
35
36 @NonNull
37 private static final String NAME = "tokenize";
38 @NonNull
39 static final IFunction SIGNATURE_ONE_ARG = IFunction.builder()
40 .name(NAME)
41 .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
42 .deterministic()
43 .contextIndependent()
44 .focusIndependent()
45 .argument(IArgument.builder()
46 .name("input")
47 .type(IStringItem.type())
48 .zeroOrOne()
49 .build())
50 .returnType(IStringItem.type())
51 .returnZeroOrMore()
52 .functionHandler(FnTokenize::executeOneArg)
53 .build();
54 @NonNull
55 static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
56 .name(NAME)
57 .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
58 .deterministic()
59 .contextIndependent()
60 .focusIndependent()
61 .argument(IArgument.builder()
62 .name("input")
63 .type(IStringItem.type())
64 .zeroOrOne()
65 .build())
66 .argument(IArgument.builder()
67 .name("pattern")
68 .type(IStringItem.type())
69 .one()
70 .build())
71 .returnType(IStringItem.type())
72 .returnZeroOrMore()
73 .functionHandler(FnTokenize::executeTwoArg)
74 .build();
75
76 @NonNull
77 static final IFunction SIGNATURE_THREE_ARG = IFunction.builder()
78 .name(NAME)
79 .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
80 .deterministic()
81 .contextIndependent()
82 .focusIndependent()
83 .argument(IArgument.builder()
84 .name("input")
85 .type(IStringItem.type())
86 .zeroOrOne()
87 .build())
88 .argument(IArgument.builder()
89 .name("pattern")
90 .type(IStringItem.type())
91 .one()
92 .build())
93 .argument(IArgument.builder()
94 .name("flags")
95 .type(IStringItem.type())
96 .one()
97 .build())
98 .returnType(IStringItem.type())
99 .returnZeroOrMore()
100 .functionHandler(FnTokenize::executeThreeArg)
101 .build();
102
103
104 @SuppressWarnings("unused")
105 @NonNull
106 private static ISequence<IStringItem> executeOneArg(
107 @NonNull IFunction function,
108 @NonNull List<ISequence<?>> arguments,
109 @NonNull DynamicContext dynamicContext,
110 IItem focus) {
111 IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
112
113 return input == null
114 ? ISequence.empty()
115 : ISequence.of(ObjectUtils.notNull(
116 fnTokenize(input.normalizeSpace().asString(), " ", "").stream()
117 .map(IStringItem::valueOf)));
118 }
119
120 @SuppressWarnings("unused")
121 @NonNull
122 private static ISequence<IStringItem> executeTwoArg(
123 @NonNull IFunction function,
124 @NonNull List<ISequence<?>> arguments,
125 @NonNull DynamicContext dynamicContext,
126 IItem focus) {
127 IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
128 IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
129
130 return execute(input, pattern, IStringItem.valueOf(""));
131 }
132
133 @SuppressWarnings("unused")
134 @NonNull
135 private static ISequence<IStringItem> executeThreeArg(
136 @NonNull IFunction function,
137 @NonNull List<ISequence<?>> arguments,
138 @NonNull DynamicContext dynamicContext,
139 IItem focus) {
140
141 IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
142 IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
143 IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));
144
145 return execute(input, pattern, flags);
146 }
147
148 @NonNull
149 private static ISequence<IStringItem> execute(
150 @Nullable IStringItem input,
151 @NonNull IStringItem pattern,
152 @NonNull IStringItem flags) {
153 return input == null
154 ? ISequence.empty()
155 : fnTokenize(input, pattern, flags);
156 }
157
158
159
160
161
162
163
164
165
166
167
168
169
170 @NonNull
171 public static ISequence<IStringItem> fnTokenize(
172 @NonNull IStringItem input,
173 @NonNull IStringItem pattern,
174 @NonNull IStringItem flags) {
175 return ISequence.of(ObjectUtils.notNull(
176 fnTokenize(input.asString(), pattern.asString(), flags.asString()).stream()
177 .map(IStringItem::valueOf)));
178 }
179
180
181
182
183
184
185
186
187
188
189
190
191
192 @SuppressWarnings("PMD.CyclomaticComplexity")
193 @NonNull
194 public static List<String> fnTokenize(@NonNull String input, @NonNull String pattern, @NonNull String flags) {
195 if (input.isEmpty()) {
196 return CollectionUtil.emptyList();
197 }
198
199 try {
200 Matcher matcher = Pattern.compile(pattern, RegexUtil.parseFlags(flags)).matcher(input);
201
202 int lastPosition = 0;
203 int length = input.length();
204
205 List<String> result = new LinkedList<>();
206 while (matcher.find()) {
207 String group = matcher.group();
208 if (group.isEmpty()) {
209 throw new RegularExpressionMetapathException(RegularExpressionMetapathException.MATCHES_ZERO_LENGTH_STRING,
210 String.format("Pattern '%s' will match a zero-length string.", pattern));
211 }
212
213 int start = matcher.start();
214 if (start == 0) {
215 result.add("");
216 } else {
217 result.add(input.substring(lastPosition, start));
218 }
219
220 lastPosition = matcher.end();
221 }
222
223 if (lastPosition == length) {
224 result.add("");
225 } else {
226 result.add(input.substring(lastPosition, length));
227 }
228
229 return result;
230 } catch (PatternSyntaxException ex) {
231 throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, ex);
232 } catch (IllegalArgumentException ex) {
233 throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, ex);
234 }
235 }
236
237 private FnTokenize() {
238
239 }
240 }