1   /*
2    * SPDX-FileCopyrightText: none
3    * SPDX-License-Identifier: CC0-1.0
4    */
5   
6   package dev.metaschema.core.metapath.cst;
7   
8   import org.antlr.v4.runtime.ParserRuleContext;
9   import org.antlr.v4.runtime.tree.ParseTree;
10  import org.antlr.v4.runtime.tree.TerminalNode;
11  
12  import java.util.ArrayList;
13  import java.util.List;
14  import java.util.Objects;
15  import java.util.function.BiFunction;
16  import java.util.function.Function;
17  import java.util.regex.Matcher;
18  import java.util.regex.Pattern;
19  
20  import javax.xml.namespace.QName;
21  
22  import dev.metaschema.core.metapath.IExpression;
23  import dev.metaschema.core.metapath.StaticContext;
24  import dev.metaschema.core.metapath.StaticMetapathException;
25  import dev.metaschema.core.metapath.antlr.AbstractAstVisitor;
26  import dev.metaschema.core.metapath.antlr.Metapath10;
27  import dev.metaschema.core.util.ObjectUtils;
28  import edu.umd.cs.findbugs.annotations.NonNull;
29  import edu.umd.cs.findbugs.annotations.Nullable;
30  
31  /**
32   * Provides utility methods for processing Metapath abstract syntax tree (AST)
33   * nodes to produce a compact syntax tree (CST).
34   * <p>
35   * This base class implements common visitor patterns for transforming AST nodes
36   * into a more compact representation. The CST is optimized for efficient
37   * evaluation of Metapath expressions.
38   * <p>
39   * Key utility methods include:
40   * <ul>
41   * <li>{@link #nairyToList} - Processes n-airy expressions into a list
42   * <li>{@link #nairyToCollection} - Processes n-airy expressions into a
43   * collection
44   * <li>{@link #handleNAiryCollection} - Handles n-airy expressions with
45   * operators
46   * <li>{@link #handleGroupedNAiry} - Processes grouped n-airy expressions
47   * </ul>
48   */
49  @SuppressWarnings({
50      "PMD.CouplingBetweenObjects"
51  })
52  public abstract class AbstractCSTVisitorBase
53      extends AbstractAstVisitor<IExpression> {
54  
55    private static final Pattern QUALIFIED_NAME_PATTERN = Pattern.compile("^Q\\{([^}]*)\\}(.+)$");
56  
57    /**
58     * Get the QName for an
59     * <a href="https://www.w3.org/TR/xpath-31/#dt-expanded-qname">expanded
60     * QName</a>.
61     *
62     * @param eqname
63     *          the expanded QName
64     * @param context
65     *          the Metapath evaluation static context
66     * @param requireNamespace
67     *          if {@code true} require the resulting QName to have a namespace, or
68     *          {@code false} otherwise
69     * @return the QName
70     * @throws StaticMetapathException
71     *           if the expanded QName prefix is not bound or if the resulting
72     *           namespace is invalid
73     */
74    @NonNull
75    static QName toQName(@NonNull Metapath10.EqnameContext eqname, @NonNull StaticContext context,
76        boolean requireNamespace) {
77      String namespaceUri;
78      String localName;
79      TerminalNode node;
80      if ((node = eqname.URIQualifiedName()) != null) {
81        // BracedURILiteral - Q{uri}name -
82        // https://www.w3.org/TR/xpath-31/#doc-xpath31-BracedURILiteral
83        Matcher matcher = QUALIFIED_NAME_PATTERN.matcher(node.getText());
84        if (!matcher.matches()) {
85          // the syntax should always match above, since ANTLR is parsing it
86          throw new IllegalStateException();
87        }
88        namespaceUri = matcher.group(1);
89        localName = matcher.group(2);
90      } else {
91        String prefix;
92        String[] tokens = eqname.getText().split(":", 2);
93        if (tokens.length == 2) {
94          // lexical QName with prefix - prefix:name
95          // https://www.w3.org/TR/xpath-31/#dt-qname
96          prefix = ObjectUtils.notNull(tokens[0]);
97          localName = tokens[1];
98        } else {
99          // lexical QName without prefix - name
100         // https://www.w3.org/TR/xpath-31/#dt-qname
101         prefix = "";
102         localName = tokens[0];
103       }
104       namespaceUri = context.lookupNamespaceForPrefix(prefix);
105       if (namespaceUri == null && requireNamespace) {
106         throw new StaticMetapathException(
107             StaticMetapathException.PREFIX_NOT_EXPANDABLE,
108             String.format("The static context does not have a namespace URI configured for prefix '%s'.", prefix));
109       }
110     }
111 
112     QName retval;
113     if (namespaceUri == null) {
114       retval = new QName(localName);
115     } else {
116       if ("http://www.w3.org/2000/xmlns/".equals(namespaceUri)) {
117         throw new StaticMetapathException(StaticMetapathException.NAMESPACE_MISUSE,
118             "The namespace of an expanded QName cannot be: http://www.w3.org/2000/xmlns/");
119       }
120       retval = new QName(namespaceUri, localName);
121     }
122     return retval;
123   }
124 
125   @SuppressWarnings("null")
126   @Override
127   @NonNull
128   public IExpression visit(ParseTree tree) {
129     assert tree != null;
130     return super.visit(tree);
131   }
132 
133   /**
134    * Parse the provided context as an n-airy phrase.
135    *
136    * @param <CONTEXT>
137    *          the Java type of the antlr context to parse
138    * @param <T>
139    *          the Java type of the child expressions produced by this parser
140    * @param <R>
141    *          the Java type of the outer expression produced by the parser
142    * @param context
143    *          the antlr context to parse
144    * @param startIndex
145    *          the child index to start parsing on
146    * @param step
147    *          the increment to advance while parsing child expressions
148    * @param parser
149    *          a binary function used to produce child expressions
150    * @return the outer expression or {@code null} if no children exist to parse
151    */
152   @Nullable
153   protected <CONTEXT extends ParserRuleContext, T, R>
154       List<R> nairyToList(
155           @NonNull CONTEXT context,
156           int startIndex,
157           int step,
158           @NonNull BiFunction<CONTEXT, Integer, R> parser) {
159     int numChildren = context.getChildCount();
160 
161     List<R> retval = null;
162     if (startIndex < numChildren) {
163       retval = new ArrayList<>((numChildren - startIndex) / step);
164       for (int idx = startIndex; idx < numChildren; idx += step) {
165         R result = parser.apply(context, idx);
166         retval.add(result);
167       }
168     }
169     return retval;
170   }
171 
172   /**
173    * Parse the provided context as an n-airy phrase.
174    *
175    * @param <CONTEXT>
176    *          the Java type of the antlr context to parse
177    * @param <T>
178    *          the Java type of the child expressions produced by this parser
179    * @param <R>
180    *          the Java type of the outer expression produced by the parser
181    * @param context
182    *          the antlr context to parse
183    * @param startIndex
184    *          the child index to start parsing on
185    * @param step
186    *          the increment to advance while parsing child expressions
187    * @param parser
188    *          a binary function used to produce child expressions
189    * @param supplier
190    *          a function used to produce the other expression
191    * @return the outer expression or {@code null} if no children exist to parse
192    */
193   @Nullable
194   protected <CONTEXT extends ParserRuleContext, T, R>
195       R nairyToCollection(
196           @NonNull CONTEXT context,
197           int startIndex,
198           int step,
199           @NonNull BiFunction<CONTEXT, Integer, T> parser,
200           @NonNull Function<List<T>, R> supplier) {
201     int numChildren = context.getChildCount();
202 
203     R retval = null;
204     if (startIndex < numChildren) {
205       List<T> children = new ArrayList<>((numChildren - startIndex) / step);
206       for (int idx = startIndex; idx < numChildren; idx += step) {
207         T result = parser.apply(context, idx);
208         children.add(result);
209       }
210       retval = supplier.apply(children);
211     }
212     return retval;
213   }
214 
215   /**
216    * Parse the provided context as an n-airy phrase, which will be one of the
217    * following.
218    * <ol>
219    * <li>A single <code>expr</code> for which that expr will be returned
220    * <li><code>left (operator right)*</code> for which a collection of the left
221    * and right members will be returned based on what is provided by the supplier.
222    * </ol>
223    *
224    * @param <CONTEXT>
225    *          the context type to parse
226    * @param context
227    *          the context instance
228    * @param supplier
229    *          a supplier that will instantiate an expression based on the provided
230    *          parsed collection
231    * @return the left expression or the supplied expression for a collection
232    */
233   @NonNull
234   protected <CONTEXT extends ParserRuleContext> IExpression
235       handleNAiryCollection(
236           @NonNull CONTEXT context,
237           @NonNull Function<List<IExpression>, IExpression> supplier) {
238     return handleNAiryCollection(context, 1, 2, (ctx, idx) -> {
239       // skip operator, since we know what it is
240       ParseTree tree = ctx.getChild(idx + 1);
241       return tree.accept(this);
242     }, supplier);
243   }
244 
245   /**
246    * Parse the provided context as an n-airy phrase, which will be one of the
247    * following.
248    * <ol>
249    * <li><code>expr</code> for which the expr will be returned.
250    * <li><code>left</code> plus a number of additional recurring tokens as defined
251    * by the <em>step</em>.
252    * </ol>
253    * <p>
254    * In the second case, the supplier will be used to generate an expression from
255    * the collection of tuples.
256    *
257    * @param <CONTEXT>
258    *          the context type to parse
259    * @param context
260    *          the context instance
261    * @param startIndex
262    *          the starting context child position
263    * @param step
264    *          the amount to advance the loop over the context children
265    * @param parser
266    *          a binary function used to parse the context children
267    * @param supplier
268    *          a supplier that will instantiate an expression based on the provided
269    *          collection
270    * @return the left expression or the supplied expression for a collection
271    */
272   @NonNull
273   protected <CONTEXT extends ParserRuleContext> IExpression
274       handleNAiryCollection(
275           @NonNull CONTEXT context,
276           int startIndex,
277           int step,
278           @NonNull BiFunction<CONTEXT, Integer, IExpression> parser,
279           @NonNull Function<List<IExpression>, IExpression> supplier) {
280     int numChildren = context.getChildCount();
281 
282     if (numChildren == 0) {
283       throw new IllegalStateException("there should always be a child expression");
284     }
285     if (startIndex > numChildren) {
286       throw new IllegalStateException("Start index is out of bounds");
287     }
288 
289     ParseTree leftTree = context.getChild(0);
290     @SuppressWarnings({ "null" })
291     @NonNull
292     IExpression leftResult = leftTree.accept(this);
293 
294     IExpression retval;
295     if (numChildren == 1) {
296       retval = leftResult;
297     } else {
298       List<IExpression> children = new ArrayList<>(numChildren - 1 / step);
299       children.add(leftResult);
300       for (int i = startIndex; i < numChildren; i = i + step) {
301         IExpression result = parser.apply(context, i);
302         children.add(result);
303       }
304       IExpression result = ObjectUtils.notNull(supplier.apply(children));
305       retval = result;
306     }
307     return retval;
308   }
309 
310   /**
311    * Parse the provided context as a simple n-airy phrase, which will be one of
312    * the following.
313    * <ol>
314    * <li><code>expr</code> for which the expr will be returned
315    * <li><code>left (operator right)*</code> for which a collection of the left
316    * and right members will be returned based on what is provided by the supplier.
317    * </ol>
318    * <p>
319    * In the second case, the supplier will be used to generate an expression from
320    * the collection of tuples.
321    *
322    * @param <CONTEXT>
323    *          the context type to parse
324    * @param context
325    *          the context instance
326    * @param startingIndex
327    *          the index of the first child expression, which must be a
328    *          non-negative value that is less than the number of children
329    * @param step
330    *          the amount to advance the loop over the context children
331    * @param parser
332    *          a trinary function used to parse the context children and supply a
333    *          result
334    * @return the left expression or the supplied expression
335    */
336   protected <CONTEXT extends ParserRuleContext> IExpression handleGroupedNAiry(
337       @NonNull CONTEXT context,
338       int startingIndex,
339       int step,
340       @NonNull ITriFunction<CONTEXT, Integer, IExpression, IExpression> parser) {
341     int numChildren = context.getChildCount();
342     if (startingIndex >= numChildren) {
343       throw new IndexOutOfBoundsException(
344           String.format("The starting index '%d' exceeds the child count '%d'",
345               startingIndex,
346               numChildren));
347     }
348 
349     IExpression retval = null;
350     if (numChildren > 0) {
351       ParseTree leftTree = context.getChild(startingIndex);
352       retval = ObjectUtils.notNull(leftTree.accept(this));
353 
354       for (int i = startingIndex + 1; i < numChildren; i = i + step) {
355         retval = parser.apply(context, i, retval);
356       }
357     }
358     return retval;
359   }
360 
361   @FunctionalInterface
362   interface ITriFunction<T, U, V, R> {
363 
364     R apply(T argT, U argU, V argV);
365 
366     default <W> ITriFunction<T, U, V, W> andThen(Function<? super R, ? extends W> after) {
367       Objects.requireNonNull(after);
368       return (T t, U u, V v) -> after.apply(apply(t, u, v));
369     }
370   }
371 }