/*
 * SPDX-FileCopyrightText: none
 * SPDX-License-Identifier: CC0-1.0
 */

package dev.metaschema.databind.io.xml;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codehaus.stax2.XMLEventReader2;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import dev.metaschema.core.model.IBoundObject;
import dev.metaschema.core.model.IResourceLocation;
import dev.metaschema.core.model.SimpleResourceLocation;
import dev.metaschema.core.model.util.XmlEventUtil;
import dev.metaschema.core.qname.IEnhancedQName;
import dev.metaschema.core.util.CollectionUtil;
import dev.metaschema.core.util.ObjectUtils;
import dev.metaschema.databind.io.BindingException;
import dev.metaschema.databind.io.Format;
import dev.metaschema.databind.io.PathTracker;
import dev.metaschema.databind.io.ValidationContext;
import dev.metaschema.databind.model.IBoundDefinitionModelAssembly;
import dev.metaschema.databind.model.IBoundDefinitionModelComplex;
import dev.metaschema.databind.model.IBoundDefinitionModelFieldComplex;
import dev.metaschema.databind.model.IBoundFieldValue;
import dev.metaschema.databind.model.IBoundInstanceFlag;
import dev.metaschema.databind.model.IBoundInstanceModel;
import dev.metaschema.databind.model.IBoundInstanceModelAssembly;
import dev.metaschema.databind.model.IBoundInstanceModelChoiceGroup;
import dev.metaschema.databind.model.IBoundInstanceModelFieldComplex;
import dev.metaschema.databind.model.IBoundInstanceModelFieldScalar;
import dev.metaschema.databind.model.IBoundInstanceModelGroupedAssembly;
import dev.metaschema.databind.model.IBoundInstanceModelGroupedField;
import dev.metaschema.databind.model.IBoundInstanceModelGroupedNamed;
import dev.metaschema.databind.model.info.AbstractModelInstanceReadHandler;
import dev.metaschema.databind.model.info.IFeatureScalarItemValueHandler;
import dev.metaschema.databind.model.info.IItemReadHandler;
import dev.metaschema.databind.model.info.IModelInstanceCollectionInfo;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * Supports reading XML-based Metaschema module instances.
 */
@SuppressWarnings("PMD.CouplingBetweenObjects")
public class MetaschemaXmlReader
    implements IXmlParsingContext {
  private static final Logger LOGGER = LogManager.getLogger(MetaschemaXmlReader.class);
  @NonNull
  private final XMLEventReader2 reader;
  @NonNull
  private final URI source;
  @NonNull
  private final IXmlProblemHandler problemHandler;
  /**
   * Tracks the current parsing path for context-aware error reporting.
   */
  @NonNull
  private final PathTracker pathTracker = new PathTracker();

  /**
   * Construct a new Module-aware XML parser using the default problem handler.
   *
   * @param reader
   *          the XML reader to parse with
   * @param source
   *          the resource being parsed
   * @see DefaultXmlProblemHandler
   */
  public MetaschemaXmlReader(
      @NonNull XMLEventReader2 reader,
      @NonNull URI source) {
    this(reader, source, new DefaultXmlProblemHandler());
  }

  /**
   * Construct a new Module-aware parser.
   *
   * @param reader
   *          the XML reader to parse with
   * @param source
   *          the resource being parsed
   * @param problemHandler
   *          the problem handler implementation to use
   */
  public MetaschemaXmlReader(
      @NonNull XMLEventReader2 reader,
      @NonNull URI source,
      @NonNull IXmlProblemHandler problemHandler) {
    this.reader = reader;
    this.source = source;
    this.problemHandler = problemHandler;
  }

  @Override
  public XMLEventReader2 getReader() {
    return reader;
  }

  @Override
  public URI getSource() {
    return source;
  }

  @Override
  public IXmlProblemHandler getProblemHandler() {
    return problemHandler;
  }

  /**
   * Build a validation context from the current XML parser state.
   *
   * @param location
   *          the XML location, may be null
   * @return a new validation context with current location and path
   */
  @NonNull
  private ValidationContext buildValidationContext(@Nullable Location location) {
    IResourceLocation resourceLocation = location == null
        ? SimpleResourceLocation.UNKNOWN
        : SimpleResourceLocation.fromXmlLocation(location);
    return ValidationContext.of(source, resourceLocation, pathTracker.getCurrentPath(), Format.XML);
  }

  /**
   * Peek at the next XML event without consuming it, failing if the reader has
   * no more events.
   * <p>
   * {@code peek()} yields {@code null} once the event stream is exhausted. This
   * helper turns that condition into an {@link IOException} so that truncated
   * input is reported through the declared exception type instead of a
   * {@link NullPointerException}.
   *
   * @return the next event, which remains unconsumed
   * @throws IOException
   *           if the reader has no more events
   * @throws XMLStreamException
   *           if an error occurred while parsing XML events
   */
  @NonNull
  private XMLEvent peekRequired() throws IOException, XMLStreamException {
    XMLEvent event = reader.peek();
    if (event == null) {
      throw new IOException(
          String.format("Unexpected end of XML content while parsing '%s'.", source));
    }
    return event;
  }

  /**
   * Parses XML into a bound object based on the provided {@code definition}.
   * <p>
   * Parses the {@link XMLStreamConstants#START_DOCUMENT}, any processing
   * instructions, and the element.
   *
   * @param <CLASS>
   *          the returned object type
   * @param definition
   *          the definition describing the element data to read
   * @return the parsed object
   * @throws IOException
   *           if an error occurred while parsing the input
   */
  @Override
  @NonNull
  public <CLASS> CLASS read(@NonNull IBoundDefinitionModelComplex definition) throws IOException {
    URI resource = getSource();
    try {
      // we may be at the START_DOCUMENT; an exhausted reader peeks as null
      XMLEvent first = reader.peek();
      if (first != null && first.isStartDocument()) {
        XmlEventUtil.consumeAndAssert(reader, resource, XMLStreamConstants.START_DOCUMENT);
      }

      // advance past any other info to get to next start element
      XmlEventUtil.skipEvents(reader, XMLStreamConstants.CHARACTERS, XMLStreamConstants.PROCESSING_INSTRUCTION,
          XMLStreamConstants.DTD);

      XMLEvent event = peekRequired();
      if (!event.isStartElement()) {
        throw new IOException(
            String.format("The token '%s' is not an XML element%s.",
                XmlEventUtil.toEventName(event),
                XmlEventUtil.generateLocationMessage(event, resource)));
      }

      ItemReadHandler handler = new ItemReadHandler(ObjectUtils.notNull(event.asStartElement()));
      Object value = definition.readItem(null, handler);
      if (value == null) {
        event = reader.peek();
        throw new IOException(String.format("Unable to read data.%s",
            event == null ? "" : XmlEventUtil.generateLocationMessage(event, resource)));
      }

      return ObjectUtils.asType(value);
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }
  }

  /**
   * Read the XML attribute data described by the {@code targetDefinition} and
   * apply it to the provided {@code targetObject}.
   *
   * @param targetDefinition
   *          the Module definition that describes the syntax of the data to read
   * @param targetObject
   *          the Java object that data parsed by this method will be stored in
   * @param start
   *          the containing XML element that was previously parsed
   * @throws IOException
   *           if an error occurred while parsing the input
   * @throws XMLStreamException
   *           if an error occurred while parsing XML events
   */
  protected void readFlagInstances(
      @NonNull IBoundDefinitionModelComplex targetDefinition,
      @NonNull IBoundObject targetObject,
      @NonNull StartElement start) throws IOException, XMLStreamException {
    URI resource = getSource();

    // flags still present in this map after the loop were not provided as
    // attributes
    Map<IEnhancedQName, IBoundInstanceFlag> flagInstanceMap = targetDefinition.getFlagInstances().stream()
        .collect(Collectors.toMap(
            IBoundInstanceFlag::getQName,
            Function.identity()));

    for (Attribute attribute : CollectionUtil.toIterable(ObjectUtils.notNull(start.getAttributes()))) {
      IEnhancedQName qname = IEnhancedQName.of(ObjectUtils.requireNonNull(attribute.getName()));
      IBoundInstanceFlag instance = flagInstanceMap.get(qname);
      if (instance == null) {
        // unrecognized flag
        if (!getProblemHandler().handleUnknownAttribute(targetDefinition, targetObject, attribute, this)) {
          throw new IOException(
              String.format("Unrecognized attribute '%s'%s.",
                  qname,
                  XmlEventUtil.generateLocationMessage(attribute, resource)));
        }
      } else {
        try {
          // get the attribute value
          Object value = instance.getDefinition().getJavaTypeAdapter()
              .parse(ObjectUtils.notNull(attribute.getValue()));
          // apply the value to the parentObject
          instance.setValue(targetObject, value);
          flagInstanceMap.remove(qname);
        } catch (IllegalArgumentException ex) {
          throw new IOException(
              String.format("Malformed data '%s'%s. %s",
                  attribute.getValue(),
                  XmlEventUtil.generateLocationMessage(start, resource),
                  ex.getLocalizedMessage()),
              ex);
        }
      }
    }

    if (!flagInstanceMap.isEmpty()) {
      // Build validation context with current location and path
      ValidationContext context = buildValidationContext(start.getLocation());
      getProblemHandler().handleMissingFlagInstances(
          targetDefinition,
          targetObject,
          ObjectUtils.notNull(flagInstanceMap.values()),
          context);
    }
  }

  /**
   * Read the XML element data described by the {@code targetDefinition} and apply
   * it to the provided {@code targetObject}.
   *
   * @param targetDefinition
   *          the Module definition that describes the syntax of the data to read
   * @param targetObject
   *          the Java object that data parsed by this method will be stored in
   * @throws IOException
   *           if an error occurred while parsing the input
   */
  protected void readModelInstances(
      @NonNull IBoundDefinitionModelAssembly targetDefinition,
      @NonNull IBoundObject targetObject)
      throws IOException {
    Collection<? extends IBoundInstanceModel<?>> instances = targetDefinition.getModelInstances();
    Set<IBoundInstanceModel<?>> unhandledProperties = new HashSet<>();
    for (IBoundInstanceModel<?> modelInstance : instances) {
      assert modelInstance != null;
      if (!readItems(modelInstance, targetObject, true)) {
        unhandledProperties.add(modelInstance);
      }
    }

    // process all properties that did not get a value
    try {
      XMLEvent event = getReader().peek();
      Location location = event != null ? event.getLocation() : null;
      ValidationContext context = buildValidationContext(location);
      getProblemHandler().handleMissingModelInstances(targetDefinition, targetObject, unhandledProperties, context);
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }

    XMLEventReader2 reader = getReader();
    URI resource = getSource();

    try {
      // truncated input is reported as an IOException rather than dereferencing
      // a null event
      if (!peekRequired().isEndElement()) {
        // handle any
        XmlEventUtil.skipWhitespace(reader);
        XmlEventUtil.skipElement(reader);
        XmlEventUtil.skipWhitespace(reader);
      }

      XmlEventUtil.assertNext(reader, resource, XMLStreamConstants.END_ELEMENT);
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }
  }

  /**
   * Determine if the next data to read corresponds to the next model instance.
   *
   * @param targetInstance
   *          the model instance that describes the syntax of the data to read
   * @return {@code true} if the Module instance needs to be parsed, or
   *         {@code false} otherwise, including when the reader has no more
   *         events
   * @throws XMLStreamException
   *           if an error occurred while parsing XML events
   */
  protected boolean isNextInstance(
      @NonNull IBoundInstanceModel<?> targetInstance)
      throws XMLStreamException {

    XmlEventUtil.skipWhitespace(reader);

    XMLEvent nextEvent = reader.peek();

    // an exhausted reader peeks as null, which cannot match any instance
    boolean retval = nextEvent != null && nextEvent.isStartElement();
    if (retval) {
      IEnhancedQName qname = IEnhancedQName.of(ObjectUtils.notNull(nextEvent.asStartElement().getName()));
      retval = qname.equals(targetInstance.getEffectiveXmlGroupAsQName()) // parse the grouping element
          || targetInstance.canHandleXmlQName(qname); // parse the instance(s)
    }
    return retval;
  }

  /**
   * Read the data associated with the {@code instance} and apply it to the
   * provided {@code parentObject}.
   *
   * @param <T>
   *          the Java type of the items managed by the instance
   * @param instance
   *          the instance to parse data for
   * @param parentObject
   *          the Java object that data parsed by this method will be stored in
   * @param parseGrouping
   *          if {@code true} and the instance has an effective XML group name,
   *          the grouping element is required and consumed around the items
   * @return {@code true} if the instance was parsed, or {@code false} if the data
   *         did not contain information for this instance
   * @throws IOException
   *           if an error occurred while parsing the input
   */
  @Override
  public <T> boolean readItems(
      @NonNull IBoundInstanceModel<T> instance,
      @NonNull IBoundObject parentObject,
      boolean parseGrouping)
      throws IOException {
    try {
      // isNextInstance() skips any leading whitespace before matching
      boolean handled = isNextInstance(instance);
      if (handled) {
        XMLEventReader2 reader = getReader();
        URI resource = getSource();

        IEnhancedQName groupEQName = parseGrouping ? instance.getEffectiveXmlGroupAsQName() : null;
        QName groupQName = groupEQName == null ? null : groupEQName.toQName();
        if (groupQName != null) {
          // we need to parse the grouping element, if the next token matches
          XmlEventUtil.requireStartElement(reader, resource, groupQName);
        }

        IModelInstanceCollectionInfo<T> collectionInfo = instance.getCollectionInfo();

        ModelInstanceReadHandler<T> handler = new ModelInstanceReadHandler<>(instance, parentObject);

        // let the property info decide how to parse the value
        Object value = collectionInfo.readItems(handler);
        if (value != null) {
          instance.setValue(parentObject, value);
        }

        // consume extra whitespace between elements
        XmlEventUtil.skipWhitespace(reader);

        if (groupQName != null) {
          // consume the end of the group
          XmlEventUtil.requireEndElement(reader, resource, groupQName);
        }
      }
      return handled;
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }
  }

  /**
   * Reads the item(s) of a single model instance from the enclosing reader's
   * XML event stream, as a single item, a list, or a map.
   *
   * @param <ITEM>
   *          the Java type of the items being read
   */
  private final class ModelInstanceReadHandler<ITEM>
      extends AbstractModelInstanceReadHandler<ITEM> {

    private ModelInstanceReadHandler(
        @NonNull IBoundInstanceModel<ITEM> instance,
        @NonNull IBoundObject parentObject) {
      super(instance, parentObject);
    }

    @Override
    public List<ITEM> readList() throws IOException {
      return ObjectUtils.notNull(readCollection());
    }

    @Override
    public Map<String, ITEM> readMap() throws IOException {
      IBoundInstanceModel<?> instance = getCollectionInfo().getInstance();

      // keyed by the string form of each item's JSON key flag value; insertion
      // order is kept and a later item replaces an earlier one with the same key
      return ObjectUtils.notNull(readCollection().stream()
          .collect(Collectors.toMap(
              item -> {
                assert item != null;

                IBoundInstanceFlag jsonKey = instance.getItemJsonKey(item);
                assert jsonKey != null;
                return ObjectUtils.requireNonNull(jsonKey.getValue(item)).toString();
              },
              Function.identity(),
              (t, u) -> u,
              LinkedHashMap::new)));
    }

    /**
     * Read consecutive elements that the instance can handle, stopping at the
     * first event that is not such an element or when no events remain.
     *
     * @return the items read, in document order
     * @throws IOException
     *           if an error occurred while parsing the input
     */
    @NonNull
    private List<ITEM> readCollection() throws IOException {
      List<ITEM> retval = new LinkedList<>();
      XMLEventReader2 reader = getReader();
      try {

        // consume extra whitespace between elements
        XmlEventUtil.skipWhitespace(reader);

        IBoundInstanceModel<?> instance = getCollectionInfo().getInstance();
        XMLEvent event;
        // a null event means the reader is exhausted, which ends the collection
        while ((event = reader.peek()) != null
            && event.isStartElement()
            && instance.canHandleXmlQName(
                IEnhancedQName.of(ObjectUtils.notNull(event.asStartElement().getName())))) {

          // Consume the start element
          ITEM value = readItem();
          retval.add(value);

          // consume extra whitespace between elements
          XmlEventUtil.skipWhitespace(reader);
        }
      } catch (XMLStreamException ex) {
        throw new IOException(ex);
      }
      return retval;
    }

    @Override
    public ITEM readItem() throws IOException {
      try {
        return getCollectionInfo().getInstance().readItem(
            getParentObject(),
            new ItemReadHandler(ObjectUtils.notNull(peekRequired().asStartElement())));
      } catch (XMLStreamException ex) {
        throw new IOException(ex);
      }
    }
  }

  /**
   * Reads a single item (field, assembly, or choice group member) whose start
   * element has already been peeked from the enclosing reader's event stream.
   */
  private final class ItemReadHandler implements IItemReadHandler {
    @NonNull
    private final StartElement startElement;

    private ItemReadHandler(@NonNull StartElement startElement) {
      this.startElement = startElement;
    }

    /**
     * Get the current start element.
     *
     * @return the startElement
     */
    @NonNull
    private StartElement getStartElement() {
      return startElement;
    }

    /**
     * Read a complete element for a complex definition: consume the start
     * element, create the bound object, read its flags and body, then consume
     * the end element.
     *
     * @param <DEF>
     *          the type of definition being read
     * @param definition
     *          the definition describing the element
     * @param start
     *          the previously peeked start element, used for attributes and
     *          location information
     * @param expectedEQName
     *          the qualified name the element is required to have
     * @param parent
     *          the parent object passed to the deserialization hooks, may be
     *          {@code null}
     * @param bodyHandler
     *          reads the element content into the new object
     * @return the newly created and populated object
     * @throws IOException
     *           if an error occurred while parsing the input
     */
    @NonNull
    private <DEF extends IBoundDefinitionModelComplex> IBoundObject readDefinitionElement(
        @NonNull DEF definition,
        @NonNull StartElement start,
        @NonNull IEnhancedQName expectedEQName,
        @Nullable IBoundObject parent,
        @NonNull DefinitionBodyHandler<DEF, IBoundObject> bodyHandler) throws IOException {
      XMLEventReader2 reader = getReader();
      URI resource = getSource();
      QName expectedQName = expectedEQName.toQName();

      // Track path for error messages
      pathTracker.push(definition.getEffectiveName());

      try {
        // consume the start element
        XmlEventUtil.requireStartElement(reader, resource, expectedQName);

        Location location = start.getLocation();

        // construct the item
        IBoundObject item = definition.newInstance(
            location == null ? null : () -> SimpleResourceLocation.fromXmlLocation(location));

        // call pre-parse initialization hook
        definition.callBeforeDeserialize(item, parent);

        // read the flags
        readFlagInstances(definition, item, start);

        // read the body
        bodyHandler.accept(definition, item);

        XmlEventUtil.skipWhitespace(reader);

        // call post-parse initialization hook
        definition.callAfterDeserialize(item, parent);

        // consume the end element
        XmlEventUtil.requireEndElement(reader, resource, expectedQName);
        return ObjectUtils.asType(item);
      } catch (BindingException | XMLStreamException ex) {
        throw new IOException(ex);
      } finally {
        // always unwind the path, even when parsing fails
        pathTracker.pop();
      }
    }

    @Override
    public Object readItemFlag(
        IBoundObject parent,
        IBoundInstanceFlag flag) throws IOException {
      // should never be called
      throw new UnsupportedOperationException("should be handled by readFlagInstances()");
    }

    /**
     * Read the value of a complex field and, if one was read, store it on the
     * item.
     *
     * @param definition
     *          the field definition providing the field value binding
     * @param item
     *          the object to store the value on
     * @throws IOException
     *           if an error occurred while parsing the input
     */
    private void handleFieldDefinitionBody(
        @NonNull IBoundDefinitionModelFieldComplex definition,
        @NonNull IBoundObject item) throws IOException {
      IBoundFieldValue fieldValue = definition.getFieldValue();

      // parse the value
      Object value = fieldValue.readItem(item, this);
      if (value != null) {
        fieldValue.setValue(item, value);
      }
    }

    @Override
    public Object readItemField(
        IBoundObject parent,
        IBoundInstanceModelFieldScalar instance)
        throws IOException {
      XMLEventReader2 reader = getReader();
      URI resource = getSource();
      try {
        // non-null only when the value is wrapped in its own element
        QName wrapper = null;
        if (instance.isEffectiveValueWrappedInXml()) {
          wrapper = instance.getQName().toQName();

          XmlEventUtil.skipWhitespace(reader);
          XmlEventUtil.requireStartElement(reader, resource, wrapper);
        }

        Object retval = readScalarItem(instance);

        if (wrapper != null) {
          XmlEventUtil.skipWhitespace(reader);

          XmlEventUtil.requireEndElement(reader, resource, wrapper);
        }
        return retval;
      } catch (XMLStreamException ex) {
        throw new IOException(ex);
      }
    }

    @Override
    public IBoundObject readItemField(
        IBoundObject parent,
        IBoundInstanceModelFieldComplex instance)
        throws IOException {
      return readDefinitionElement(
          instance.getDefinition(),
          getStartElement(),
          instance.getQName(),
          parent,
          this::handleFieldDefinitionBody);
    }

    @Override
    public IBoundObject readItemField(IBoundObject parent, IBoundInstanceModelGroupedField instance)
        throws IOException {
      return readDefinitionElement(
          instance.getDefinition(),
          getStartElement(),
          instance.getQName(),
          parent,
          this::handleFieldDefinitionBody);
    }

    @Override
    public IBoundObject readItemField(
        IBoundObject parent,
        IBoundDefinitionModelFieldComplex definition) throws IOException {
      return readDefinitionElement(
          definition,
          getStartElement(),
          definition.getQName(),
          parent,
          this::handleFieldDefinitionBody);
    }

    @Override
    public Object readItemFieldValue(
        IBoundObject parent,
        IBoundFieldValue fieldValue) throws IOException {
      return checkMissingFieldValue(readScalarItem(fieldValue));
    }

    /**
     * Log a warning if the provided field value is {@code null}.
     *
     * @param value
     *          the value that was read, may be {@code null}
     * @return the provided value, unchanged
     */
    @Nullable
    private Object checkMissingFieldValue(Object value) {
      if (value == null && LOGGER.isWarnEnabled()) {
        StartElement start = getStartElement();
        LOGGER.atWarn().log("Missing property value{}",
            XmlEventUtil.generateLocationMessage(start, getSource()));
      }
      return value;
    }

    /**
     * Read the model content of an assembly into the item.
     *
     * @param definition
     *          the assembly definition describing the content
     * @param item
     *          the object to store the content on
     * @throws IOException
     *           if an error occurred while parsing the input
     */
    private void handleAssemblyDefinitionBody(
        @NonNull IBoundDefinitionModelAssembly definition,
        @NonNull IBoundObject item) throws IOException {
      readModelInstances(definition, item);
    }

    @Override
    public IBoundObject readItemAssembly(
        IBoundObject parent,
        IBoundInstanceModelAssembly instance) throws IOException {
      return readDefinitionElement(
          instance.getDefinition(),
          getStartElement(),
          instance.getQName(),
          parent,
          this::handleAssemblyDefinitionBody);
    }

    @Override
    public IBoundObject readItemAssembly(IBoundObject parent, IBoundInstanceModelGroupedAssembly instance)
        throws IOException {
      return readDefinitionElement(
          instance.getDefinition(),
          getStartElement(),
          instance.getQName(),
          parent,
          this::handleAssemblyDefinitionBody);
    }

    @Override
    public IBoundObject readItemAssembly(
        IBoundObject parent,
        IBoundDefinitionModelAssembly definition) throws IOException {
      return readDefinitionElement(
          definition,
          getStartElement(),
          ObjectUtils.requireNonNull(definition.getRootQName()),
          parent,
          this::handleAssemblyDefinitionBody);
    }

    /**
     * Parse a scalar value from the reader using the handler's Java type
     * adapter.
     *
     * @param handler
     *          provides the type adapter used to parse the value
     * @return the parsed value, may be {@code null}
     * @throws IOException
     *           if an error occurred while parsing the input
     */
    @Nullable
    private Object readScalarItem(@NonNull IFeatureScalarItemValueHandler handler)
        throws IOException {
      return handler.getJavaTypeAdapter().parse(getReader(), getSource());
    }

    @Override
    public IBoundObject readChoiceGroupItem(IBoundObject parent, IBoundInstanceModelChoiceGroup instance)
        throws IOException {
      try {
        XMLEventReader2 eventReader = getReader();
        // consume extra whitespace between elements
        XmlEventUtil.skipWhitespace(eventReader);

        // the element name selects which grouped instance reads the item
        XMLEvent event = peekRequired();
        IEnhancedQName nextQName = IEnhancedQName.of(ObjectUtils.notNull(event.asStartElement().getName()));
        IBoundInstanceModelGroupedNamed actualInstance = instance.getGroupedModelInstance(nextQName);
        assert actualInstance != null;
        return actualInstance.readItem(parent, this);
      } catch (XMLStreamException ex) {
        throw new IOException(ex);
      }
    }
  }

  /**
   * Callback used to read the content of an element into a newly created
   * object.
   *
   * @param <DEF>
   *          the type of definition describing the element
   * @param <ITEM>
   *          the type of object being populated
   */
  @FunctionalInterface
  private interface DefinitionBodyHandler<DEF extends IBoundDefinitionModelComplex, ITEM> {
    void accept(
        @NonNull DEF definition,
        @NonNull ITEM item) throws IOException;
  }

}