diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorFormatter.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorFormatter.java new file mode 100644 index 000000000..2555b601d --- /dev/null +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorFormatter.java @@ -0,0 +1,293 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE file at the root of the source + * tree and available online at + * + * https://github.com/keeps/db-preservation-toolkit + */ +package com.databasepreservation.modules.siard.validate.common; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.IOException; +import java.io.InputStream; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utility class for formatting SIARD validation error messages with detailed + * context information and extracting database context from XML. + * + * @author Generated for enhanced SIARD validation diagnostics + */ +public class SiardValidationErrorFormatter { + private static final Logger LOGGER = LoggerFactory.getLogger(SiardValidationErrorFormatter.class); + + // Patterns for extracting context from error messages + private static final Pattern MISSING_ELEMENT_PATTERN = + Pattern.compile("Cannot find the declaration of element '([^']+)'"); + private static final Pattern EXPECTED_ELEMENT_PATTERN = + Pattern.compile("Expected elements? 
'([^']+)'");
+  private static final Pattern INVALID_CONTENT_PATTERN =
+      Pattern.compile("Invalid content was found starting with element '([^']+)'");
+
+  /**
+   * Renders one validation error as a single human-readable line.
+   *
+   * Layout: {@code [TYPE] core-message}, followed by {@code " at line N"} (plus
+   * {@code ", column M"}) when those numbers are positive, followed by
+   * {@code " (in schema: 's', table: 't')"} or {@code "..., view: 'v')"} when the
+   * context carries a schema name. A table name takes precedence over a view
+   * name.
+   *
+   * @param error The validation error to format
+   * @param xmlContext The XML context extracted from parsing (can be null)
+   * @return A formatted, actionable error message
+   */
+  public static String formatErrorMessage(SiardValidationErrorHandler.ValidationError error, SiardXmlContext xmlContext) {
+    final StringBuilder text = new StringBuilder("[");
+    text.append(error.getType()).append("] ");
+    text.append(extractCoreMessage(error.getMessage()));
+
+    // Location: the column is only reported when the line is known.
+    final int line = error.getLineNumber();
+    if (line > 0) {
+      text.append(" at line ").append(line);
+      final int column = error.getColumnNumber();
+      if (column > 0) {
+        text.append(", column ").append(column);
+      }
+    }
+
+    // Without a schema name there is no database context to print.
+    final String schema = xmlContext == null ? null : xmlContext.getCurrentSchema();
+    if (schema == null) {
+      return text.toString();
+    }
+
+    text.append(" (in schema: '").append(schema).append("'");
+    final String table = xmlContext.getCurrentTable();
+    if (table != null) {
+      text.append(", table: '").append(table).append("'");
+    } else {
+      final String view = xmlContext.getCurrentView();
+      if (view != null) {
+        text.append(", view: '").append(view).append("'");
+      }
+    }
+    return text.append(")").toString();
+  }
+
+  /**
+   * Extracts a more readable core message from the SAX error message.
+   * Attempts to identify common validation error patterns and format them
+   * in a more actionable way.
+   *
+   * The recognised message shapes are tried in a fixed order (undeclared
+   * element, expected element, invalid content) and the first match wins. A
+   * {@code null} input yields a generic placeholder; a message that matches none
+   * of the shapes is returned unchanged.
+   *
+   * @param rawMessage The raw SAX error message
+   * @return A formatted core message
+   */
+  private static String extractCoreMessage(String rawMessage) {
+    if (rawMessage == null) {
+      return "Unknown validation error";
+    }
+
+    // Parallel tables: recogniser i is rendered as lead[i] + element + tail[i].
+    final Pattern[] recognisers = {MISSING_ELEMENT_PATTERN, EXPECTED_ELEMENT_PATTERN, INVALID_CONTENT_PATTERN};
+    final String[] lead = {"Missing or undeclared element: '", "Missing required element(s): ",
+      "Invalid or unexpected element: '"};
+    final String[] tail = {"'", "", "'"};
+
+    for (int i = 0; i < recognisers.length; i++) {
+      final Matcher hit = recognisers[i].matcher(rawMessage);
+      if (hit.find()) {
+        return lead[i] + hit.group(1) + tail[i];
+      }
+    }
+
+    // Nothing recognised: hand back the parser's own wording.
+    return rawMessage;
+  }
+
+  /**
+   * Attempts to extract SIARD database context (schema, table, view) from XML
+   * by parsing up to the specified line number.
+   *
+   * The XML comes from the archive being validated, so the parser is hardened
+   * against XXE: DOCTYPE declarations and external entities are refused. If the
+   * parser implementation rejects one of these settings, no context is
+   * extracted and {@code null} is returned.
+   *
+   * Extraction is best-effort. The content handler aborts the parse with a
+   * SAXException once the target line is reached; any other SAXException raised
+   * while parsing (for example a malformed document) is handled the same way,
+   * so the returned context holds whatever was collected up to that point.
+   *
+   * @param xmlInputStream The XML input stream to parse
+   * @param targetLineNumber The line number where the error occurred
+   * @return The extracted XML context, or null if the stream is null, the line
+   *         number is not positive, the parser cannot be configured, or reading
+   *         the stream fails
+   */
+  public static SiardXmlContext extractXmlContext(InputStream xmlInputStream, int targetLineNumber) {
+    if (xmlInputStream == null) {
+      LOGGER.debug("Cannot extract XML context: input stream is null");
+      return null;
+    }
+
+    if (targetLineNumber <= 0) {
+      LOGGER.debug("Cannot extract XML context: invalid target line number {}", targetLineNumber);
+      return null;
+    }
+
+    try {
+      SAXParserFactory factory = SAXParserFactory.newInstance();
+      factory.setNamespaceAware(true);
+      // Untrusted input: refuse DOCTYPEs and external entities (XXE hardening).
+      // A parser that does not support a feature throws here and we fall through
+      // to the outer catch, i.e. we fail closed and report no context.
+      factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
+      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+
+      SAXParser saxParser = factory.newSAXParser();
+      XMLReader xmlReader = saxParser.getXMLReader();
+
+      SiardContextExtractor contextExtractor = new SiardContextExtractor(targetLineNumber);
+      xmlReader.setContentHandler(contextExtractor);
+      // DefaultHandler also implements ErrorHandler (fatal errors are rethrown,
+      // the rest ignored). Registering it keeps parse problems inside this method
+      // instead of leaving them to the parser's built-in default reporting.
+      xmlReader.setErrorHandler(contextExtractor);
+
+      // Parse until target line is reached. SAX parsing will be stopped via exception
+      // (a common pattern for early termination in SAX parsing)
+      try {
+        xmlReader.parse(new InputSource(xmlInputStream));
+      } catch (SAXException e) {
+        // Expected when the target line is reached; a genuine parse failure ends
+        // up here too and simply leaves the context partially filled.
+        LOGGER.trace("XML context extraction stopped: {}", e.getMessage());
+      }
+
+      return contextExtractor.getContext();
+    } catch (ParserConfigurationException | IOException | SAXException e) {
+      // If context extraction fails, just return null
+      LOGGER.debug("Failed to extract XML context at line {}", targetLineNumber, e);
+      return null;
+    }
+  }
+
+  /**
+   * SAX Handler that extracts SIARD-specific context (schema, table, view names)
+   * while parsing XML up to a specific line number.
+   *
+   * Only a {@code name} element that is a direct child of the currently open
+   * {@code schema}, {@code table} or {@code view} element is recorded. Deeper
+   * {@code name} elements (columns, keys, types, routines, ...) are ignored so
+   * they cannot overwrite the name of their enclosing container.
+   */
+  private static class SiardContextExtractor extends DefaultHandler {
+    /** Depth value meaning "this container element is not currently open". */
+    private static final int NOT_OPEN = -1;
+
+    private final int targetLineNumber;
+    private final SiardXmlContext context = new SiardXmlContext();
+    private final StringBuilder currentText = new StringBuilder();
+    // Nesting depth of the element being processed (the root element is 1).
+    private int depth = 0;
+    // Depth at which the open schema/table/view element started, or NOT_OPEN.
+    private int schemaDepth = NOT_OPEN;
+    private int tableDepth = NOT_OPEN;
+    private int viewDepth = NOT_OPEN;
+    // Set while inside a name element whose text must be stored on endElement.
+    private boolean inSchemaName = false;
+    private boolean inTableName = false;
+    private boolean inViewName = false;
+    private org.xml.sax.Locator locator;
+
+    /**
+     * @param targetLineNumber line at which parsing is aborted
+     */
+    public SiardContextExtractor(int targetLineNumber) {
+      this.targetLineNumber = targetLineNumber;
+    }
+
+    @Override
+    public void setDocumentLocator(org.xml.sax.Locator locator) {
+      this.locator = locator;
+    }
+
+    /**
+     * Tracks entry into schema/table/view elements and arms name capture.
+     *
+     * @throws SAXException to abort the parse once the locator reports a line at
+     *           or beyond the target line
+     */
+    @Override
+    public void startElement(String uri, String localName, String qName, org.xml.sax.Attributes attributes)
+      throws SAXException {
+      // Stop parsing if we've reached the target line
+      if (locator != null && locator.getLineNumber() >= targetLineNumber) {
+        throw new SAXException("Context extraction completed: reached target line " + targetLineNumber);
+      }
+
+      depth++;
+      currentText.setLength(0);
+
+      if ("schema".equals(localName)) {
+        // A new schema invalidates everything remembered from the previous one.
+        schemaDepth = depth;
+        tableDepth = NOT_OPEN;
+        viewDepth = NOT_OPEN;
+        context.setCurrentSchema(null);
+        context.setCurrentTable(null);
+        context.setCurrentView(null);
+      } else if ("table".equals(localName) && schemaDepth != NOT_OPEN) {
+        tableDepth = depth;
+        viewDepth = NOT_OPEN;
+        context.setCurrentTable(null);
+        context.setCurrentView(null);
+      } else if ("view".equals(localName) && schemaDepth != NOT_OPEN) {
+        viewDepth = depth;
+        tableDepth = NOT_OPEN;
+        context.setCurrentTable(null);
+        context.setCurrentView(null);
+      } else if ("name".equals(localName)) {
+        // Capture only when the parent is the container itself; a nested name
+        // (e.g. table/columns/column/name) must not replace the table name.
+        final int parentDepth = depth - 1;
+        if (tableDepth != NOT_OPEN) {
+          inTableName = parentDepth == tableDepth;
+        } else if (viewDepth != NOT_OPEN) {
+          inViewName = parentDepth == viewDepth;
+        } else if (schemaDepth != NOT_OPEN) {
+          inSchemaName = parentDepth == schemaDepth;
+        }
+      }
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length) {
+      currentText.append(ch, start, length);
+    }
+
+    /**
+     * Stores a captured name and tracks exit from schema/table/view elements.
+     * The stored names are kept after the container closes, so the context
+     * reports the most recently seen container.
+     */
+    @Override
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+      String text = currentText.toString().trim();
+
+      if ("name".equals(localName)) {
+        if (inSchemaName) {
+          context.setCurrentSchema(text);
+          inSchemaName = false;
+        } else if (inTableName) {
+          context.setCurrentTable(text);
+          inTableName = false;
+        } else if (inViewName) {
+          context.setCurrentView(text);
+          inViewName = false;
+        }
+      } else if ("schema".equals(localName)) {
+        schemaDepth = NOT_OPEN;
+      } else if ("table".equals(localName)) {
+        tableDepth = NOT_OPEN;
+      } else if ("view".equals(localName)) {
+        viewDepth = NOT_OPEN;
+      }
+
+      depth--;
+      currentText.setLength(0);
+    }
+
+    /**
+     * @return the context collected so far (never null; its fields may be null)
+     */
+    public SiardXmlContext getContext() {
+      return context;
+    }
+  }
+
+  /**
+   * Container for SIARD XML context information: plain mutable holder for the
+   * schema, table and view names; each may be null when unknown.
+   */
+  public static class SiardXmlContext {
+    private String currentSchema;
+    private String currentTable;
+    private String currentView;
+
+    public String getCurrentSchema() {
+      return currentSchema;
+    }
+
+    public void setCurrentSchema(String currentSchema) {
+      this.currentSchema = currentSchema;
+    }
+
+    public String getCurrentTable() {
+      return currentTable;
+    }
+
+    public void setCurrentTable(String currentTable) {
+      this.currentTable = currentTable;
+    }
+
+    public String getCurrentView() {
+      return currentView;
+    }
+
+    public void setCurrentView(String currentView) {
+      this.currentView = currentView;
+    }
+  }
+}
diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorHandler.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorHandler.java
new file mode 100644
index 000000000..85a87ff8f
--- /dev/null
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/common/SiardValidationErrorHandler.java
@@ -0,0 +1,138 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE file at the root of the source
+
* tree and available online at
+ *
+ * https://github.com/keeps/db-preservation-toolkit
+ */
+package com.databasepreservation.modules.siard.validate.common;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Custom ErrorHandler for SIARD XML validation that captures detailed error
+ * information including line numbers, column numbers, and error messages.
+ *
+ * Warnings and errors are recorded and processing continues; fatal errors are
+ * recorded and then rethrown.
+ *
+ * @author Generated for enhanced SIARD validation diagnostics
+ */
+public class SiardValidationErrorHandler implements ErrorHandler {
+
+  // Typed lists: callers iterate these as ValidationError without casting.
+  private final List<ValidationError> errors = new ArrayList<>();
+  private final List<ValidationError> warnings = new ArrayList<>();
+  private final List<ValidationError> fatalErrors = new ArrayList<>();
+
+  /** Records the warning; never throws. */
+  @Override
+  public void warning(SAXParseException exception) throws SAXException {
+    warnings.add(toValidationError(ErrorType.WARNING, exception));
+  }
+
+  /** Records the error; never throws, so validation keeps going. */
+  @Override
+  public void error(SAXParseException exception) throws SAXException {
+    errors.add(toValidationError(ErrorType.ERROR, exception));
+  }
+
+  /**
+   * Records the fatal error and rethrows it.
+   *
+   * @throws SAXException always; the very exception passed in
+   */
+  @Override
+  public void fatalError(SAXParseException exception) throws SAXException {
+    fatalErrors.add(toValidationError(ErrorType.FATAL_ERROR, exception));
+    // Fatal errors should stop processing
+    throw exception;
+  }
+
+  /** Copies line, column and message out of the parser exception. */
+  private static ValidationError toValidationError(ErrorType type, SAXParseException exception) {
+    return new ValidationError(type, exception.getLineNumber(), exception.getColumnNumber(),
+      exception.getMessage(), exception);
+  }
+
+  /**
+   * @return true if at least one error or fatal error was recorded (warnings
+   *         do not count)
+   */
+  public boolean hasErrors() {
+    return !errors.isEmpty() || !fatalErrors.isEmpty();
+  }
+
+  /**
+   * @return true if at least one warning was recorded
+   */
+  public boolean hasWarnings() {
+    return !warnings.isEmpty();
+  }
+
+  /**
+   * @return the internal list of recorded (non-fatal) errors, not a copy
+   */
+  public List<ValidationError> getErrors() {
+    return errors;
+  }
+
+  /**
+   * @return the internal list of recorded warnings, not a copy
+   */
+  public List<ValidationError> getWarnings() {
+    return warnings;
+  }
+
+  /**
+   * @return the internal list of recorded fatal errors, not a copy
+   */
+  public List<ValidationError> getFatalErrors() {
+    return fatalErrors;
+  }
+
+  /**
+   * @return a new list holding the fatal errors followed by the errors;
+   *         warnings are not included
+   */
+  public List<ValidationError> getAllErrors() {
+    List<ValidationError> allErrors = new ArrayList<>(fatalErrors.size() + errors.size());
+    allErrors.addAll(fatalErrors);
+    allErrors.addAll(errors);
+    return allErrors;
+  }
+
+  /**
+   * Represents a validation error with detailed context information
+   */
+  public static class ValidationError {
+    private final ErrorType type;
+    private final int lineNumber;
+    private final int columnNumber;
+    private final String message;
+    private final SAXParseException exception;
+
+    public ValidationError(ErrorType type, int lineNumber, int columnNumber, String message, SAXParseException exception) {
+      this.type = type;
+      this.lineNumber = lineNumber;
+      this.columnNumber = columnNumber;
+      this.message = message;
+      this.exception = exception;
+    }
+
+    public ErrorType getType() {
+      return type;
+    }
+
+    public int getLineNumber() {
+      return lineNumber;
+    }
+
+    public int getColumnNumber() {
+      return columnNumber;
+    }
+
+    public String getMessage() {
+      return message;
+    }
+
+    public SAXParseException getException() {
+      return exception;
+    }
+  }
+
+  /**
+   * Enum representing the type of validation error
+   */
+  public enum ErrorType {
+    WARNING,
+    ERROR,
+    FATAL_ERROR
+  }
+}
diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_21/component/metadata/MetadataXMLAgainstXSDValidator.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_21/component/metadata/MetadataXMLAgainstXSDValidator.java
index f9659ab2e..feace2199 100644
--- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_21/component/metadata/MetadataXMLAgainstXSDValidator.java
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_21/component/metadata/MetadataXMLAgainstXSDValidator.java
@@ -17,7 +17,11 @@
 import javax.xml.validation.SchemaFactory;
 import javax.xml.validation.Validator;
 
+import com.databasepreservation.modules.siard.validate.common.SiardValidationErrorFormatter;
+import 
com.databasepreservation.modules.siard.validate.common.SiardValidationErrorHandler; import com.databasepreservation.modules.siard.validate.generic.component.metadata.MetadataValidator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import com.databasepreservation.model.exception.ModuleException; @@ -28,6 +32,8 @@ * @author Gabriel Barros */ public class MetadataXMLAgainstXSDValidator extends MetadataValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(MetadataXMLAgainstXSDValidator.class); + private static final int MAX_REPORTED_ERRORS = 5; private final String MODULE_NAME; private static final String M_50 = "5.0"; private static final String M_501 = "M_5.0-1"; @@ -61,26 +67,88 @@ public boolean validate() throws ModuleException { * positively validated against metadata.xsd. */ private boolean validateXMLAgainstXSD() { + SiardValidationErrorHandler errorHandler = new SiardValidationErrorHandler(); + try ( InputStream XSDInputStream = SiardArchive.class.getClassLoader() .getResourceAsStream("schema/siard2-1-metadata.xsd"); InputStream XMLInputStream = zipFileManagerStrategy.getZipInputStream(path, validatorPathStrategy.getMetadataXMLPath())) { - Source schemaFile = new StreamSource(XSDInputStream); - Source xmlFile = new StreamSource(XMLInputStream); + Source schemaFile = new StreamSource(XSDInputStream); + Source xmlFile = new StreamSource(XMLInputStream); - SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); - Schema schema; - - schema = schemaFactory.newSchema(schemaFile); + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema schema = schemaFactory.newSchema(schemaFile); Validator validator = schema.newValidator(); + + // Set custom error handler to capture detailed validation errors + validator.setErrorHandler(errorHandler); + validator.validate(xmlFile); } catch (SAXException | IOException e) { - 
setError(M_501, e.getMessage());
+      // Process errors with enhanced formatting
+      if (errorHandler.hasErrors()) {
+        reportDetailedErrors(errorHandler);
+      } else {
+        // Fallback to basic error message if no detailed errors captured
+        setError(M_501, e.getMessage());
+      }
+      return false;
+    }
+
+    // Check if any errors were captured even without exceptions
+    if (errorHandler.hasErrors()) {
+      reportDetailedErrors(errorHandler);
       return false;
     }
 
     return true;
   }
+
+  /**
+   * Reports detailed validation errors with context information.
+   *
+   * At most MAX_REPORTED_ERRORS errors are formatted and joined with "; ";
+   * when more were captured, a trailing "... and N more error(s)" note is
+   * added. The schema/table/view context is looked up separately for each
+   * reported error, so an error is never labelled with the context of another
+   * error's line.
+   *
+   * @param errorHandler The error handler containing captured errors
+   */
+  private void reportDetailedErrors(SiardValidationErrorHandler errorHandler) {
+    // getAllErrors() builds a fresh list on every call, so take one snapshot.
+    final java.util.List<SiardValidationErrorHandler.ValidationError> allErrors = errorHandler.getAllErrors();
+    final int reported = Math.min(allErrors.size(), MAX_REPORTED_ERRORS);
+
+    // Report errors with formatted messages (limit to MAX_REPORTED_ERRORS to avoid excessively long messages)
+    final StringBuilder errorMessage = new StringBuilder();
+    for (int i = 0; i < reported; i++) {
+      final SiardValidationErrorHandler.ValidationError error = allErrors.get(i);
+      if (i > 0) {
+        errorMessage.append("; ");
+      }
+      errorMessage.append(SiardValidationErrorFormatter.formatErrorMessage(error, extractContextFor(error)));
+    }
+
+    if (allErrors.size() > reported) {
+      errorMessage.append("; ... and ").append(allErrors.size() - reported).append(" more error(s)");
+    }
+
+    setError(M_501, errorMessage.toString());
+  }
+
+  /**
+   * Re-reads metadata.xml up to the error's line to find the enclosing
+   * schema/table/view. Runs once per reported error (bounded by
+   * MAX_REPORTED_ERRORS) and only on the failure path.
+   *
+   * @param error the error whose line number is looked up
+   * @return the context, or null when the line is unknown or extraction fails
+   */
+  private SiardValidationErrorFormatter.SiardXmlContext extractContextFor(
+    SiardValidationErrorHandler.ValidationError error) {
+    if (error.getLineNumber() <= 0) {
+      // No usable position: do not bother reopening the archive entry.
+      return null;
+    }
+
+    try (InputStream contextInputStream = zipFileManagerStrategy.getZipInputStream(path,
+      validatorPathStrategy.getMetadataXMLPath())) {
+      return SiardValidationErrorFormatter.extractXmlContext(contextInputStream, error.getLineNumber());
+    } catch (IOException e) {
+      // Context extraction failed, continue without it
+      LOGGER.debug("Failed to extract XML context for error at line {}", error.getLineNumber(), e);
+      return null;
+    }
+  }
 }
\ No newline at end of file
diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_22/component/metadata/MetadataXMLAgainstXSDValidator.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_22/component/metadata/MetadataXMLAgainstXSDValidator.java
index 418cc08db..fe0c6b6d8 100644
--- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_22/component/metadata/MetadataXMLAgainstXSDValidator.java
+++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/validate/siard_22/component/metadata/MetadataXMLAgainstXSDValidator.java
@@ -17,17 +17,23 @@
 import javax.xml.validation.SchemaFactory;
 import javax.xml.validation.Validator;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
 
 import com.databasepreservation.model.exception.ModuleException;
 import com.databasepreservation.model.reporters.ValidationReporterStatus;
 import com.databasepreservation.modules.siard.bindings.siard_2_2.SiardArchive;
+import com.databasepreservation.modules.siard.validate.common.SiardValidationErrorFormatter;
+import com.databasepreservation.modules.siard.validate.common.SiardValidationErrorHandler;
 import com.databasepreservation.modules.siard.validate.generic.component.metadata.MetadataValidator;
 
 /**
  * @author Gabriel Barros
  */
 public class MetadataXMLAgainstXSDValidator extends MetadataValidator {
+  private static final Logger LOGGER = LoggerFactory.getLogger(MetadataXMLAgainstXSDValidator.class);
+  private static final int MAX_REPORTED_ERRORS = 5;
   private final 
String MODULE_NAME; private static final String M_50 = "5.0"; private static final String M_501 = "M_5.0-1"; @@ -61,26 +67,88 @@ public boolean validate() throws ModuleException { * positively validated against metadata.xsd. */ private boolean validateXMLAgainstXSD() { + SiardValidationErrorHandler errorHandler = new SiardValidationErrorHandler(); + try ( InputStream XSDInputStream = SiardArchive.class.getClassLoader() .getResourceAsStream("schema/siard2-2-metadata.xsd"); InputStream XMLInputStream = zipFileManagerStrategy.getZipInputStream(path, validatorPathStrategy.getMetadataXMLPath())) { - Source schemaFile = new StreamSource(XSDInputStream); - Source xmlFile = new StreamSource(XMLInputStream); + Source schemaFile = new StreamSource(XSDInputStream); + Source xmlFile = new StreamSource(XMLInputStream); - SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); - Schema schema; - - schema = schemaFactory.newSchema(schemaFile); + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema schema = schemaFactory.newSchema(schemaFile); Validator validator = schema.newValidator(); + + // Set custom error handler to capture detailed validation errors + validator.setErrorHandler(errorHandler); + validator.validate(xmlFile); } catch (SAXException | IOException e) { - setError(M_501, e.getMessage()); + // Process errors with enhanced formatting + if (errorHandler.hasErrors()) { + reportDetailedErrors(errorHandler); + } else { + // Fallback to basic error message if no detailed errors captured + setError(M_501, e.getMessage()); + } + return false; + } + + // Check if any errors were captured even without exceptions + if (errorHandler.hasErrors()) { + reportDetailedErrors(errorHandler); return false; } return true; } + + /** + * Reports detailed validation errors with context information. 
+   *
+   * At most MAX_REPORTED_ERRORS errors are formatted and joined with "; ";
+   * when more were captured, a trailing "... and N more error(s)" note is
+   * added. The schema/table/view context is looked up separately for each
+   * reported error, so an error is never labelled with the context of another
+   * error's line.
+   *
+   * @param errorHandler The error handler containing captured errors
+   */
+  private void reportDetailedErrors(SiardValidationErrorHandler errorHandler) {
+    // getAllErrors() builds a fresh list on every call, so take one snapshot.
+    final java.util.List<SiardValidationErrorHandler.ValidationError> allErrors = errorHandler.getAllErrors();
+    final int reported = Math.min(allErrors.size(), MAX_REPORTED_ERRORS);
+
+    // Report errors with formatted messages (limit to MAX_REPORTED_ERRORS to avoid excessively long messages)
+    final StringBuilder errorMessage = new StringBuilder();
+    for (int i = 0; i < reported; i++) {
+      final SiardValidationErrorHandler.ValidationError error = allErrors.get(i);
+      if (i > 0) {
+        errorMessage.append("; ");
+      }
+      errorMessage.append(SiardValidationErrorFormatter.formatErrorMessage(error, extractContextFor(error)));
+    }
+
+    if (allErrors.size() > reported) {
+      errorMessage.append("; ... and ").append(allErrors.size() - reported).append(" more error(s)");
+    }
+
+    setError(M_501, errorMessage.toString());
+  }
+
+  /**
+   * Re-reads metadata.xml up to the error's line to find the enclosing
+   * schema/table/view. Runs once per reported error (bounded by
+   * MAX_REPORTED_ERRORS) and only on the failure path.
+   *
+   * @param error the error whose line number is looked up
+   * @return the context, or null when the line is unknown or extraction fails
+   */
+  private SiardValidationErrorFormatter.SiardXmlContext extractContextFor(
+    SiardValidationErrorHandler.ValidationError error) {
+    if (error.getLineNumber() <= 0) {
+      // No usable position: do not bother reopening the archive entry.
+      return null;
+    }
+
+    try (InputStream contextInputStream = zipFileManagerStrategy.getZipInputStream(path,
+      validatorPathStrategy.getMetadataXMLPath())) {
+      return SiardValidationErrorFormatter.extractXmlContext(contextInputStream, error.getLineNumber());
+    } catch (IOException e) {
+      // Context extraction failed, continue without it
+      LOGGER.debug("Failed to extract XML context for error at line {}", error.getLineNumber(), e);
+      return null;
+    }
+  }
 }
\ No newline at end of file