Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE file at the root of the source
* tree and available online at
*
* https://github.com/keeps/db-preservation-toolkit
*/
package com.databasepreservation.modules.siard.validate.common;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Utility class for formatting SIARD validation error messages with detailed
* context information and extracting database context from XML.
*
* @author Generated for enhanced SIARD validation diagnostics
*/
public class SiardValidationErrorFormatter {
private static final Logger LOGGER = LoggerFactory.getLogger(SiardValidationErrorFormatter.class);

// Patterns for extracting context from error messages
private static final Pattern MISSING_ELEMENT_PATTERN =
Pattern.compile("Cannot find the declaration of element '([^']+)'");
private static final Pattern EXPECTED_ELEMENT_PATTERN =
Pattern.compile("Expected elements? '([^']+)'");
private static final Pattern INVALID_CONTENT_PATTERN =
Pattern.compile("Invalid content was found starting with element '([^']+)'");

/**
* Formats a validation error message with detailed context information.
*
* @param error The validation error to format
* @param xmlContext The XML context extracted from parsing (can be null)
* @return A formatted, actionable error message
*/
public static String formatErrorMessage(SiardValidationErrorHandler.ValidationError error, SiardXmlContext xmlContext) {
StringBuilder message = new StringBuilder();

// Add error type
message.append("[").append(error.getType()).append("] ");

// Extract and format the core error message
String coreMessage = extractCoreMessage(error.getMessage());
message.append(coreMessage);

// Add location information
if (error.getLineNumber() > 0) {
message.append(" at line ").append(error.getLineNumber());
if (error.getColumnNumber() > 0) {
message.append(", column ").append(error.getColumnNumber());
}
}

// Add XML context if available
if (xmlContext != null) {
if (xmlContext.getCurrentSchema() != null) {
message.append(" (in schema: '").append(xmlContext.getCurrentSchema()).append("'");
if (xmlContext.getCurrentTable() != null) {
message.append(", table: '").append(xmlContext.getCurrentTable()).append("'");
} else if (xmlContext.getCurrentView() != null) {
message.append(", view: '").append(xmlContext.getCurrentView()).append("'");
}
message.append(")");
}
}

return message.toString();
}

/**
* Extracts a more readable core message from the SAX error message.
* Attempts to identify common validation error patterns and format them
* in a more actionable way.
*
* @param rawMessage The raw SAX error message
* @return A formatted core message
*/
private static String extractCoreMessage(String rawMessage) {
if (rawMessage == null) {
return "Unknown validation error";
}

// Check for missing element declaration
Matcher missingMatcher = MISSING_ELEMENT_PATTERN.matcher(rawMessage);
if (missingMatcher.find()) {
return "Missing or undeclared element: '" + missingMatcher.group(1) + "'";
}

// Check for expected element
Matcher expectedMatcher = EXPECTED_ELEMENT_PATTERN.matcher(rawMessage);
if (expectedMatcher.find()) {
return "Missing required element(s): " + expectedMatcher.group(1);
}

// Check for invalid content
Matcher invalidMatcher = INVALID_CONTENT_PATTERN.matcher(rawMessage);
if (invalidMatcher.find()) {
return "Invalid or unexpected element: '" + invalidMatcher.group(1) + "'";
}

// Return the original message if no pattern matches
return rawMessage;
}

/**
* Attempts to extract SIARD database context (schema, table, view) from XML
* by parsing up to the specified line number.
*
* @param xmlInputStream The XML input stream to parse
* @param targetLineNumber The line number where the error occurred
* @return The extracted XML context, or null if extraction fails
*/
public static SiardXmlContext extractXmlContext(InputStream xmlInputStream, int targetLineNumber) {
if (xmlInputStream == null) {
LOGGER.debug("Cannot extract XML context: input stream is null");
return null;
}

if (targetLineNumber <= 0) {
LOGGER.debug("Cannot extract XML context: invalid target line number {}", targetLineNumber);
return null;
}

try {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
SAXParser saxParser = factory.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();

SiardContextExtractor contextExtractor = new SiardContextExtractor(targetLineNumber);
xmlReader.setContentHandler(contextExtractor);

// Parse until target line is reached. SAX parsing will be stopped via exception
// (a common pattern for early termination in SAX parsing)
try {
xmlReader.parse(new InputSource(xmlInputStream));
} catch (SAXException e) {
// Expected - parsing is stopped when target line is reached
}

return contextExtractor.getContext();
} catch (ParserConfigurationException | IOException | SAXException e) {
// If context extraction fails, just return null
LOGGER.debug("Failed to extract XML context at line {}", targetLineNumber, e);
return null;
}
}

/**
* SAX Handler that extracts SIARD-specific context (schema, table, view names)
* while parsing XML up to a specific line number.
*/
private static class SiardContextExtractor extends DefaultHandler {
private final int targetLineNumber;
private final SiardXmlContext context = new SiardXmlContext();
private boolean inSchema = false;
private boolean inTable = false;
private boolean inView = false;
private boolean inSchemaName = false;
private boolean inTableName = false;
private boolean inViewName = false;
private final StringBuilder currentText = new StringBuilder();
private org.xml.sax.Locator locator;

public SiardContextExtractor(int targetLineNumber) {
this.targetLineNumber = targetLineNumber;
}

@Override
public void setDocumentLocator(org.xml.sax.Locator locator) {
this.locator = locator;
}

@Override
public void startElement(String uri, String localName, String qName, org.xml.sax.Attributes attributes)
throws SAXException {
// Stop parsing if we've reached the target line
if (locator != null && locator.getLineNumber() >= targetLineNumber) {
throw new SAXException("Context extraction completed: reached target line " + targetLineNumber);
}

currentText.setLength(0);

if ("schema".equals(localName)) {
inSchema = true;
inTable = false;
inView = false;
context.setCurrentSchema(null);
context.setCurrentTable(null);
context.setCurrentView(null);
} else if ("table".equals(localName) && inSchema) {
inTable = true;
inView = false;
context.setCurrentTable(null);
context.setCurrentView(null);
} else if ("view".equals(localName) && inSchema) {
inView = true;
inTable = false;
context.setCurrentTable(null);
context.setCurrentView(null);
} else if ("name".equals(localName)) {
if (inSchema && !inTable && !inView) {
inSchemaName = true;
} else if (inTable) {
inTableName = true;
} else if (inView) {
inViewName = true;
}
}
}

@Override
public void characters(char[] ch, int start, int length) {
currentText.append(ch, start, length);
}

@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
String text = currentText.toString().trim();

if ("name".equals(localName)) {
if (inSchemaName) {
context.setCurrentSchema(text);
inSchemaName = false;
} else if (inTableName) {
context.setCurrentTable(text);
inTableName = false;
} else if (inViewName) {
context.setCurrentView(text);
inViewName = false;
}
} else if ("schema".equals(localName)) {
inSchema = false;
} else if ("table".equals(localName)) {
inTable = false;
} else if ("view".equals(localName)) {
inView = false;
}

currentText.setLength(0);
}

public SiardXmlContext getContext() {
return context;
}
}

/**
* Container for SIARD XML context information
*/
public static class SiardXmlContext {
private String currentSchema;
private String currentTable;
private String currentView;

public String getCurrentSchema() {
return currentSchema;
}

public void setCurrentSchema(String currentSchema) {
this.currentSchema = currentSchema;
}

public String getCurrentTable() {
return currentTable;
}

public void setCurrentTable(String currentTable) {
this.currentTable = currentTable;
}

public String getCurrentView() {
return currentView;
}

public void setCurrentView(String currentView) {
this.currentView = currentView;
}
}
}
Loading