diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e08b5fcba..1e66fd0c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +### Added + +- Added `compactionThreshold` configuration option to control how often the engine compacts accumulated formula transformations. [#1629](https://github.com/handsontable/hyperformula/issues/1629) + ### Fixed +- Fixed a memory leak in `LazilyTransformingAstService` where the transformations array grew unboundedly, causing increasing memory usage over time. [#1629](https://github.com/handsontable/hyperformula/issues/1629) +- Fixed a memory leak in `UndoRedo` where `oldData` entries for evicted undo stack entries were never cleaned up, causing increasing memory usage over time. [#1629](https://github.com/handsontable/hyperformula/issues/1629) - Fixed the IRR function returning `#NUM!` error when the initial investment significantly exceeds the sum of returns. [#1628](https://github.com/handsontable/hyperformula/issues/1628) - Fixed the ADDRESS function ignoring `defaultValue` when arguments are syntactically empty (e.g., `=ADDRESS(2,3,,FALSE())`). [#1632](https://github.com/handsontable/hyperformula/issues/1632) diff --git a/src/BuildEngineFactory.ts b/src/BuildEngineFactory.ts index 1faff1e889..97f09a0d95 100644 --- a/src/BuildEngineFactory.ts +++ b/src/BuildEngineFactory.ts @@ -72,7 +72,7 @@ export class BuildEngineFactory { const namedExpressions = new NamedExpressions() const functionRegistry = new FunctionRegistry(config) - const lazilyTransformingAstService = new LazilyTransformingAstService(stats) + const lazilyTransformingAstService = new LazilyTransformingAstService(stats, config.compactionThreshold) const dependencyGraph = DependencyGraph.buildEmpty(lazilyTransformingAstService, config, functionRegistry, namedExpressions, stats) const columnSearch = buildColumnSearchStrategy(dependencyGraph, config, stats) const sheetMapping = dependencyGraph.sheetMapping diff --git a/src/Config.ts b/src/Config.ts index c345c97e95..f362e0fa00 100644 --- a/src/Config.ts +++ b/src/Config.ts @@ -62,6 +62,7 @@ export class Config implements ConfigParams, ParserConfig { timeFormats: ['hh:mm', 'hh:mm:ss.sss'], thousandSeparator: '', undoLimit: 20, + compactionThreshold: 50, useRegularExpressions: false, useWildcards: true, useColumnIndex: false, @@ -135,6 +136,8 @@ export class Config implements ConfigParams, ParserConfig { /** @inheritDoc */ public readonly undoLimit: number /** @inheritDoc */ + public readonly compactionThreshold: number + /** @inheritDoc */ public readonly context: unknown /** @@ -198,6 +201,7 @@ export class Config implements ConfigParams, ParserConfig { useArrayArithmetic, useStats, undoLimit, + compactionThreshold, useColumnIndex, useRegularExpressions, useWildcards, @@ -244,10 +248,12 @@ export class Config implements ConfigParams, ParserConfig { this.nullDate = configValueFromParamCheck(nullDate, instanceOfSimpleDate, 'IDate', 'nullDate') this.leapYear1900 = configValueFromParam(leapYear1900, 'boolean', 'leapYear1900') this.undoLimit = configValueFromParam(undoLimit, 'number', 'undoLimit') + this.compactionThreshold = configValueFromParam(compactionThreshold, 'number', 'compactionThreshold') this.useRegularExpressions = configValueFromParam(useRegularExpressions, 'boolean', 'useRegularExpressions') this.useWildcards = configValueFromParam(useWildcards, 'boolean', 'useWildcards') this.matchWholeCell = configValueFromParam(matchWholeCell, 'boolean', 'matchWholeCell') validateNumberToBeAtLeast(this.undoLimit, 'undoLimit', 0) + validateNumberToBeAtLeast(this.compactionThreshold, 'compactionThreshold', 1) this.maxRows = configValueFromParam(maxRows, 'number', 'maxRows') validateNumberToBeAtLeast(this.maxRows, 'maxRows', 1) this.maxColumns = configValueFromParam(maxColumns, 'number', 'maxColumns') diff --git a/src/ConfigParams.ts b/src/ConfigParams.ts index e036731c4f..2a8067beef 100644 --- a/src/ConfigParams.ts +++ b/src/ConfigParams.ts @@ -402,6 +402,18 @@ export interface ConfigParams { * @category Undo and Redo */ undoLimit: number, + /** + * Sets the number of accumulated formula transformations that triggers compaction + * of the LazilyTransformingAstService. When the number of pending transformations + * reaches this threshold, all formula vertices and column indexes are forced to + * apply their postponed transformations, and the transformation history is cleared. + * + * Lower values cause more frequent compaction (useful for testing), while higher + * values reduce overhead at the cost of more memory usage. + * @default 50 + * @category Engine + */ + compactionThreshold: number, /** * When set to `true`, criteria in functions (SUMIF, COUNTIF, ...) are allowed to use regular expressions. * @default false diff --git a/src/HyperFormula.ts b/src/HyperFormula.ts index e7c4b5150c..566957270a 100644 --- a/src/HyperFormula.ts +++ b/src/HyperFormula.ts @@ -4595,6 +4595,20 @@ export class HyperFormula implements TypedEmitter { this._functionRegistry = newEngine.functionRegistry } + /** + * When enough transformations have accumulated, forces all formula vertices and + * column index entries to apply pending lazy transformations, then compacts the + * transformation history and cleans up orphaned undo oldData entries. + */ + private compactLazyTransformationsIfNeeded(): void { + if (this._lazilyTransformingAstService.needsCompaction()) { + this._dependencyGraph.forceApplyPostponedTransformations() + this._columnSearch.forceApplyPostponedTransformations() + this._lazilyTransformingAstService.compact() + this._lazilyTransformingAstService.undoRedo?.cleanupOrphanedOldData() + } + } + /** * Runs a recomputation starting from recently changed vertices. * @@ -4610,6 +4624,8 @@ export class HyperFormula implements TypedEmitter { const verticesToRecomputeFrom = this.dependencyGraph.verticesToRecompute() this.dependencyGraph.clearDirtyVertices() + this.compactLazyTransformationsIfNeeded() + if (verticesToRecomputeFrom.length > 0) { changes.addAll(this.evaluator.partialRun(verticesToRecomputeFrom)) } diff --git a/src/LazilyTransformingAstService.ts b/src/LazilyTransformingAstService.ts index 899f89d2ac..acd517577b 100644 --- a/src/LazilyTransformingAstService.ts +++ b/src/LazilyTransformingAstService.ts @@ -11,21 +11,56 @@ import {StatType} from './statistics' import {Statistics} from './statistics/Statistics' import {UndoRedo} from './UndoRedo' +/** + * Manages lazy application of formula AST transformations. + * + * ## Problem + * Structural operations (adding/removing rows/columns, moving cells, renaming sheets) + * require updating every formula that references the affected area. Applying these + * transformations eagerly to all formulas after every operation is expensive, especially + * for large spreadsheets with many formulas. + * + * ## Solution: Lazy Transformation + * Instead of transforming all formulas immediately, this service stores transformations + * in a queue. Each formula vertex (FormulaVertex) and column index entry (ValueIndex) + * tracks its own version number. When a consumer needs up-to-date data, it calls + * `applyTransformations()` with its current version and receives all transformations + * accumulated since that version. + * + * ## Compaction + * Over time, the transformations array grows unboundedly. To prevent this memory leak, + * the engine periodically triggers compaction when the number of accumulated + * transformations reaches the configurable `compactionThreshold`: + * + * 1. All FormulaVertex instances are forced to apply pending transformations + * (via `DependencyGraph.forceApplyPostponedTransformations()`). + * 2. All ColumnIndex entries are forced to apply pending transformations + * (via `ColumnSearchStrategy.forceApplyPostponedTransformations()`). + * 3. `compact()` is called, which advances `versionOffset` and clears the + * transformations array. + * 4. `UndoRedo.cleanupOrphanedOldData()` removes any oldData entries that were + * written during forced application but belong to already-evicted undo entries. + * + * The `versionOffset` ensures that version numbers remain globally consistent + * after compaction: `version() = versionOffset + transformations.length`. + */ export class LazilyTransformingAstService { public parser?: ParserWithCaching public undoRedo?: UndoRedo private transformations: FormulaTransformer[] = [] + private versionOffset: number = 0 private combinedTransformer?: CombinedTransformer constructor( private readonly stats: Statistics, + private readonly compactionThreshold: number, ) { } public version(): number { - return this.transformations.length + return this.versionOffset + this.transformations.length } public addTransformation(transformation: FormulaTransformer): number { @@ -53,8 +88,9 @@ export class LazilyTransformingAstService { public applyTransformations(ast: Ast, address: SimpleCellAddress, version: number): [Ast, SimpleCellAddress, number] { this.stats.start(StatType.TRANSFORM_ASTS_POSTPONED) - for (let v = version; v < this.transformations.length; v++) { - const transformation = this.transformations[v] + const currentVersion = this.version() + for (let v = Math.max(version, this.versionOffset); v < currentVersion; v++) { + const transformation = this.transformations[v - this.versionOffset] if (transformation.isIrreversible()) { this.undoRedo!.storeDataForVersion(v, address, this.parser!.computeHashFromAst(ast)) this.parser!.rememberNewAst(ast) @@ -67,15 +103,37 @@ export class LazilyTransformingAstService { const cachedAst = this.parser!.rememberNewAst(ast) this.stats.end(StatType.TRANSFORM_ASTS_POSTPONED) - return [cachedAst, address, this.transformations.length] + return [cachedAst, address, currentVersion] } public* getTransformationsFrom(version: number, filter?: (transformation: FormulaTransformer) => boolean): IterableIterator { - for (let v = version; v < this.transformations.length; v++) { - const transformation = this.transformations[v] + const currentVersion = this.version() + for (let v = Math.max(version, this.versionOffset); v < currentVersion; v++) { + const transformation = this.transformations[v - this.versionOffset] if (!filter || filter(transformation)) { yield transformation } } } + + /** + * Returns true when enough transformations have accumulated to justify the cost + * of forcing all consumers (FormulaVertex, ColumnIndex) to apply pending changes. + */ + public needsCompaction(): boolean { + return this.transformations.length >= this.compactionThreshold + } + + /** + * Compacts the transformations array by discarding all entries that have already + * been applied by every consumer. Safe to call only after all FormulaVertex and + * ColumnIndex consumers have been brought up to the current version. + * After calling, UndoRedo.cleanupOrphanedOldData() must be invoked to remove + * oldData entries written during forceApplyPostponedTransformations for + * already-evicted undo entries. + */ + public compact(): void { + this.versionOffset += this.transformations.length + this.transformations = [] + } } diff --git a/src/Lookup/ColumnBinarySearch.ts b/src/Lookup/ColumnBinarySearch.ts index 458070687c..cf034d11cc 100644 --- a/src/Lookup/ColumnBinarySearch.ts +++ b/src/Lookup/ColumnBinarySearch.ts @@ -53,6 +53,16 @@ export class ColumnBinarySearch extends AdvancedFind implements ColumnSearchStra public removeValues(range: IterableIterator<[RawScalarValue, SimpleCellAddress]>): void { } + /** + * No-op: ColumnBinarySearch reads cell values directly from the dependency graph + * on every lookup, so it has no cached data that could become stale. + * Unlike ColumnIndex, which maintains a separate value-to-address index that + * must be kept in sync with lazy transformations, binary search always operates + * on the current graph state. + */ + public forceApplyPostponedTransformations(): void { + } + /* * WARNING: Finding lower/upper bounds in unordered ranges is not supported. When ordering === 'none', assumes matchExactly === true */ diff --git a/src/Lookup/ColumnIndex.ts b/src/Lookup/ColumnIndex.ts index ba6a3c9201..d7c32e5894 100644 --- a/src/Lookup/ColumnIndex.ts +++ b/src/Lookup/ColumnIndex.ts @@ -184,6 +184,24 @@ export class ColumnIndex implements ColumnSearchStrategy { this.index.delete(sheetId) } + /** + * Forces all ValueIndex entries to apply any pending lazy transformations, + * bringing every entry up to the current LazilyTransformingAstService version. + * Must be called before compacting LazilyTransformingAstService. + */ + public forceApplyPostponedTransformations(): void { + for (const [sheet, sheetIndex] of this.index) { + sheetIndex.forEach((columnMap, col) => { + if (!columnMap) { + return + } + for (const value of columnMap.keys()) { + this.ensureRecentData(sheet, col, value) + } + }) + } + } + public getColumnMap(sheet: number, col: number): ColumnMap { if (!this.index.has(sheet)) { this.index.set(sheet, []) diff --git a/src/Lookup/SearchStrategy.ts b/src/Lookup/SearchStrategy.ts index a12a48fc6a..2316e586c9 100644 --- a/src/Lookup/SearchStrategy.ts +++ b/src/Lookup/SearchStrategy.ts @@ -51,6 +51,13 @@ export interface ColumnSearchStrategy extends SearchStrategy { moveValues(range: IterableIterator<[RawScalarValue, SimpleCellAddress]>, toRight: number, toBottom: number, toSheet: number): void, removeValues(range: IterableIterator<[RawScalarValue, SimpleCellAddress]>): void, + + /** + * Forces all lazily-tracked ValueIndex entries to apply any pending transformations, + * bringing every entry's version up to the current LazilyTransformingAstService version. + * Must be called before compacting LazilyTransformingAstService. + */ + forceApplyPostponedTransformations(): void, } export function buildColumnSearchStrategy(dependencyGraph: DependencyGraph, config: Config, statistics: Statistics): ColumnSearchStrategy { diff --git a/src/Operations.ts b/src/Operations.ts index 27f45c2d9c..6a0e5ccdd4 100644 --- a/src/Operations.ts +++ b/src/Operations.ts @@ -737,8 +737,14 @@ export class Operations { return changes } + /** + * Forces all formula vertices and column index entries to apply pending lazy + * transformations, bringing them up to the current LazilyTransformingAstService version. + * Called before undo of move operations and before compaction. + */ public forceApplyPostponedTransformations(): void { this.dependencyGraph.forceApplyPostponedTransformations() + this.columnSearch.forceApplyPostponedTransformations() } /** diff --git a/src/UndoRedo.ts b/src/UndoRedo.ts index 5413a79d59..87ab06439a 100644 --- a/src/UndoRedo.ts +++ b/src/UndoRedo.ts @@ -22,12 +22,26 @@ export interface UndoEntry { doUndo(undoRedo: UndoRedo): void, doRedo(undoRedo: UndoRedo): void, + + /** + * Returns the LazilyTransformingAstService version keys referenced by this entry's oldData storage. + * Used to clean up oldData when the entry is permanently evicted from the undo/redo stack. + */ + getReferencedOldDataVersions(): number[], } export abstract class BaseUndoEntry implements UndoEntry { abstract doUndo(undoRedo: UndoRedo): void abstract doRedo(undoRedo: UndoRedo): void + + /** + * Returns LazilyTransformingAstService version keys referenced by this entry's oldData. + * Default implementation returns empty — override in entries that store oldData. + */ + public getReferencedOldDataVersions(): number[] { + return [] + } } export class RemoveRowsUndoEntry extends BaseUndoEntry { @@ -45,6 +59,10 @@ export class RemoveRowsUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoRemoveRows(this) } + + public getReferencedOldDataVersions(): number[] { + return this.rowsRemovals.filter(r => r.version > 0).map(r => r.version - 1) + } } export class MoveCellsUndoEntry extends BaseUndoEntry { @@ -67,6 +85,10 @@ export class MoveCellsUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoMoveCells(this) } + + public getReferencedOldDataVersions(): number[] { + return this.version > 0 ? [this.version - 1] : [] + } } export class AddRowsUndoEntry extends BaseUndoEntry { @@ -162,6 +184,10 @@ export class MoveRowsUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoMoveRows(this) } + + public getReferencedOldDataVersions(): number[] { + return this.version > 0 ? [this.version - 1] : [] + } } export class MoveColumnsUndoEntry extends BaseUndoEntry { @@ -187,6 +213,10 @@ export class MoveColumnsUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoMoveColumns(this) } + + public getReferencedOldDataVersions(): number[] { + return this.version > 0 ? [this.version - 1] : [] + } } export class AddColumnsUndoEntry extends BaseUndoEntry { @@ -220,6 +250,10 @@ export class RemoveColumnsUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoRemoveColumns(this) } + + public getReferencedOldDataVersions(): number[] { + return this.columnsRemovals.filter(r => r.version > 0).map(r => r.version - 1) + } } export class AddSheetUndoEntry extends BaseUndoEntry { @@ -286,6 +320,10 @@ export class RenameSheetUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoRenameSheet(this) } + + public getReferencedOldDataVersions(): number[] { + return (this.version !== undefined && this.version > 0) ? [this.version - 1] : [] + } } export class ClearSheetUndoEntry extends BaseUndoEntry { @@ -421,8 +459,44 @@ export class BatchUndoEntry extends BaseUndoEntry { public doRedo(undoRedo: UndoRedo): void { undoRedo.redoBatch(this) } + + public getReferencedOldDataVersions(): number[] { + return this.operations.flatMap(op => op.getReferencedOldDataVersions()) + } } +/** + * Manages undo/redo stacks for all spreadsheet operations. + * + * ## oldData: Preserving Formula ASTs Across Irreversible Transformations + * + * Some structural operations (e.g., removing rows/columns, moving cells) destroy + * formula information that cannot be reconstructed from the transformation alone. + * For example, when a row is removed, formulas referencing that row are rewritten + * to `#REF!` — an irreversible change. + * + * To support undo of such operations, `oldData` stores snapshots of formula AST + * hashes keyed by the LazilyTransformingAstService version at which the irreversible + * transformation was applied. Each entry maps a version number to an array of + * `[cellAddress, astHash]` pairs that can be used to restore the original formula + * from the parser cache. + * + * ### Memory Management + * + * Without cleanup, `oldData` grows indefinitely as undo entries are evicted but + * their oldData keys remain. Three mechanisms prevent this: + * + * 1. **Eviction cleanup**: When undo entries are evicted (due to `undoLimit`), + * `cleanupOldDataForEntries()` deletes their referenced oldData keys + * (unless still needed by entries on the other stack). + * 2. **Orphan cleanup**: Compaction may force lazy formula evaluation, which + * writes new oldData entries for already-evicted undo entries. After compaction, + * `cleanupOrphanedOldData()` removes any keys not referenced by entries on + * either stack or the in-progress batch. + * 3. **Short-circuit**: When `undoLimit` is 0 (undo disabled), + * `storeDataForVersion()` returns immediately to avoid storing data that + * would never be used. + */ export class UndoRedo { public oldData: Map = new Map() private undoStack: UndoEntry[] = [] @@ -457,7 +531,14 @@ export class UndoRedo { this.batchUndoEntry = undefined } + /** + * Stores a formula AST hash snapshot for the given LazilyTransformingAstService version. + * Skipped when `undoLimit` is 0 (undo disabled) to avoid storing data that would never be used. + */ public storeDataForVersion(version: number, address: SimpleCellAddress, astHash: string) { + if (this.undoLimit === 0) { + return + } if (!this.oldData.has(version)) { this.oldData.set(version, []) } @@ -465,11 +546,15 @@ export class UndoRedo { currentOldData.push([address, astHash]) } + /** Clears the redo stack and removes oldData entries no longer referenced by any remaining entry. */ public clearRedoStack() { + this.cleanupOldDataForEntries(this.redoStack, this.undoStack) this.redoStack = [] } + /** Clears the undo stack and removes oldData entries no longer referenced by any remaining entry. */ public clearUndoStack() { + this.cleanupOldDataForEntries(this.undoStack, this.redoStack) this.undoStack = [] } @@ -565,12 +650,14 @@ export class UndoRedo { } public undoMoveRows(operation: MoveRowsUndoEntry) { + this.operations.forceApplyPostponedTransformations() const {sheet} = operation this.operations.moveRows(sheet, operation.undoStart, operation.numberOfRows, operation.undoEnd) this.restoreOldDataFromVersion(operation.version - 1) } public undoMoveColumns(operation: MoveColumnsUndoEntry) { + this.operations.forceApplyPostponedTransformations() const {sheet} = operation this.operations.moveColumns(sheet, operation.undoStart, operation.numberOfColumns, operation.undoEnd) this.restoreOldDataFromVersion(operation.version - 1) @@ -771,9 +858,56 @@ export class UndoRedo { this.operations.setColumnOrder(operation.sheetId, operation.columnMapping) } + /** + * Adds an entry to the undo stack, evicting the oldest entries when undoLimit is exceeded. + * Evicted entries have their oldData keys cleaned up to prevent memory leaks. + */ private addUndoEntry(operation: UndoEntry) { this.undoStack.push(operation) - this.undoStack.splice(0, Math.max(0, this.undoStack.length - this.undoLimit)) + const evictCount = Math.max(0, this.undoStack.length - this.undoLimit) + if (evictCount > 0) { + const evicted = this.undoStack.splice(0, evictCount) + this.cleanupOldDataForEntries(evicted, [...this.undoStack, ...this.redoStack]) + } + } + + /** + * Removes oldData entries whose version keys are not referenced by any + * entry on the undo stack, redo stack, or in-progress batch. Called after + * compaction forces lazy formula evaluation, which may insert oldData for + * already-evicted entries. + */ + public cleanupOrphanedOldData() { + const referencedVersions = this.collectReferencedOldDataVersions(this.undoStack, this.redoStack) + for (const version of this.oldData.keys()) { + if (!referencedVersions.has(version)) { + this.oldData.delete(version) + } + } + } + + /** + * Removes oldData entries referenced by permanently discarded undo/redo entries, + * but only if not still referenced by entries remaining on the other stack or + * in-progress batch. + */ + private cleanupOldDataForEntries(discardedEntries: UndoEntry[], retainedEntries: UndoEntry[]) { + const retainedVersions = this.collectReferencedOldDataVersions(retainedEntries) + for (const entry of discardedEntries) { + for (const version of entry.getReferencedOldDataVersions()) { + if (!retainedVersions.has(version)) { + this.oldData.delete(version) + } + } + } + } + + /** Collects all oldData version keys referenced by the given entry lists and in-progress batch. */ + private collectReferencedOldDataVersions(...entryLists: UndoEntry[][]): Set { + return new Set([ + ...entryLists.flatMap(list => list.flatMap(e => e.getReferencedOldDataVersions())), + ...(this.batchUndoEntry?.getReferencedOldDataVersions() ?? []), + ]) } private undoEntry(operation: UndoEntry) {