From f925a20b5908e79ae45592a98ca440c6f5f2ac40 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 2 Apr 2026 11:34:40 +0200 Subject: [PATCH 1/5] Add architecture design document for sequential fitting feature --- .../architecture/sequential_fitting_design.md | 717 ++++++++++++++++++ 1 file changed, 717 insertions(+) create mode 100644 docs/architecture/sequential_fitting_design.md diff --git a/docs/architecture/sequential_fitting_design.md b/docs/architecture/sequential_fitting_design.md new file mode 100644 index 00000000..bf3ac81b --- /dev/null +++ b/docs/architecture/sequential_fitting_design.md @@ -0,0 +1,717 @@ +# Sequential Fitting — Architecture Design + +**Status:** Draft — for discussion before implementation +**Date:** 2026-04-02 + +--- + +## 1. Motivation + +The current single-fit mode (`fit_mode = 'single'`) iterates over +pre-loaded experiments sequentially. Structural parameters propagate +from one fit to the next, and per-experiment snapshots are stored in +memory (`Analysis._parameter_snapshots`). This works well for tens or +hundreds of datasets (e.g. the D20 temperature scan in `ed-17.py`) but +does not scale to thousands or tens of thousands of files: + +| Limitation | Impact at scale (N > 1000) | +| ---------------------------- | --------------------------------------------- | +| All experiments pre-loaded | Memory grows linearly with N | +| Single-threaded | Wall-clock time grows linearly with N | +| Snapshots in memory only | Lost on crash; no incremental persistence | +| Structure params overwritten | Cannot replot earlier datasets after the loop | + +Users at ESS request fitting of up to ~50 000 data files from +dose-resolved or time-resolved experiments. + +--- + +## 2. Current State + +### 2.1 Single-fit loop (Analysis.fit, mode='single') + +``` +for each experiment in project.experiments: + create dummy Experiments wrapper (1 experiment) + fitter.fit(structures, dummy_experiments) + snapshot fitted params in memory dict +``` + +- Structure parameters are shared: the `Structures` collection is passed + directly to `Fitter.fit()`, so each fit mutates the same `Structure` + objects. +- Experiment parameters are independent: each `ExperimentBase` has its + own instrument, peak, background, etc. +- After the loop, `_parameter_snapshots[expt_name]` contains + `{unique_name: {value, uncertainty, units}}` per experiment. +- `plot_param_series()` reads from `_parameter_snapshots` and the live + `experiments` collection (for diffrn metadata like temperature). + +### 2.2 Limitations for the proposed feature + +- `ExperimentFactory.from_data_path()` creates a fresh experiment with + default parameters — it cannot directly clone a template experiment's + full configuration (instrument type, peak type, background points, + excluded regions, linked phases). +- `ConstraintsHandler` and `UidMapHandler` are process-wide singletons — + concurrent access from multiple threads would cause state corruption. +- `CryspyCalculator` stores `_cryspy_dicts` per + `{structure}_{experiment}` — pure-Python computation, does not release + the GIL. + +--- + +## 3. Requirements + +Numbered to match the original request. + +1. **No bulk preloading** — do not create all experiment objects in + advance. +2. **Parallel fitting** — run multiple independent fits simultaneously. +3. **Parameter propagation** — reuse fitted parameters from the previous + chunk as starting values for the next chunk. +4. 
**Chunk-based processing** — load N datasets, fit N independently in + parallel, propagate, repeat. +5. **Incremental CSV output** — one row per dataset, written after each + chunk; includes χ², fit status, and all fitted parameter values + + uncertainties. +6. **Crash recovery** — on restart, read CSV, skip already-fitted files, + resume from the last fitted row's parameters. +7. **Separate initial fit** — the user runs a regular `fit()` on one + dataset first to establish good starting values. +8. **Store all fitted parameters in CSV** — both structure and + experiment params, so any dataset can be replayed later. The project + file retains only the template structure CIF and template experiment + CIF. +9. **Scalable** — works for N = 1 (degenerate case) and N = 50 000. +10. **`max_workers` configuration** — default `1` (sequential), user can + set higher or `'auto'`. +11. **Consider alternatives** — see § 7. + +--- + +## 4. Design Overview + +A new method `Analysis.fit_sequential()` orchestrates the batch. It does +**not** reuse the existing `fit()` loop — the data-flow is fundamentally +different (data paths in → CSV out, experiments are ephemeral). + +``` +User sets up: 1 Structure + 1 template Experiment + │ + ▼ + project.analysis.fit() ← initial fit on template + │ + ▼ + project.analysis.fit_sequential( + data_paths=[...], + output_csv='results.csv', + max_workers=4, + ) + │ + ▼ + ┌──────────────────────────────┐ + │ Read CSV for crash recovery │ + │ Skip already-fitted files │ + │ Seed params from last row │ + └──────┬───────────────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ For each chunk of N files: │ + │ │ + │ ┌────┐ ┌────┐ ┌────┐ │ + │ │ W1 │ │ W2 │ ... │ Wn │ │ Workers (processes) + │ └──┬─┘ └──┬─┘ └──┬─┘ │ + │ │ │ │ │ + │ ▼ ▼ ▼ │ + │ Collect results │ + │ Append rows to CSV │ + │ Propagate params from │ + │ last file in chunk │ + └──────────────────────────────┘ + │ + ▼ + CSV with all results + (one row per dataset) +``` + +--- + +## 5. User-Facing API + +### 5.1 The template workflow + +```python +import easydiffraction as ed + +project = ed.Project() +project.verbosity = 'short' + +# ── Structure ──────────────────────────────────────────── +project.structures.create(name='cosio') +structure = project.structures['cosio'] +structure.space_group.name_h_m = 'P n m a' +structure.cell.length_a = 10.31 +# ... atom sites ... + +# ── Template experiment ────────────────────────────────── +data_paths = ed.extract_data_paths_from_zip('scans.zip') + +project.experiments.add_from_data_path( + name='template', + data_path=data_paths[0], + sample_form='powder', + beam_mode='constant wavelength', + radiation_probe='neutron', +) +expt = project.experiments['template'] +expt.instrument.setup_wavelength = 1.87 +expt.peak.broad_gauss_u = 0.24 +expt.background.create(id='1', x=8, y=609) +# ... more experiment config ... 
+expt.linked_phases.create(id='cosio', scale=1.2) + +# ── Free parameters ────────────────────────────────────── +structure.cell.length_a.free = True +expt.linked_phases['cosio'].scale.free = True +expt.instrument.calib_twotheta_offset.free = True +for point in expt.background: + point.y.free = True + +# ── Constraints (optional) ─────────────────────────────── +project.analysis.aliases.create( + label='biso_Co1', + param_uid=structure.atom_sites['Co1'].b_iso.uid, +) +project.analysis.constraints.create(expression='biso_Co2 = biso_Co1') +project.analysis.apply_constraints() + +# ── Initial fit on the template ────────────────────────── +project.analysis.fit() +project.analysis.show_fit_results() + +# ── Sequential fit over all files ──────────────────────── +project.analysis.fit_sequential( + data_paths=data_paths, + output_csv='results.csv', + max_workers=4, +) +``` + +### 5.2 Method signature + +```python +def fit_sequential( + self, + data_paths: list[str], + output_csv: str, + max_workers: int = 1, + chunk_size: int | None = None, + verbosity: str | None = None, +) -> None: +``` + +| Parameter | Description | +| ------------- | ------------------------------------------------------------------------------------- | +| `data_paths` | Ordered list of data file paths to process. | +| `output_csv` | Path to the output CSV file. Created if missing, appended if exists (crash recovery). | +| `max_workers` | Number of parallel worker processes. `1` = sequential (no subprocess overhead). | +| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | +| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | + +### 5.3 Plotting results from CSV + +After `fit_sequential()`, parameter evolution is read from the CSV file +rather than from in-memory snapshots: + +```python +project.plot_param_series_from_csv( + csv_path='results.csv', + param=structure.cell.length_a, + versus=expt.diffrn.ambient_temperature, +) +``` + +Alternatively, `plot_param_series()` could detect whether the data +source is in-memory snapshots (small N) or a CSV file (large N) based on +whether `output_csv` was produced. This is a UX decision to discuss. + +### 5.4 Replaying a single dataset + +To replot or inspect one dataset from the batch: + +```python +# Load template project +project = ed.Project.load('my_project') + +# Replace fitted params for dataset #500 from CSV +project.apply_params_from_csv('results.csv', row=500) + +# Plot +project.plot_meas_vs_calc(expt_name='template') +``` + +This requires loading the dataset's data file and overriding parameter +values from the CSV row. The exact API can be refined; the key point is +that CSV + template CIF is sufficient to reconstruct any dataset's +state. + +--- + +## 6. Internal Architecture + +### 6.1 Preconditions + +`fit_sequential()` validates before starting: + +- Exactly 1 structure in `project.structures`. +- Exactly 1 experiment in `project.experiments` (the template). +- At least 1 free parameter. +- `output_csv` path is writable. +- `data_paths` is non-empty. 
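+
+A minimal sketch of this validation step. The helper name
+`_validate_preconditions`, the `len()` support on the collections, and
+the exact exception types are assumptions for illustration; only the
+checks themselves are fixed by this design:
+
+```python
+from pathlib import Path
+
+
+def _validate_preconditions(project, data_paths, output_csv) -> None:
+    """Fail fast, before any template snapshot or worker dispatch."""
+    if len(project.structures) != 1:
+        raise ValueError('fit_sequential requires exactly 1 structure')
+    if len(project.experiments) != 1:
+        raise ValueError('fit_sequential requires exactly 1 template '
+                         'experiment')
+    free_params = (project.structures.free_parameters
+                   + project.experiments.free_parameters)
+    if not free_params:
+        raise ValueError('fit_sequential requires at least 1 free '
+                         'parameter')
+    if not data_paths:
+        raise ValueError('data_paths must not be empty')
+    # Opening in append mode proves writability and creates the file
+    # if missing, without clobbering a CSV needed for crash recovery.
+    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
+    with open(output_csv, 'a', encoding='utf-8'):
+        pass
+```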
+ +### 6.2 Template snapshot + +Before dispatching workers, the method captures a **template snapshot** +— everything a worker needs to recreate the project independently: + +```python +@dataclass(frozen=True) +class SequentialFitTemplate: + structure_cif: str # structure.as_cif + experiment_cif: str # experiment.as_cif (template experiment) + experiment_axes: dict # {sample_form, beam_mode, radiation_probe, scattering_type} + initial_params: dict # {unique_name: value} for ALL free params + free_param_names: list[str] # unique_names of free params + alias_defs: list[dict] # [{label, param_uid}, ...] + constraint_defs: list[str] # [expression, ...] + minimizer_tag: str # e.g. 'lmfit' + calculator_tag: str # e.g. 'cryspy' +``` + +This is a plain, picklable data object (no live references to +`GuardedBase` instances). + +### 6.3 Chunk-based processing + +```python +remaining_paths = [p for p in data_paths if p not in already_fitted] + +for chunk in chunked(remaining_paths, chunk_size): + if max_workers == 1: + results = [_fit_worker(template, path) for path in chunk] + else: + with ProcessPoolExecutor( + max_workers=max_workers, + mp_context=get_context('spawn'), + ) as pool: + futures = { + pool.submit(_fit_worker, template, path): path + for path in chunk + } + results = [f.result() for f in as_completed(futures)] + + # Sort results to match file order (as_completed is unordered) + results.sort(key=lambda r: data_paths.index(r['file_path'])) + + _append_to_csv(output_csv, results) + + # Propagate: use last file's params as next chunk's starting values + last_result = results[-1] + template = replace(template, initial_params=last_result['params']) +``` + +The `'spawn'` context is required because: + +- `cryspy` is pure Python — the GIL prevents true thread parallelism. +- `fork` can deadlock with C extensions and is unreliable on macOS. +- `spawn` creates a fresh Python interpreter per worker — singletons + (`UidMapHandler`, `ConstraintsHandler`) are naturally isolated. + +### 6.4 Worker function + +The worker is a **module-level function** (required for pickling by +`ProcessPoolExecutor`): + +```python +def _fit_worker( + template: SequentialFitTemplate, + data_path: str, +) -> dict[str, Any]: + """ + Fit a single dataset in an isolated process. + + Creates a fresh Project, loads the template configuration, + replaces data from data_path, applies initial parameters, fits, + and returns a plain dict of results. + """ + # 1. Create fresh project (isolated singletons, no shared state) + project = Project(name='_worker') + + # 2. Load structure from CIF + project.structures.add_from_cif_str(template.structure_cif) + + # 3. Create experiment from data path (loads measured data) + project.experiments.add_from_data_path( + name='expt', + data_path=data_path, + sample_form=template.experiment_axes['sample_form'], + beam_mode=template.experiment_axes['beam_mode'], + radiation_probe=template.experiment_axes['radiation_probe'], + scattering_type=template.experiment_axes['scattering_type'], + verbosity='silent', + ) + + # 4. Apply template experiment configuration + _apply_template_config( + experiment=project.experiments['expt'], + template=template, + ) + + # 5. Override parameter values from propagated starting values + _apply_param_overrides(project, template.initial_params) + + # 6. Set free flags + _set_free_params(project, template.free_param_names) + + # 7. Apply constraints + _apply_constraints(project, template.alias_defs, template.constraint_defs) + + # 8. 
Set calculator and minimizer + project.experiments['expt'].calculator_type = template.calculator_tag + project.analysis.current_minimizer = template.minimizer_tag + + # 9. Fit + project.analysis.fit(verbosity='silent') + + # 10. Collect results + return _collect_results(project, data_path) +``` + +#### Step 4: Applying template configuration + +The template experiment's CIF contains instrument, peak, background, +excluded regions, linked phases, and their parameters. However, +`add_from_data_path` creates an experiment with default configuration. +The worker must then apply the template's configuration on top. + +**Two approaches:** + +| Approach | Pros | Cons | +| ------------------------------------------ | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| **A. Create from CIF, reload data** | All config comes from CIF; no manual param copying | Requires `_load_ascii_data_to_experiment()` to work after CIF construction; CIF must round-trip perfectly | +| **B. Create from data path, apply config** | Data loading is clean; explicit param assignment | Requires enumerating which template settings to copy; more fragile if experiment categories change | + +**Recommended: Approach A** — create from template CIF, then reload +data. This is more robust because: + +- CIF serialisation already captures the full experiment state. +- Adding new categories or parameters requires no changes to the worker. +- `_load_ascii_data_to_experiment()` already exists on all concrete + experiment classes. + +```python +# Approach A sketch: +# 3. Create experiment from template CIF (full config + template data) +project.experiments.add_from_cif_str(template.experiment_cif) +expt = project.experiments['template'] +expt.name = 'expt' # rename for consistency + +# 4. Replace data from new data path +expt._load_ascii_data_to_experiment(data_path) +``` + +**Prerequisite:** `_load_ascii_data_to_experiment` must correctly +overwrite the existing data category contents (clear old data points and +load new ones). This needs to be verified and potentially adjusted. + +**Open question:** Does `experiment.as_cif` round-trip cleanly through +`ExperimentFactory.from_cif_str()`? If not, Approach A is blocked until +CIF round-trip is reliable (see `issues_open.md` issue #1 and #12). +Approach B would be the fallback. See § 8 open question 2. + +### 6.5 Parameter propagation strategy + +After each chunk completes: + +1. Take the results from the **last file** in the chunk (by file sort + order). +2. Extract all free parameter values. +3. Use these as `initial_params` for all workers in the next chunk. + +**Why the last file?** Assuming files are ordered (by time, temperature, +dose), the last file's parameters are the closest extrapolation for the +next chunk. Within a chunk, all workers start from the same values, so +parameter evolution within a chunk comes only from the fit — not from +propagation. + +**Edge case — last file's fit failed:** if the last file in a chunk +fails, fall back to the last _successful_ result in the chunk (scanning +from the end). If no file in the chunk succeeded, keep the previous +chunk's parameters and log a warning. + +**Within-chunk ordering matters for `max_workers=1`:** when running +sequentially (`max_workers=1`), the chunk size is 1, so propagation +happens after every file — identical to the current single-fit mode. +This preserves backward compatibility. 
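+
+A sketch of the failed-fit fallback described in this section,
+assuming each result dict carries the `fit_success` flag and the
+`params` mapping used elsewhere in this design (`results` is already
+sorted back into file order before this point):
+
+```python
+import warnings
+
+
+def _propagation_params(results, previous_params):
+    """Return the starting values for the next chunk.
+
+    Scans the chunk's results from the end and picks the parameters
+    of the last successful fit; keeps the previous chunk's values if
+    every fit in the chunk failed.
+    """
+    for result in reversed(results):
+        if result['fit_success']:
+            return result['params']
+    warnings.warn('All fits in chunk failed; keeping previous '
+                  'parameters')
+    return previous_params
+```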
+ +### 6.6 CSV output format + +The CSV uses a flat header with two columns per free parameter (value + +uncertainty) plus metadata columns: + +``` +file_path,chi_squared,reduced_chi_squared,fit_success,n_iterations,cosio.cell.length_a,cosio.cell.length_a.uncertainty,cosio.atom_sites.Co1.b_iso,cosio.atom_sites.Co1.b_iso.uncertainty,template.instrument.calib_twotheta_offset,template.instrument.calib_twotheta_offset.uncertainty,... +/data/scan_001.xye,142.3,1.23,True,45,10.312,0.001,0.31,0.02,0.29,0.01,... +/data/scan_002.xye,138.1,1.19,True,32,10.315,0.002,0.32,0.03,0.28,0.01,... +``` + +| Column | Type | Description | +| --------------------------- | ----- | ------------------------------- | +| `file_path` | str | Absolute path to the data file | +| `chi_squared` | float | χ² of the fit | +| `reduced_chi_squared` | float | Reduced χ² | +| `fit_success` | bool | Whether the minimizer converged | +| `n_iterations` | int | Number of minimizer iterations | +| `{unique_name}` | float | Fitted value of parameter | +| `{unique_name}.uncertainty` | float | Uncertainty of parameter | + +**Implementation:** use `csv.DictWriter` (stdlib). Header is written +once on file creation. Rows are appended after each chunk and flushed +immediately (no buffered writes that could be lost on crash). + +### 6.7 Crash recovery + +On entry, `fit_sequential()` checks for an existing CSV at `output_csv`: + +1. If the file exists and is non-empty: + - Read all rows. + - Build a set of already-fitted `file_path` values. + - Extract parameter values from the last row as starting values + (overrides `initial_params` from the template). + - Log a message: `"Resuming from row N (M files already fitted)."`. +2. If the file does not exist or is empty: + - Create the file with the header row. + - Use the template's current parameter values as starting values. + +The file path comparison uses absolute paths to avoid mismatches. + +### 6.8 Verbosity and progress reporting + +| Verbosity | Behaviour | +| --------- | ------------------------------------------------------------------- | +| `full` | Per-chunk progress: chunk N/M, per-file χ² table (updated in place) | +| `short` | One-line per chunk: `✅ Chunk 3/500: 10 files, avg χ² = 1.45` | +| `silent` | No output | + +Workers always run silently. Only the main process produces console +output. + +--- + +## 7. Alternatives Considered + +### 7.1 Threading instead of multiprocessing + +Threading (`concurrent.futures.ThreadPoolExecutor`) is simpler — no +pickling, shared memory. However: + +- `cryspy` is pure Python and does not release the GIL. Threaded fits + would execute serially despite multiple threads. +- `crysfml` is a Fortran extension; it may release the GIL during + computation, but this is not guaranteed. +- Singletons (`UidMapHandler`, `ConstraintsHandler`) would need locks or + thread-local storage. + +**Verdict:** threading does not provide true parallelism for the primary +compute backend. Multiprocessing is required. + +### 7.2 Dask / joblib / Ray + +These are mature parallel computing frameworks. + +| Framework | Pros | Cons | +| --------- | ------------------------------- | -------------------------------------- | +| Dask | Lazy graphs, dashboard, retry | Heavy dependency, learning curve | +| joblib | Simple `Parallel(n_jobs=N)` API | Less control over chunking/propagation | +| Ray | Distributed, stateful actors | Very heavy dependency | + +**Verdict:** the stdlib `concurrent.futures.ProcessPoolExecutor` is +sufficient. 
Adding a large dependency is not justified when the +parallelism pattern (chunked map with reduce between chunks) is +straightforward. Can reconsider if cluster-scale execution becomes a +requirement. + +### 7.3 Adaptive chunk sizing + +Start with large chunks when parameter evolution is slow (fits converge +quickly), shrink chunks when fits fail or χ² increases. This maximises +parallelism when it is safe. + +**Verdict:** valuable optimisation but adds complexity. Defer to a +future iteration. Fixed `chunk_size = max_workers` is the safe default. + +### 7.4 Neighbour-seeded propagation + +Instead of propagating from the last file in a chunk, propagate from the +nearest successful fit to each file (based on file order or metadata +like temperature). This helps with non-monotonic parameter evolution. + +**Verdict:** interesting for non-sequential scans but out of scope for +the initial implementation. The file-order assumption is valid for the +primary use case (temperature / dose scans). + +### 7.5 Parallel single-fit for pre-loaded experiments + +Add `max_workers` to the existing `fit()` method for pre-loaded +experiments. Internally, chunk the experiments and fit in parallel using +the same worker pattern. + +**Verdict:** this is a natural follow-up that reuses the same +infrastructure. However, the primary value of `fit_sequential` is +avoiding bulk preloading. Adding `max_workers` to `fit()` is a separate, +smaller enhancement that can come later. + +--- + +## 8. Open Questions + +1. **Should `max_workers='auto'` use `os.cpu_count()`?** Or a fraction + like `cpu_count() - 1` to leave headroom? Or + `min(cpu_count(), len(data_paths))`? + +2. **CIF round-trip reliability.** Approach A (§ 6.4) depends on + `experiment.as_cif → ExperimentFactory.from_cif_str()` reproducing + the full experiment state. Is this currently reliable, or does it + lose information (e.g. category type selections, calculator tag)? If + unreliable, Approach B (explicit config copy) is the fallback but + requires more maintenance. + +3. **Does `_load_ascii_data_to_experiment()` work on an experiment that + already has data?** If it appends rather than replaces, a clear/reset + step is needed before the call. + +4. **Should the worker use the project's current verbosity, or always + run silently?** Workers running in subprocesses cannot safely write + to the same terminal. Silent is safest, but progress from workers is + lost. + +5. **Constraint UIDs.** Constraints reference parameters by UID + (`param_uid`). When the worker creates a fresh project, UIDs are + regenerated. If UIDs are deterministic (derived from CIF path), this + is fine. If they are random, constraints will fail in the worker. + Need to verify UID generation strategy. + +6. **`plot_param_series` unification.** Should the existing + `plot_param_series()` learn to read from CSV, or should there be a + separate `plot_param_series_from_csv()` method? Unification is + cleaner for the user but adds complexity. + +7. **Metadata in CSV.** Should the CSV include per-file metadata (e.g. + temperature, dose) extracted from the data files? This would make the + CSV self-contained for plotting. But it requires knowing which + metadata to extract — perhaps via a user-provided extraction function + or regex pattern. + +--- + +## 9. Implementation Plan + +Each phase is independently testable and deployable. + +### Phase 1: Streaming sequential fit (max_workers=1) + +- Add `fit_sequential()` to `Analysis`. +- Implement `SequentialFitTemplate` dataclass. 
+- Implement `_fit_worker()` as a plain function (called directly, no + subprocess). +- Implement CSV writing with `csv.DictWriter`. +- Implement crash recovery (CSV reading + resumption). +- Implement parameter propagation (simple: last result → next + iteration). +- Unit tests for CSV writing, crash recovery, and parameter propagation. +- Integration test: small sequential fit (5 files), verify CSV output. + +**Validates:** the core data flow, CSV format, and parameter +propagation. No multiprocessing complexity yet. + +### Phase 2: Parallel fitting (max_workers > 1) + +- Refactor `_fit_worker()` to be a module-level picklable function. +- Implement `ProcessPoolExecutor` dispatch with `spawn` context. +- Handle worker failures (catch exceptions, mark as failed in CSV, + continue with next chunk). +- Implement propagation with failed-fit fallback. +- Integration test: parallel sequential fit (10 files, 2 workers). + +**Validates:** multiprocessing isolation, singleton safety, no shared +state corruption. + +### Phase 3: Plotting from CSV + +- Add `plot_param_series_from_csv()` to `Project`. +- Read CSV with `pandas`, resolve column names to parameter + unique_names. +- Reuse the existing `Plotter.plot_scatter` backend. +- Optionally unify with `plot_param_series()`. + +### Phase 4: Dataset replay + +- Add `apply_params_from_csv()` to `Project` (or a helper method). +- Load a specific CSV row, override parameter values in the live + project. +- Reload data from the file path in the CSV row. +- Allow `plot_meas_vs_calc()` to work with the replayed state. + +### Phase 5 (optional): max_workers on existing fit() + +- Add `max_workers` parameter to `Analysis.fit()`. +- When `fit_mode == 'single'` and `max_workers > 1`, serialize + pre-loaded experiments and dispatch to the same worker pool. +- Propagate between chunks, store snapshots in memory (existing + behaviour) or CSV if `output_csv` is provided. + +--- + +## 10. Dependencies and Risks + +### New dependencies + +**None.** `concurrent.futures`, `csv`, `multiprocessing`, `dataclasses` +are all stdlib. + +### Risks + +| Risk | Mitigation | +| ------------------------------------------------- | ------------------------------------------------------------------------------- | +| CIF round-trip loses information | Verify with a round-trip test before Phase 1; fall back to Approach B if needed | +| UID non-determinism breaks constraints in workers | Verify UID generation; make deterministic if needed | +| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool to recycle workers | +| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields | +| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | + +### Related open issues + +- **Issue #1 (Project.load):** `fit_sequential` does not depend on + `load()`, but Phase 4 (dataset replay) benefits from it. +- **Issue #7 (dummy Experiments wrapper):** `fit_sequential` bypasses + this entirely — each worker creates its own `Experiments`. +- **Issue #4 (constraint refresh):** the worker's fresh project applies + constraints from scratch — no stale state. But the main process must + still correctly capture alias/constraint definitions in the template. + +--- + +## 11. 
Summary + +| Aspect | Decision | +| -------------------- | ------------------------------------------------------- | +| Parallelism backend | `concurrent.futures.ProcessPoolExecutor` with `spawn` | +| Worker isolation | Each worker creates a fresh `Project` — no shared state | +| Data flow | Template CIF + data path → worker → result dict → CSV | +| Parameter seeding | Last successful result in chunk → next chunk | +| Crash recovery | Read existing CSV, skip fitted files, resume | +| Configuration | `max_workers` argument on `fit_sequential()` | +| New dependencies | None (stdlib only) | +| First implementation | Phase 1 (sequential, no parallelism) to validate design | From 876fc31d8ab70bfdfa66d2ffe44adb2b5b7f3615 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 2 Apr 2026 19:45:43 +0200 Subject: [PATCH 2/5] Improve architecture design document for sequential fitting feature --- .../architecture/sequential_fitting_design.md | 694 ++++++++++++------ 1 file changed, 450 insertions(+), 244 deletions(-) diff --git a/docs/architecture/sequential_fitting_design.md b/docs/architecture/sequential_fitting_design.md index bf3ac81b..039af98f 100644 --- a/docs/architecture/sequential_fitting_design.md +++ b/docs/architecture/sequential_fitting_design.md @@ -1,7 +1,7 @@ # Sequential Fitting — Architecture Design -**Status:** Draft — for discussion before implementation -**Date:** 2026-04-02 +**Status:** Draft — for discussion before implementation **Date:** +2026-04-02 --- @@ -59,6 +59,20 @@ for each experiment in project.experiments: `{structure}_{experiment}` — pure-Python computation, does not release the GIL. +### 2.3 UID vs unique_name + +Parameter UIDs are **random** (16-char `secrets.choice`) and +non-deterministic. They do not survive CIF round-trips or cross-process +recreation. The `unique_name` property (e.g. `cosio.cell.length_a`) is +**deterministic** — derived from the parent chain — and is already used +as the column key in CIF serialisation (`CifHandler.uid` returns +`unique_name`). + +The alias system currently stores `param_uid` (the random UID) as a +`StringDescriptor`. For sequential fitting to work across processes, +constraints must reference parameters by `unique_name` instead. This is +a prerequisite change (see § 9.1). + --- ## 3. Requirements @@ -73,12 +87,13 @@ Numbered to match the original request. 4. **Chunk-based processing** — load N datasets, fit N independently in parallel, propagate, repeat. 5. **Incremental CSV output** — one row per dataset, written after each - chunk; includes χ², fit status, and all fitted parameter values + - uncertainties. + chunk; includes χ², fit status, diffrn metadata, and all fitted + parameter values + uncertainties. 6. **Crash recovery** — on restart, read CSV, skip already-fitted files, resume from the last fitted row's parameters. 7. **Separate initial fit** — the user runs a regular `fit()` on one - dataset first to establish good starting values. + dataset first to establish good starting values. The template is a + complete working experiment, not just a skeleton. 8. **Store all fitted parameters in CSV** — both structure and experiment params, so any dataset can be replayed later. The project file retains only the template structure CIF and template experiment @@ -94,18 +109,17 @@ Numbered to match the original request. A new method `Analysis.fit_sequential()` orchestrates the batch. 
It does **not** reuse the existing `fit()` loop — the data-flow is fundamentally -different (data paths in → CSV out, experiments are ephemeral). +different (data directory in → CSV out, experiments are ephemeral). ``` -User sets up: 1 Structure + 1 template Experiment +User sets up: 1 Structure + 1 template Experiment (fully configured) │ ▼ project.analysis.fit() ← initial fit on template │ ▼ project.analysis.fit_sequential( - data_paths=[...], - output_csv='results.csv', + data_dir='data/', max_workers=4, ) │ @@ -127,20 +141,52 @@ User sets up: 1 Structure + 1 template Experiment │ ▼ ▼ ▼ │ │ Collect results │ │ Append rows to CSV │ + │ Report progress │ │ Propagate params from │ │ last file in chunk │ └──────────────────────────────┘ │ ▼ - CSV with all results - (one row per dataset) + CSV in project_dir/analysis/results.csv ``` --- ## 5. User-Facing API -### 5.1 The template workflow +### 5.1 Data path handling + +Data files can come from various sources: a local directory, a ZIP +archive, or (in future) an online catalogue. The library already +provides helpers for extracting paths: + +- `ed.extract_data_paths_from_zip(zip_path)` → extracts to a temp dir, + returns sorted file paths. +- `ed.extract_data_paths_from_dir(dir_path)` → lists files in a + directory, returns sorted file paths. + +For sequential fitting, the user points `fit_sequential` at a +**directory** containing the data files. If the data arrives as a ZIP, +the user first extracts it: + +```python +# From a ZIP archive — extract first, then point to directory +ed.extract_data_paths_from_zip('scans.zip', destination='data/') +project.analysis.fit_sequential(data_dir='data/') + +# From a local directory — use directly +project.analysis.fit_sequential(data_dir='/experiment/scans/') +``` + +The `extract_data_paths_from_zip` function gains an optional +`destination` parameter. When provided, it extracts to that directory +instead of a temp dir. This gives a consistent directory-based entry +point for all data sources (local, ZIP, future catalogue downloads). + +Internally, `fit_sequential` calls `extract_data_paths_from_dir` to +discover and sort files from the given directory. + +### 5.2 The template workflow ```python import easydiffraction as ed @@ -155,12 +201,10 @@ structure.space_group.name_h_m = 'P n m a' structure.cell.length_a = 10.31 # ... atom sites ... 
-# ── Template experiment ────────────────────────────────── -data_paths = ed.extract_data_paths_from_zip('scans.zip') - +# ── Template experiment (fully configured) ─────────────── project.experiments.add_from_data_path( name='template', - data_path=data_paths[0], + data_path='data/scan_001.xye', sample_form='powder', beam_mode='constant wavelength', radiation_probe='neutron', @@ -182,7 +226,7 @@ for point in expt.background: # ── Constraints (optional) ─────────────────────────────── project.analysis.aliases.create( label='biso_Co1', - param_uid=structure.atom_sites['Co1'].b_iso.uid, + param_unique_name=structure.atom_sites['Co1'].b_iso.unique_name, ) project.analysis.constraints.create(expression='biso_Co2 = biso_Co1') project.analysis.apply_constraints() @@ -191,71 +235,143 @@ project.analysis.apply_constraints() project.analysis.fit() project.analysis.show_fit_results() -# ── Sequential fit over all files ──────────────────────── +# ── Save project (defines project path) ────────────────── +project.save_as(dir_path='cosio_project') + +# ── Sequential fit over all files in data/ ─────────────── project.analysis.fit_sequential( - data_paths=data_paths, - output_csv='results.csv', + data_dir='data/', max_workers=4, ) ``` -### 5.2 Method signature +### 5.3 Method signature ```python def fit_sequential( self, - data_paths: list[str], - output_csv: str, - max_workers: int = 1, + data_dir: str, + max_workers: int | str = 1, chunk_size: int | None = None, + file_pattern: str = '*', + metadata_patterns: dict[str, str] | None = None, verbosity: str | None = None, ) -> None: ``` -| Parameter | Description | -| ------------- | ------------------------------------------------------------------------------------- | -| `data_paths` | Ordered list of data file paths to process. | -| `output_csv` | Path to the output CSV file. Created if missing, appended if exists (crash recovery). | -| `max_workers` | Number of parallel worker processes. `1` = sequential (no subprocess overhead). | -| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | -| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | +| Parameter | Description | +| ------------------- | ------------------------------------------------------------------------------------------ | +| `data_dir` | Path to directory containing data files. | +| `max_workers` | Number of parallel worker processes. `1` = sequential. `'auto'` = physical CPU count. | +| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | +| `file_pattern` | Glob pattern to filter files in `data_dir`. Default `'*'` (all non-hidden files). | +| `metadata_patterns` | Dict mapping diffrn field names to regex patterns for metadata extraction from data files. | +| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | + +The CSV output path is **not** a parameter — it is determined by the +project directory structure (see § 5.4). + +When `max_workers='auto'`, the worker count is resolved as +`os.cpu_count()` (physical CPU count), following the `pytest-xdist` +convention for `-n auto`. + +### 5.4 Project directory structure + +The existing project layout gains an `analysis/` directory: + +``` +project_dir/ +├── project.cif +├── analysis.cif +├── summary.cif +├── structures/ +│ └── cosio.cif +├── experiments/ +│ └── template.cif ← the fully-configured template +└── analysis/ + └── results.csv ← sequential fit output +``` + +The CSV path is always `project_dir / 'analysis' / 'results.csv'`. 
This +means: -### 5.3 Plotting results from CSV +- The user must call `save_as()` before `fit_sequential()` to establish + the project path. +- `fit_sequential()` validates that `project.info.path` is set. +- No need to pass `output_csv` — the location is deterministic and + discoverable. -After `fit_sequential()`, parameter evolution is read from the CSV file -rather than from in-memory snapshots: +### 5.5 Plotting results + +`plot_param_series()` **always** reads from the CSV file in the +project's `analysis/` directory. This unifies the small-N and large-N +cases — every sequential or single-mode fit produces a CSV, making +results persistent, portable, and usable by external tools. ```python -project.plot_param_series_from_csv( - csv_path='results.csv', +# Plot parameter evolution (reads from analysis/results.csv) +project.plot_param_series( param=structure.cell.length_a, versus=expt.diffrn.ambient_temperature, ) ``` -Alternatively, `plot_param_series()` could detect whether the data -source is in-memory snapshots (small N) or a CSV file (large N) based on -whether `output_csv` was produced. This is a UX decision to discuss. +The method resolves `param` and `versus` to their `unique_name` +internally, then looks up the corresponding CSV columns. The user passes +the live descriptor object (e.g. `structure.cell.length_a`) for +discoverability and autocomplete — they never need to type or know the +`unique_name` string. + +**Why `param` objects, not strings?** The user already has a reference +to `structure.cell.length_a` — passing it directly is more natural and +typo-safe than passing `'cosio.cell.length_a'`. The method extracts +`.unique_name` itself. This matches the current API. -### 5.4 Replaying a single dataset +### 5.6 Replaying a single dataset To replot or inspect one dataset from the batch: ```python -# Load template project -project = ed.Project.load('my_project') +# Load project (when Project.load() is implemented) +project = ed.Project.load('cosio_project') # Replace fitted params for dataset #500 from CSV -project.apply_params_from_csv('results.csv', row=500) +project.apply_params_from_csv(row=500) -# Plot +# Plot (uses the template experiment with overridden params) project.plot_meas_vs_calc(expt_name='template') ``` -This requires loading the dataset's data file and overriding parameter -values from the CSV row. The exact API can be refined; the key point is -that CSV + template CIF is sufficient to reconstruct any dataset's -state. +The CSV row index identifies the dataset. `apply_params_from_csv`: + +1. Reads the CSV row. +2. Loads the data file from the `file_path` column into the template + experiment. +3. Overrides all parameter values from the CSV columns. + +### 5.7 Diffrn metadata in CSV + +The CSV includes all descriptors from the `diffrn` category +(temperature, pressure, magnetic field, electric field) as additional +columns. This makes the CSV self-contained for plotting parameter +evolution against any condition axis, e.g. temperature vs. magnetic +field, without needing to re-read the original data files. + +Diffrn values are extracted per file in the worker. The extraction +pattern is passed via the `metadata_patterns` argument: + +```python +project.analysis.fit_sequential( + data_dir='data/', + max_workers=4, + metadata_patterns={ + 'ambient_temperature': r'^TEMP\s+([0-9.]+)', + }, +) +``` + +This follows the existing `ed.extract_metadata()` pattern already used +in `ed-17.py`. --- @@ -268,8 +384,8 @@ state. 
- Exactly 1 structure in `project.structures`. - Exactly 1 experiment in `project.experiments` (the template). - At least 1 free parameter. -- `output_csv` path is writable. -- `data_paths` is non-empty. +- `project.info.path` is set (project has been saved). +- `data_dir` exists and contains at least 1 data file. ### 6.2 Template snapshot @@ -279,19 +395,21 @@ Before dispatching workers, the method captures a **template snapshot** ```python @dataclass(frozen=True) class SequentialFitTemplate: - structure_cif: str # structure.as_cif - experiment_cif: str # experiment.as_cif (template experiment) - experiment_axes: dict # {sample_form, beam_mode, radiation_probe, scattering_type} - initial_params: dict # {unique_name: value} for ALL free params - free_param_names: list[str] # unique_names of free params - alias_defs: list[dict] # [{label, param_uid}, ...] - constraint_defs: list[str] # [expression, ...] - minimizer_tag: str # e.g. 'lmfit' - calculator_tag: str # e.g. 'cryspy' + structure_cif: str # structure.as_cif + experiment_cif: str # experiment.as_cif (full template) + initial_params: dict # {unique_name: value} for ALL free params + free_param_unique_names: list # unique_names of free params + alias_defs: list[dict] # [{label, param_unique_name}, ...] + constraint_defs: list[str] # [expression, ...] + minimizer_tag: str # e.g. 'lmfit' + calculator_tag: str # e.g. 'cryspy' + metadata_patterns: dict # {diffrn_field: regex_pattern} + diffrn_field_names: list # ['ambient_temperature', ...] ``` This is a plain, picklable data object (no live references to -`GuardedBase` instances). +`GuardedBase` instances). Note: uses `unique_name` instead of random +`uid` for parameter identification. ### 6.3 Chunk-based processing @@ -315,11 +433,15 @@ for chunk in chunked(remaining_paths, chunk_size): # Sort results to match file order (as_completed is unordered) results.sort(key=lambda r: data_paths.index(r['file_path'])) - _append_to_csv(output_csv, results) + _append_to_csv(csv_path, results) + + # Report progress (main process only, between chunks) + _report_chunk_progress(chunk_idx, total_chunks, results, verbosity) - # Propagate: use last file's params as next chunk's starting values - last_result = results[-1] - template = replace(template, initial_params=last_result['params']) + # Propagate: use last successful file's params as next starting values + last_ok = _last_successful(results) + if last_ok is not None: + template = replace(template, initial_params=last_ok['params']) ``` The `'spawn'` context is required because: @@ -329,6 +451,16 @@ The `'spawn'` context is required because: - `spawn` creates a fresh Python interpreter per worker — singletons (`UidMapHandler`, `ConstraintsHandler`) are naturally isolated. +**Progress reporting** happens in the main process, between chunks. +Workers always run silently. After each chunk completes, the main +process prints/updates progress: + +| Verbosity | Between-chunk output | +| --------- | --------------------------------------------------------------------- | +| `full` | Per-chunk table: chunk N/M, per-file χ² and status (updated in place) | +| `short` | One-line per chunk: `✅ Chunk 3/500: 10 files, avg χ² = 1.45` | +| `silent` | No output | + ### 6.4 Worker function The worker is a **module-level function** (required for pickling by @@ -342,101 +474,106 @@ def _fit_worker( """ Fit a single dataset in an isolated process. 
- Creates a fresh Project, loads the template configuration, + Creates a fresh Project, loads the template configuration via CIF, replaces data from data_path, applies initial parameters, fits, and returns a plain dict of results. """ # 1. Create fresh project (isolated singletons, no shared state) project = Project(name='_worker') - # 2. Load structure from CIF + # 2. Load structure from template CIF project.structures.add_from_cif_str(template.structure_cif) - # 3. Create experiment from data path (loads measured data) - project.experiments.add_from_data_path( - name='expt', - data_path=data_path, - sample_form=template.experiment_axes['sample_form'], - beam_mode=template.experiment_axes['beam_mode'], - radiation_probe=template.experiment_axes['radiation_probe'], - scattering_type=template.experiment_axes['scattering_type'], - verbosity='silent', - ) - - # 4. Apply template experiment configuration - _apply_template_config( - experiment=project.experiments['expt'], - template=template, - ) + # 3. Create experiment from template CIF (full config + template data) + project.experiments.add_from_cif_str(template.experiment_cif) + expt = project.experiments[0] + + # 4. Replace data from new data path + expt._load_ascii_data_to_experiment(data_path) # 5. Override parameter values from propagated starting values _apply_param_overrides(project, template.initial_params) # 6. Set free flags - _set_free_params(project, template.free_param_names) + _set_free_params(project, template.free_param_unique_names) - # 7. Apply constraints + # 7. Apply constraints (using unique_names, not random UIDs) _apply_constraints(project, template.alias_defs, template.constraint_defs) # 8. Set calculator and minimizer - project.experiments['expt'].calculator_type = template.calculator_tag + expt.calculator_type = template.calculator_tag project.analysis.current_minimizer = template.minimizer_tag - # 9. Fit + # 9. Extract diffrn metadata from data file + diffrn_values = _extract_metadata(data_path, template.metadata_patterns) + + # 10. Fit project.analysis.fit(verbosity='silent') - # 10. Collect results - return _collect_results(project, data_path) + # 11. Collect results + return _collect_results(project, data_path, diffrn_values) ``` -#### Step 4: Applying template configuration +#### Step 3+4: CIF round-trip then data reload (Approach A) -The template experiment's CIF contains instrument, peak, background, -excluded regions, linked phases, and their parameters. However, -`add_from_data_path` creates an experiment with default configuration. -The worker must then apply the template's configuration on top. +The template experiment's CIF contains the full configuration: +instrument, peak profile, background points, excluded regions, linked +phases, diffrn conditions, and the template's measured data. Loading +from CIF reconstructs all of this. Then +`_load_ascii_data_to_experiment(data_path)` **replaces** the data +category contents (it reassigns `self._items` to a fresh list — verified +in `PdCwlData._create_items_set_xcoord_and_id`). -**Two approaches:** +**Prerequisite: CIF round-trip must be reliable.** The path is: -| Approach | Pros | Cons | -| ------------------------------------------ | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | -| **A. 
Create from CIF, reload data** | All config comes from CIF; no manual param copying | Requires `_load_ascii_data_to_experiment()` to work after CIF construction; CIF must round-trip perfectly | -| **B. Create from data path, apply config** | Data loading is clean; explicit param assignment | Requires enumerating which template settings to copy; more fragile if experiment categories change | +``` +experiment.as_cif → ExperimentFactory.from_cif_str() → expt_obj +``` -**Recommended: Approach A** — create from template CIF, then reload -data. This is more robust because: +This calls `_from_gemmi_block` which: -- CIF serialisation already captures the full experiment state. -- Adding new categories or parameters requires no changes to the worker. -- `_load_ascii_data_to_experiment()` already exists on all concrete - experiment classes. +1. Reads `ExperimentType` from CIF → resolves the concrete class. +2. Creates the experiment (with default categories). +3. Calls `category.from_cif(block)` on each category. -```python -# Approach A sketch: -# 3. Create experiment from template CIF (full config + template data) -project.experiments.add_from_cif_str(template.experiment_cif) -expt = project.experiments['template'] -expt.name = 'expt' # rename for consistency +**Known risk:** `category_collection_to_cif` truncates output at +`max_display=20` rows for display purposes. If the template experiment's +background or data has >20 items, the CIF will be incomplete. **Fix:** +serialisation used for the template snapshot must pass +`max_display=None` to emit all rows. This is a prerequisite fix (§ 9.2). -# 4. Replace data from new data path -expt._load_ascii_data_to_experiment(data_path) -``` +### 6.5 Parameter identification by unique_name -**Prerequisite:** `_load_ascii_data_to_experiment` must correctly -overwrite the existing data category contents (clear old data points and -load new ones). This needs to be verified and potentially adjusted. +All parameter identification in the sequential fitting system uses +`unique_name` (e.g. `cosio.cell.length_a`), not the random `uid`. -**Open question:** Does `experiment.as_cif` round-trip cleanly through -`ExperimentFactory.from_cif_str()`? If not, Approach A is blocked until -CIF round-trip is reliable (see `issues_open.md` issue #1 and #12). -Approach B would be the fallback. See § 8 open question 2. +- **CSV columns** are keyed by `unique_name`. +- **Template `initial_params`** is `{unique_name: value}`. +- **`free_param_unique_names`** lists which params to mark as free. +- **Alias definitions** reference `param_unique_name` instead of + `param_uid`. -### 6.5 Parameter propagation strategy +The `_apply_param_overrides` helper walks all parameters in the project, +matches by `unique_name`, and sets values: + +```python +def _apply_param_overrides( + project: Project, + overrides: dict[str, float], +) -> None: + all_params = project.structures.parameters + project.experiments.parameters + by_name = {p.unique_name: p for p in all_params} + for name, value in overrides.items(): + if name in by_name: + by_name[name].value = value +``` + +### 6.6 Parameter propagation strategy After each chunk completes: -1. Take the results from the **last file** in the chunk (by file sort - order). +1. Take the results from the **last successful file** in the chunk (by + file sort order). 2. Extract all free parameter values. 3. Use these as `initial_params` for all workers in the next chunk. @@ -446,50 +583,49 @@ next chunk. 
Within a chunk, all workers start from the same values, so parameter evolution within a chunk comes only from the fit — not from propagation. -**Edge case — last file's fit failed:** if the last file in a chunk -fails, fall back to the last _successful_ result in the chunk (scanning -from the end). If no file in the chunk succeeded, keep the previous -chunk's parameters and log a warning. +**Edge case — all fits in a chunk failed:** keep the previous chunk's +parameters and log a warning. -**Within-chunk ordering matters for `max_workers=1`:** when running -sequentially (`max_workers=1`), the chunk size is 1, so propagation -happens after every file — identical to the current single-fit mode. -This preserves backward compatibility. +**When `max_workers=1`:** the chunk size is 1, so propagation happens +after every file — identical to the current single-fit mode. This +preserves backward compatibility. -### 6.6 CSV output format +### 6.7 CSV output format -The CSV uses a flat header with two columns per free parameter (value + -uncertainty) plus metadata columns: +The CSV uses a flat header with metadata columns, diffrn columns, and +two columns per free parameter (value + uncertainty): -``` -file_path,chi_squared,reduced_chi_squared,fit_success,n_iterations,cosio.cell.length_a,cosio.cell.length_a.uncertainty,cosio.atom_sites.Co1.b_iso,cosio.atom_sites.Co1.b_iso.uncertainty,template.instrument.calib_twotheta_offset,template.instrument.calib_twotheta_offset.uncertainty,... -/data/scan_001.xye,142.3,1.23,True,45,10.312,0.001,0.31,0.02,0.29,0.01,... -/data/scan_002.xye,138.1,1.19,True,32,10.315,0.002,0.32,0.03,0.28,0.01,... +```csv +file_path,chi_squared,reduced_chi_squared,fit_success,n_iterations,diffrn.ambient_temperature,diffrn.ambient_pressure,cosio.cell.length_a,cosio.cell.length_a.uncertainty,template.instrument.calib_twotheta_offset,template.instrument.calib_twotheta_offset.uncertainty,... +/data/scan_001.xye,142.3,1.23,True,45,300.0,,10.312,0.001,0.29,0.01,... +/data/scan_002.xye,138.1,1.19,True,32,310.0,,10.315,0.002,0.28,0.01,... ``` -| Column | Type | Description | -| --------------------------- | ----- | ------------------------------- | -| `file_path` | str | Absolute path to the data file | -| `chi_squared` | float | χ² of the fit | -| `reduced_chi_squared` | float | Reduced χ² | -| `fit_success` | bool | Whether the minimizer converged | -| `n_iterations` | int | Number of minimizer iterations | -| `{unique_name}` | float | Fitted value of parameter | -| `{unique_name}.uncertainty` | float | Uncertainty of parameter | +| Column group | Description | +| --------------------------- | ------------------------------------------ | +| `file_path` | Absolute path to the data file | +| `chi_squared` | χ² of the fit | +| `reduced_chi_squared` | Reduced χ² | +| `fit_success` | Whether the minimizer converged | +| `n_iterations` | Number of minimizer iterations | +| `diffrn.*` | Diffrn metadata (temperature, pressure, …) | +| `{unique_name}` | Fitted value of free parameter | +| `{unique_name}.uncertainty` | Uncertainty of free parameter | **Implementation:** use `csv.DictWriter` (stdlib). Header is written once on file creation. Rows are appended after each chunk and flushed immediately (no buffered writes that could be lost on crash). -### 6.7 Crash recovery +### 6.8 Crash recovery -On entry, `fit_sequential()` checks for an existing CSV at `output_csv`: +On entry, `fit_sequential()` checks for an existing CSV at the project's +`analysis/results.csv`: 1. 
If the file exists and is non-empty: - Read all rows. - Build a set of already-fitted `file_path` values. - - Extract parameter values from the last row as starting values - (overrides `initial_params` from the template). + - Extract parameter values from the last successful row as starting + values (overrides template's current values). - Log a message: `"Resuming from row N (M files already fitted)."`. 2. If the file does not exist or is empty: - Create the file with the header row. @@ -497,17 +633,6 @@ On entry, `fit_sequential()` checks for an existing CSV at `output_csv`: The file path comparison uses absolute paths to avoid mismatches. -### 6.8 Verbosity and progress reporting - -| Verbosity | Behaviour | -| --------- | ------------------------------------------------------------------- | -| `full` | Per-chunk progress: chunk N/M, per-file χ² table (updated in place) | -| `short` | One-line per chunk: `✅ Chunk 3/500: 10 files, avg χ² = 1.45` | -| `silent` | No output | - -Workers always run silently. Only the main process produces console -output. - --- ## 7. Alternatives Considered @@ -573,52 +698,124 @@ infrastructure. However, the primary value of `fit_sequential` is avoiding bulk preloading. Adding `max_workers` to `fit()` is a separate, smaller enhancement that can come later. +### 7.6 Unified FitModel abstraction + +Every experiment already contains a full list of structural parameters +via `linked_phases` (powder) or `linked_crystal` (single crystal). An +experiment + its linked structures together form a complete model for +computing a diffraction pattern. A `FitModel` class could merge +structure and experiment parameters into a single flat parameter set. + +**Assessment:** the current code already merges them at fit time +(`structures.free_parameters + experiments.free_parameters`). A +`FitModel` class would formalise this and could simplify the +`_residual_function` signature (one model instead of structures + +experiments + analysis). However, introducing a new abstraction now +would be premature — the current merging works and the sequential +fitting design does not depend on it. If the fitting internals are +refactored later (issue #7), `FitModel` would be a natural outcome. + --- -## 8. Open Questions - -1. **Should `max_workers='auto'` use `os.cpu_count()`?** Or a fraction - like `cpu_count() - 1` to leave headroom? Or - `min(cpu_count(), len(data_paths))`? - -2. **CIF round-trip reliability.** Approach A (§ 6.4) depends on - `experiment.as_cif → ExperimentFactory.from_cif_str()` reproducing - the full experiment state. Is this currently reliable, or does it - lose information (e.g. category type selections, calculator tag)? If - unreliable, Approach B (explicit config copy) is the fallback but - requires more maintenance. - -3. **Does `_load_ascii_data_to_experiment()` work on an experiment that - already has data?** If it appends rather than replaces, a clear/reset - step is needed before the call. - -4. **Should the worker use the project's current verbosity, or always - run silently?** Workers running in subprocesses cannot safely write - to the same terminal. Silent is safest, but progress from workers is - lost. - -5. **Constraint UIDs.** Constraints reference parameters by UID - (`param_uid`). When the worker creates a fresh project, UIDs are - regenerated. If UIDs are deterministic (derived from CIF path), this - is fine. If they are random, constraints will fail in the worker. - Need to verify UID generation strategy. - -6. 
**`plot_param_series` unification.** Should the existing - `plot_param_series()` learn to read from CSV, or should there be a - separate `plot_param_series_from_csv()` method? Unification is - cleaner for the user but adds complexity. - -7. **Metadata in CSV.** Should the CSV include per-file metadata (e.g. - temperature, dose) extracted from the data files? This would make the - CSV self-contained for plotting. But it requires knowing which - metadata to extract — perhaps via a user-provided extraction function - or regex pattern. +## 8. Resolved Questions + +These were open questions in the previous draft; decisions are recorded +here. + +1. **`max_workers='auto'`** → uses `os.cpu_count()` (physical CPU + count), following the `pytest-xdist` convention. + +2. **CIF round-trip reliability** → must be verified and fixed as a + prerequisite. Known risk: `category_collection_to_cif` truncates at + 20 rows — the template snapshot must bypass this. See § 9.2. + +3. **`_load_ascii_data_to_experiment()` on existing data** → **verified: + it replaces, not appends.** `_create_items_set_xcoord_and_id` + reassigns `self._items` to a fresh list. + +4. **Worker verbosity** → workers run silently. Progress is reported by + the main process between chunks. See § 6.3. + +5. **Constraint parameter identification** → switch from random `uid` to + deterministic `unique_name`. See § 6.5 and § 9.1. + +6. **`plot_param_series` unification** → always read from CSV. One + method, no `_from_csv` variant. See § 5.5. + +7. **Metadata in CSV** → include all diffrn fields (temperature, + pressure, magnetic field, electric field). See § 5.7. + +8. **CSV output path** → deterministic: + `project_dir/analysis/results.csv`. No argument needed. See § 5.4. + +9. **`data_paths` argument** → replaced with `data_dir` (path to + directory). Files are discovered via `extract_data_paths_from_dir`. + For ZIP sources, extract first. See § 5.1. --- -## 9. Implementation Plan +## 9. Prerequisite Changes + +These changes are needed before implementing `fit_sequential()` itself. +Each is a separate, atomic change. -Each phase is independently testable and deployable. +### 9.1 Switch alias `param_uid` to `param_unique_name` + +The `Alias` category currently stores `param_uid` (random UID). Change +to `param_unique_name` (deterministic `unique_name`). Update: + +- `Alias._param_uid` → `Alias._param_unique_name` +- `CifHandler(names=['_alias.param_uid'])` → + `CifHandler(names=['_alias.param_unique_name'])` +- `ConstraintsHandler` to resolve via `unique_name` lookup instead of + UID lookup. +- `UidMapHandler` — may no longer be needed for constraint resolution + (but still used for other purposes). +- Tutorial `ed-17.py` and any tests that create aliases. + +### 9.2 Fix `category_collection_to_cif` truncation + +`category_collection_to_cif` has `max_display=20` which truncates loop +output. For CIF used in save/load/round-trip, all rows must be emitted. + +Options: + +- (a) Remove `max_display` from `category_collection_to_cif` entirely, + add truncation only in display methods. +- (b) Add a `full=True` parameter and use it when serialising for + persistence. + +### 9.3 Verify CIF round-trip for experiments + +Write an integration test: + +1. Create a fully configured experiment (instrument, peak, background, + excluded regions, linked phases, data). +2. Serialise to CIF (`experiment.as_cif`). +3. Reconstruct from CIF (`ExperimentFactory.from_cif_str(cif_str)`). +4. Compare all parameter values. 
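+
+A minimal sketch of such a test, assuming pytest; the
+`configured_experiment` fixture, the import path, and the
+`free_parameters` accessor are illustrative stand-ins (the real test
+should cover all parameters, not just the free ones):
+
+```python
+import pytest
+
+# Import path is illustrative — use wherever ExperimentFactory lives.
+from easydiffraction.datablocks.experiment.factory import ExperimentFactory
+
+
+def test_experiment_cif_round_trip(configured_experiment):
+    # Serialise the fully configured experiment, then rebuild it.
+    rebuilt = ExperimentFactory.from_cif_str(configured_experiment.as_cif)
+
+    original = {p.unique_name: p.value for p in configured_experiment.free_parameters}
+    restored = {p.unique_name: p.value for p in rebuilt.free_parameters}
+    assert restored == pytest.approx(original)
+```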
+ +Fix any parameters that don't survive the round-trip. + +### 9.4 Add `destination` parameter to `extract_data_paths_from_zip` + +Currently extracts to a temp dir. Add optional `destination` parameter +to extract to a user-specified directory, enabling a clean two-step +workflow (extract → fit_sequential). + +--- + +## 10. Implementation Plan + +Each phase is independently testable and deployable. Prerequisite +changes (§ 9) are done first. + +### Phase 0: Prerequisites + +- 9.1: Switch alias `param_uid` → `param_unique_name` +- 9.2: Fix CIF collection truncation +- 9.3: Verify CIF round-trip for experiments +- 9.4: Add `destination` to `extract_data_paths_from_zip` ### Phase 1: Streaming sequential fit (max_workers=1) @@ -626,15 +823,20 @@ Each phase is independently testable and deployable. - Implement `SequentialFitTemplate` dataclass. - Implement `_fit_worker()` as a plain function (called directly, no subprocess). -- Implement CSV writing with `csv.DictWriter`. +- Create `analysis/` directory in project path. +- Implement CSV writing with `csv.DictWriter` (including diffrn metadata + columns). - Implement crash recovery (CSV reading + resumption). -- Implement parameter propagation (simple: last result → next +- Implement parameter propagation (last successful result → next iteration). -- Unit tests for CSV writing, crash recovery, and parameter propagation. -- Integration test: small sequential fit (5 files), verify CSV output. +- Update `plot_param_series()` to read from CSV. +- Unit tests for CSV writing, crash recovery, parameter propagation. +- Integration test: small sequential fit (5 files), verify CSV output + and plotting. -**Validates:** the core data flow, CSV format, and parameter -propagation. No multiprocessing complexity yet. +**Validates:** the core data flow, CSV format, parameter propagation, +and CIF round-trip in a real fitting scenario. No multiprocessing +complexity yet. ### Phase 2: Parallel fitting (max_workers > 1) @@ -643,38 +845,36 @@ propagation. No multiprocessing complexity yet. - Handle worker failures (catch exceptions, mark as failed in CSV, continue with next chunk). - Implement propagation with failed-fit fallback. +- Add `max_workers='auto'` support. - Integration test: parallel sequential fit (10 files, 2 workers). **Validates:** multiprocessing isolation, singleton safety, no shared state corruption. -### Phase 3: Plotting from CSV - -- Add `plot_param_series_from_csv()` to `Project`. -- Read CSV with `pandas`, resolve column names to parameter - unique_names. -- Reuse the existing `Plotter.plot_scatter` backend. -- Optionally unify with `plot_param_series()`. - -### Phase 4: Dataset replay +### Phase 3: Dataset replay -- Add `apply_params_from_csv()` to `Project` (or a helper method). +- Add `apply_params_from_csv()` to `Project`. - Load a specific CSV row, override parameter values in the live project. - Reload data from the file path in the CSV row. - Allow `plot_meas_vs_calc()` to work with the replayed state. +### Phase 4: CSV output for existing single-fit mode + +- Update the existing single-fit loop in `Analysis.fit()` to write + results to `analysis/results.csv` as well (same CSV format). +- This gives backward compatibility: `ed-17.py` style workflows also get + persistent CSV output and can use the unified `plot_param_series()`. + ### Phase 5 (optional): max_workers on existing fit() - Add `max_workers` parameter to `Analysis.fit()`. 
-- When `fit_mode == 'single'` and `max_workers > 1`, serialize +- When `fit_mode == 'single'` and `max_workers > 1`, serialise pre-loaded experiments and dispatch to the same worker pool. -- Propagate between chunks, store snapshots in memory (existing - behaviour) or CSV if `output_csv` is provided. --- -## 10. Dependencies and Risks +## 11. Dependencies and Risks ### New dependencies @@ -683,35 +883,41 @@ are all stdlib. ### Risks -| Risk | Mitigation | -| ------------------------------------------------- | ------------------------------------------------------------------------------- | -| CIF round-trip loses information | Verify with a round-trip test before Phase 1; fall back to Approach B if needed | -| UID non-determinism breaks constraints in workers | Verify UID generation; make deterministic if needed | -| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool to recycle workers | -| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields | -| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | +| Risk | Mitigation | +| ------------------------------------------------ | -------------------------------------------------------- | +| CIF round-trip loses information | Prerequisite § 9.3 verifies and fixes before Phase 1 | +| CIF collection truncation at 20 rows | Prerequisite § 9.2 fixes before Phase 1 | +| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool | +| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields | +| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | ### Related open issues - **Issue #1 (Project.load):** `fit_sequential` does not depend on - `load()`, but Phase 4 (dataset replay) benefits from it. + `load()`, but Phase 3 (dataset replay) benefits from it. - **Issue #7 (dummy Experiments wrapper):** `fit_sequential` bypasses this entirely — each worker creates its own `Experiments`. - **Issue #4 (constraint refresh):** the worker's fresh project applies - constraints from scratch — no stale state. But the main process must - still correctly capture alias/constraint definitions in the template. + constraints from scratch — no stale state. The prerequisite § 9.1 + (unique_name-based aliases) also improves constraint robustness in the + main process. --- -## 11. Summary - -| Aspect | Decision | -| -------------------- | ------------------------------------------------------- | -| Parallelism backend | `concurrent.futures.ProcessPoolExecutor` with `spawn` | -| Worker isolation | Each worker creates a fresh `Project` — no shared state | -| Data flow | Template CIF + data path → worker → result dict → CSV | -| Parameter seeding | Last successful result in chunk → next chunk | -| Crash recovery | Read existing CSV, skip fitted files, resume | -| Configuration | `max_workers` argument on `fit_sequential()` | -| New dependencies | None (stdlib only) | -| First implementation | Phase 1 (sequential, no parallelism) to validate design | +## 12. 
Summary + +| Aspect | Decision | +| ------------------- | -------------------------------------------------------------- | +| Parallelism backend | `concurrent.futures.ProcessPoolExecutor` with `spawn` | +| Worker isolation | Each worker creates a fresh `Project` — no shared state | +| Data source | `data_dir` argument; ZIP → extract first | +| Data flow | Template CIF + data path → worker → result dict → CSV | +| Parameter IDs | `unique_name` (deterministic), not `uid` (random) | +| Parameter seeding | Last successful result in chunk → next chunk | +| CSV location | `project_dir/analysis/results.csv` (deterministic) | +| CSV contents | Fit metrics + diffrn metadata + all free param values/uncert | +| Crash recovery | Read existing CSV, skip fitted files, resume | +| Plotting | Unified `plot_param_series()` always reads from CSV | +| Configuration | `max_workers` + `data_dir` on `fit_sequential()` | +| New dependencies | None (stdlib only) | +| First step | Phase 0 (prerequisites) then Phase 1 (sequential, no parallel) | From 277d34e46725182113afc8e19c375b44e39bc376 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 2 Apr 2026 20:26:10 +0200 Subject: [PATCH 3/5] Update sequential fitting design --- .../architecture/sequential_fitting_design.md | 248 ++++++++++++++++-- 1 file changed, 220 insertions(+), 28 deletions(-) diff --git a/docs/architecture/sequential_fitting_design.md b/docs/architecture/sequential_fitting_design.md index 039af98f..faaf3b87 100644 --- a/docs/architecture/sequential_fitting_design.md +++ b/docs/architecture/sequential_fitting_design.md @@ -254,19 +254,19 @@ def fit_sequential( max_workers: int | str = 1, chunk_size: int | None = None, file_pattern: str = '*', - metadata_patterns: dict[str, str] | None = None, + extract_diffrn: Callable[[str], dict[str, float | str]] | None = None, verbosity: str | None = None, ) -> None: ``` -| Parameter | Description | -| ------------------- | ------------------------------------------------------------------------------------------ | -| `data_dir` | Path to directory containing data files. | -| `max_workers` | Number of parallel worker processes. `1` = sequential. `'auto'` = physical CPU count. | -| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | -| `file_pattern` | Glob pattern to filter files in `data_dir`. Default `'*'` (all non-hidden files). | -| `metadata_patterns` | Dict mapping diffrn field names to regex patterns for metadata extraction from data files. | -| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | +| Parameter | Description | +| ---------------- | -------------------------------------------------------------------------------------------------- | +| `data_dir` | Path to directory containing data files. | +| `max_workers` | Number of parallel worker processes. `1` = sequential. `'auto'` = physical CPU count. | +| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | +| `file_pattern` | Glob pattern to filter files in `data_dir`. Default `'*'` (all non-hidden files). | +| `extract_diffrn` | User callback: `f(file_path) → {diffrn_field: value}`. Called per file. `None` = no diffrn metadata.| +| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | The CSV output path is **not** a parameter — it is determined by the project directory structure (see § 5.4). @@ -277,21 +277,45 @@ convention for `-n auto`. 
### 5.4 Project directory structure -The existing project layout gains an `analysis/` directory: +The current project layout has `analysis.cif` as a top-level file next +to directories like `structures/` and `experiments/`. Adding an +`analysis/` directory alongside a file named `analysis.cif` at the same +level is confusing and violates the principle of least surprise. + +**Decision:** move `analysis.cif` into the `analysis/` directory. All +analysis-related artifacts live under one directory — settings *and* +results: ``` project_dir/ ├── project.cif -├── analysis.cif ├── summary.cif ├── structures/ │ └── cosio.cif ├── experiments/ │ └── template.cif ← the fully-configured template └── analysis/ + ├── analysis.cif ← analysis settings (was at top level) └── results.csv ← sequential fit output ``` +This is a clean, self-consistent layout: + +- Every major concept (`structures`, `experiments`, `analysis`) gets its + own directory. +- `project.cif` and `summary.cif` remain at the top level because they + describe the project as a whole, not a single domain. +- No name collision between a file and a directory. +- Future analysis artifacts (e.g. per-experiment fit logs, constraint + snapshots) naturally live under `analysis/`. + +**Migration:** `Project.save()` writes `analysis.cif` to +`project_dir/analysis/analysis.cif` instead of `project_dir/analysis.cif`. +`Project.load()` (when implemented) reads from the new location. +Existing saved projects with `analysis.cif` at the top level can be +handled by a fallback in `load()` — check the new path first, then fall +back to the old path. + The CSV path is always `project_dir / 'analysis' / 'results.csv'`. This means: @@ -357,21 +381,62 @@ columns. This makes the CSV self-contained for plotting parameter evolution against any condition axis, e.g. temperature vs. magnetic field, without needing to re-read the original data files. -Diffrn values are extracted per file in the worker. The extraction -pattern is passed via the `metadata_patterns` argument: +#### Why metadata extraction stays explicit + +In the current `ed-17.py` tutorial, the user writes: + +```python +expt.diffrn.ambient_temperature = ed.extract_metadata( + file_path=data_path, + pattern=r'^TEMP\s+([0-9.]+)', +) +``` + +This is explicit and understandable — the user sees exactly what is +extracted and where it goes. Hiding extraction inside `fit_sequential()` +via a `metadata_patterns` dict would be less transparent: the user would +not see the assignment, could not inspect the value before fitting, and +the mapping between regex capture groups and diffrn fields would be +implicit. + +**Decision:** metadata extraction is a user-provided **callback** +instead of a hidden dict-based extraction. The user defines a plain +function that receives a file path and returns a dict of diffrn values. +This keeps the extraction visible, testable, and flexible (e.g. the user +can read from file headers, file names, companion JSON, or a database): ```python +def extract_diffrn(file_path: str) -> dict[str, float | str]: + return { + 'ambient_temperature': ed.extract_metadata( + file_path=file_path, + pattern=r'^TEMP\s+([0-9.]+)', + ), + } + project.analysis.fit_sequential( data_dir='data/', max_workers=4, - metadata_patterns={ - 'ambient_temperature': r'^TEMP\s+([0-9.]+)', - }, + extract_diffrn=extract_diffrn, ) ``` -This follows the existing `ed.extract_metadata()` pattern already used -in `ed-17.py`. +If `extract_diffrn` is `None` (the default), the diffrn columns in the +CSV are left empty. 
The callback is called once per file in the **main +process** after worker results are collected (not inside the worker — +see § 6.2 for why). The returned dict keys must match `diffrn` descriptor +names (e.g. `'ambient_temperature'`, `'ambient_pressure'`). Unrecognised +keys are ignored with a warning. + +This approach: + +- Keeps extraction logic **visible** in the user's notebook. +- Allows **any** extraction strategy (regex, filename parsing, external + lookup), not just regex on file content. +- Is easily testable — the user can call `extract_diffrn(path)` outside + of fitting to verify. +- Follows the existing pattern where users explicitly assign diffrn + values rather than having the library guess. --- @@ -403,13 +468,27 @@ class SequentialFitTemplate: constraint_defs: list[str] # [expression, ...] minimizer_tag: str # e.g. 'lmfit' calculator_tag: str # e.g. 'cryspy' - metadata_patterns: dict # {diffrn_field: regex_pattern} diffrn_field_names: list # ['ambient_temperature', ...] ``` This is a plain, picklable data object (no live references to -`GuardedBase` instances). Note: uses `unique_name` instead of random -`uid` for parameter identification. +`GuardedBase` instances, no callables). Note: uses `unique_name` instead +of random `uid` for parameter identification. + +**`extract_diffrn` callback handling:** the user-provided callback is +*not* stored in the template because arbitrary callables (lambdas, +closures) cannot be pickled reliably for multiprocessing. Instead: + +- In **single-worker mode** (`max_workers=1`), the callback is called + directly in the main process after `_fit_worker()` returns. +- In **multi-worker mode**, the callback is called in the main process + after collecting results from the worker pool. Since diffrn metadata + extraction is I/O-bound (reading file headers) and fast, running it in + the main process is not a bottleneck. + +This means `_fit_worker()` does not extract diffrn metadata — it only +fits. The main process calls `extract_diffrn(data_path)` for each file +and merges the returned dict into the result row before writing to CSV. ### 6.3 Chunk-based processing @@ -433,6 +512,12 @@ for chunk in chunked(remaining_paths, chunk_size): # Sort results to match file order (as_completed is unordered) results.sort(key=lambda r: data_paths.index(r['file_path'])) + # Extract diffrn metadata in the main process (avoids pickling callables) + if extract_diffrn is not None: + for result in results: + diffrn_values = extract_diffrn(result['file_path']) + result.update(diffrn_values) + _append_to_csv(csv_path, results) # Report progress (main process only, between chunks) @@ -504,16 +589,18 @@ def _fit_worker( expt.calculator_type = template.calculator_tag project.analysis.current_minimizer = template.minimizer_tag - # 9. Extract diffrn metadata from data file - diffrn_values = _extract_metadata(data_path, template.metadata_patterns) - - # 10. Fit + # 9. Fit project.analysis.fit(verbosity='silent') - # 11. Collect results - return _collect_results(project, data_path, diffrn_values) + # 10. Collect results (fit metrics + parameter values, no diffrn metadata) + return _collect_results(project, data_path) ``` +**Note:** diffrn metadata extraction (`extract_diffrn` callback) is not +called in the worker. The main process calls the callback after +collecting worker results and merges the metadata into each result row +before writing to CSV. This avoids pickling issues with callables. 
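+
+As an aside, the `chunked()` helper used in the § 6.3 loop above is not
+in the stdlib. The name comes from this document; the body below is one
+possible implementation:
+
+```python
+from itertools import islice
+from typing import Iterable, Iterator, TypeVar
+
+T = TypeVar('T')
+
+
+def chunked(items: Iterable[T], size: int) -> Iterator[list[T]]:
+    """Yield successive lists of at most `size` items each."""
+    it = iter(items)
+    while batch := list(islice(it, size)):
+        yield batch
+```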
+ #### Step 3+4: CIF round-trip then data reload (Approach A) The template experiment's CIF contains the full configuration: @@ -743,7 +830,9 @@ here. method, no `_from_csv` variant. See § 5.5. 7. **Metadata in CSV** → include all diffrn fields (temperature, - pressure, magnetic field, electric field). See § 5.7. + pressure, magnetic field, electric field). Extraction is done via a + user-provided callback (`extract_diffrn`), not hidden inside + `fit_sequential`. See § 5.7. 8. **CSV output path** → deterministic: `project_dir/analysis/results.csv`. No argument needed. See § 5.4. @@ -752,6 +841,15 @@ here. directory). Files are discovered via `extract_data_paths_from_dir`. For ZIP sources, extract first. See § 5.1. +10. **Project directory structure** → move `analysis.cif` into + `analysis/` directory. All analysis artifacts (settings + results) + live under one directory. See § 5.4 and § 9.6. + +11. **Singletons (`UidMapHandler`, `ConstraintsHandler`)** → replace + with instance-owned state on `Project` and `Analysis`. Fixes + notebook rerun issues, simplifies worker isolation, resolves + issue #4. See § 9.5. + --- ## 9. Prerequisite Changes @@ -803,6 +901,94 @@ Currently extracts to a temp dir. Add optional `destination` parameter to extract to a user-specified directory, enabling a clean two-step workflow (extract → fit_sequential). +### 9.5 Replace singletons with instance-owned state + +#### Problem + +`UidMapHandler` and `ConstraintsHandler` are process-global singletons +(`SingletonBase`). This causes three concrete problems: + +1. **Notebook reruns.** When a user re-executes cells in a Jupyter + notebook, the old `Project` is garbage-collected but the singleton + retains all aliases, constraints, and UID entries from the previous + run. The new `Project` inherits stale state, leading to ghost + constraints and spurious errors. Users must restart the kernel to get + a clean state — a common source of confusion. + +2. **Sequential fitting workers.** The `spawn` context used for + `ProcessPoolExecutor` creates fresh interpreters, so singletons are + naturally isolated per worker. This is why multiprocessing works + despite the singletons. However, the main process's singleton still + accumulates state across chunks and calls — not harmful in the current + design (workers don't touch the main singleton), but fragile if the + architecture evolves. + +3. **Multiple projects.** If a user creates two `Project` instances in + the same session (e.g. to compare fits), their constraints and UID + maps collide in the shared singleton. + +#### Proposed fix + +Move the state owned by singletons into `Analysis` (for constraints) and +`Project` (for the UID map): + +| Current singleton | New owner | Lifetime | +| ----------------------- | ----------------------- | ------------------------- | +| `ConstraintsHandler` | `Analysis._constraints_engine` | Per-`Analysis` instance | +| `UidMapHandler` | `Project._uid_map` | Per-`Project` instance | + +The objects are the same classes, just no longer singletons — they are +instantiated in `__init__` and passed explicitly to the components that +need them (e.g. `Parameter.__init__` receives a `uid_map` reference from +its owning project, `ConstraintsHandler` is accessed via +`self.project.analysis._constraints_engine`). + +#### Impact on sequential fitting + +- **Simplifies workers:** each worker's `Project()` naturally creates + its own `_uid_map` and `_constraints_engine`. No singleton isolation + concern at all. 
+- **Simplifies crash recovery and notebook reruns:** creating a new + `Project` starts with a blank slate, no stale state leaks. +- **No impact on the `fit_sequential` API** — the change is purely + internal. + +#### Scope and sequencing + +This is a self-contained refactor that can be done independently of +sequential fitting. It improves correctness for existing workflows +(notebook reruns, issue #4) and simplifies the sequential fitting +implementation. It is listed as a prerequisite because it eliminates a +class of bugs that would otherwise need workaround code in the worker. + +However, if the refactor proves too large for the initial sequential +fitting work, the `spawn`-based multiprocessing provides natural +isolation and the singletons can be addressed in a follow-up. The +sequential fitting design does **not** depend on this change — it works +either way. + +#### Relationship to issue #4 + +Open issue #4 ("Refresh constraint state before auto-apply") is a +symptom of the singleton problem. If constraints are instance-owned, +there is no stale state to refresh — the constraint engine always +reflects the current `Analysis` instance's aliases and constraints. +Fixing the singleton issue resolves issue #4 as a side effect. + +### 9.6 Move `analysis.cif` into `analysis/` directory + +Currently `analysis.cif` lives at the project root alongside +`project.cif` and `summary.cif`. Adding an `analysis/` directory for +`results.csv` next to a file named `analysis.cif` at the same level +creates a naming conflict and a confusing layout. + +**Fix:** update `Project.save()` to write `analysis.cif` to +`project_dir/analysis/analysis.cif`. Update `Project.load()` (when +implemented) to read from the new path, with a fallback to the old +path for backward compatibility with existing saved projects. Update +docs (`architecture.md`, `project.md`), tests, and the save output +messages. + --- ## 10. Implementation Plan @@ -816,6 +1002,9 @@ changes (§ 9) are done first. - 9.2: Fix CIF collection truncation - 9.3: Verify CIF round-trip for experiments - 9.4: Add `destination` to `extract_data_paths_from_zip` +- 9.5: Replace singletons with instance-owned state (recommended but not + blocking — `spawn` provides natural isolation) +- 9.6: Move `analysis.cif` into `analysis/` directory ### Phase 1: Streaming sequential fit (max_workers=1) @@ -916,8 +1105,11 @@ are all stdlib. 
| Parameter seeding | Last successful result in chunk → next chunk | | CSV location | `project_dir/analysis/results.csv` (deterministic) | | CSV contents | Fit metrics + diffrn metadata + all free param values/uncert | +| Metadata extraction | User-provided `extract_diffrn` callback, not hidden in lib | | Crash recovery | Read existing CSV, skip fitted files, resume | | Plotting | Unified `plot_param_series()` always reads from CSV | | Configuration | `max_workers` + `data_dir` on `fit_sequential()` | +| Project layout | `analysis.cif` moves into `analysis/` directory | +| Singletons | Replace with instance-owned state (recommended prerequisite) | | New dependencies | None (stdlib only) | | First step | Phase 0 (prerequisites) then Phase 1 (sequential, no parallel) | From 34085ce0d607dbd3ab1247a455c2c4c3034487b4 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 2 Apr 2026 20:38:51 +0200 Subject: [PATCH 4/5] Final update sequential fitting design --- .../architecture/sequential_fitting_design.md | 312 +++++++++++++----- 1 file changed, 226 insertions(+), 86 deletions(-) diff --git a/docs/architecture/sequential_fitting_design.md b/docs/architecture/sequential_fitting_design.md index faaf3b87..a6ae83cb 100644 --- a/docs/architecture/sequential_fitting_design.md +++ b/docs/architecture/sequential_fitting_design.md @@ -993,73 +993,214 @@ messages. ## 10. Implementation Plan -Each phase is independently testable and deployable. Prerequisite -changes (§ 9) are done first. - -### Phase 0: Prerequisites - -- 9.1: Switch alias `param_uid` → `param_unique_name` -- 9.2: Fix CIF collection truncation -- 9.3: Verify CIF round-trip for experiments -- 9.4: Add `destination` to `extract_data_paths_from_zip` -- 9.5: Replace singletons with instance-owned state (recommended but not - blocking — `spawn` provides natural isolation) -- 9.6: Move `analysis.cif` into `analysis/` directory - -### Phase 1: Streaming sequential fit (max_workers=1) - -- Add `fit_sequential()` to `Analysis`. -- Implement `SequentialFitTemplate` dataclass. -- Implement `_fit_worker()` as a plain function (called directly, no - subprocess). -- Create `analysis/` directory in project path. -- Implement CSV writing with `csv.DictWriter` (including diffrn metadata - columns). -- Implement crash recovery (CSV reading + resumption). -- Implement parameter propagation (last successful result → next - iteration). -- Update `plot_param_series()` to read from CSV. -- Unit tests for CSV writing, crash recovery, parameter propagation. -- Integration test: small sequential fit (5 files), verify CSV output - and plotting. - -**Validates:** the core data flow, CSV format, parameter propagation, -and CIF round-trip in a real fitting scenario. No multiprocessing -complexity yet. - -### Phase 2: Parallel fitting (max_workers > 1) - -- Refactor `_fit_worker()` to be a module-level picklable function. -- Implement `ProcessPoolExecutor` dispatch with `spawn` context. -- Handle worker failures (catch exceptions, mark as failed in CSV, - continue with next chunk). -- Implement propagation with failed-fit fallback. -- Add `max_workers='auto'` support. -- Integration test: parallel sequential fit (10 files, 2 workers). - -**Validates:** multiprocessing isolation, singleton safety, no shared -state corruption. - -### Phase 3: Dataset replay - -- Add `apply_params_from_csv()` to `Project`. -- Load a specific CSV row, override parameter values in the live - project. -- Reload data from the file path in the CSV row. 
-- Allow `plot_meas_vs_calc()` to work with the replayed state. - -### Phase 4: CSV output for existing single-fit mode - -- Update the existing single-fit loop in `Analysis.fit()` to write - results to `analysis/results.csv` as well (same CSV format). -- This gives backward compatibility: `ed-17.py` style workflows also get - persistent CSV output and can use the unified `plot_param_series()`. - -### Phase 5 (optional): max_workers on existing fit() - -- Add `max_workers` parameter to `Analysis.fit()`. -- When `fit_mode == 'single'` and `max_workers > 1`, serialise - pre-loaded experiments and dispatch to the same worker pool. +The plan is structured as a sequence of pull requests. Each PR is +independently mergeable and testable. Foundation issues (#7, #4, #1) +are resolved first because they clean up the fitting internals that +`fit_sequential` builds on top of. + +### Foundation PRs (resolve existing issues) + +#### PR 1 — Eliminate dummy Experiments wrapper in single-fit mode (issue #7) + +> **Title:** `Accept single Experiment in Fitter.fit()` +> +> **Description:** Refactor `Fitter.fit()` and `_residual_function()` to +> accept a plain list of `ExperimentBase` objects instead of requiring an +> `Experiments` collection. Remove the dummy `Experiments` wrapper and the +> `object.__setattr__` hack in `Analysis.fit()` single-mode loop. Update +> `_residual_function` to iterate over the list directly. Update all +> callers (single-fit, joint-fit). Update unit and integration tests. + +**Why first:** the current dummy-wrapper pattern is the exact antipattern +that `fit_sequential` workers would otherwise inherit. Fixing it now +gives the worker a clean `Fitter.fit(structures, [experiment])` call +without any collection ceremony. + +#### PR 2 — Replace singletons with instance-owned state (issue #4 + § 9.5) + +> **Title:** `Move ConstraintsHandler and UidMapHandler to instance scope` +> +> **Description:** Replace the `SingletonBase` pattern for +> `ConstraintsHandler` and `UidMapHandler` with per-project instances. +> `Project.__init__` creates `_uid_map`; `Analysis.__init__` creates +> `_constraints_engine`. Thread the references through to `Parameter` +> and constraint resolution. Remove `SingletonBase` class if no longer +> used. Update all call sites that use `.get()`. This also fixes issue #4 +> (stale constraint state) as a side effect — the constraint engine is +> always in sync with its owning `Analysis`. + +**Why second:** removes the global mutable state that makes notebook +reruns unreliable and multi-project sessions impossible. Sequential +fitting workers benefit from natural isolation (each `Project()` has its +own engine), but the main benefit is correctness for existing workflows. + +This is a sub-step breakdown if the PR proves too large: + +- **PR 2a:** `Move UidMapHandler to Project instance scope` +- **PR 2b:** `Move ConstraintsHandler to Analysis instance scope` +- **PR 2c:** `Remove SingletonBase if unused` + +#### PR 3 — Implement Project.load() (issue #1) + +> **Title:** `Implement Project.load() from CIF directory` +> +> **Description:** Implement `Project.load(dir_path)` that reads +> `project.cif`, `structures/*.cif`, `experiments/*.cif`, and +> `analysis/analysis.cif` from the project directory and reconstructs the +> full project state. Handle the old layout (`analysis.cif` at root) as a +> fallback. Add integration test: save → load → compare all parameter +> values. 
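+
+The old-layout fallback mentioned in the description is small; a
+sketch, with an illustrative helper name:
+
+```python
+from pathlib import Path
+
+
+def _analysis_cif_path(project_dir: Path) -> Path:
+    """Prefer the new analysis/ location; fall back to the old root."""
+    new_path = project_dir / 'analysis' / 'analysis.cif'
+    old_path = project_dir / 'analysis.cif'
+    return new_path if new_path.exists() else old_path
+```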
+ +**Why third:** the CIF round-trip reliability that `load()` proves is +the same reliability that `fit_sequential` workers depend on (they +reconstruct a project from CIF strings). Implementing `load()` forces +us to fix any serialisation gaps before they become worker bugs. Phase 3 +(dataset replay) also directly uses `load()`. + +### Sequential-fitting prerequisite PRs + +#### PR 4 — Switch alias param_uid to param_unique_name (§ 9.1) + +> **Title:** `Use unique_name instead of random UID in aliases` +> +> **Description:** Rename `Alias._param_uid` to +> `Alias._param_unique_name`. Update `CifHandler` names. Change +> `ConstraintsHandler` to resolve parameters via `unique_name` lookup +> instead of UID. Update `ed-17.py` tutorial and all tests that create +> aliases. + +#### PR 5 — Fix CIF collection truncation (§ 9.2) + +> **Title:** `Remove max_display truncation from CIF serialisation` +> +> **Description:** Remove `max_display=20` from +> `category_collection_to_cif`. Add truncation only in display methods +> (`show_as_cif()`). Ensures experiments with many background/data points +> survive CIF round-trips. + +#### PR 6 — Verify CIF round-trip for experiments (§ 9.3) + +> **Title:** `Add CIF round-trip integration test for experiments` +> +> **Description:** Write an integration test that creates a fully +> configured experiment (instrument, peak, background, excluded regions, +> linked phases, data), serialises to CIF, reconstructs from CIF, and +> asserts all parameter values match. Fix any parameters that don't +> survive the round-trip. + +#### PR 7 — Move analysis.cif into analysis/ directory (§ 9.6) + +> **Title:** `Move analysis.cif into analysis/ directory` +> +> **Description:** Update `Project.save()` to write `analysis.cif` to +> `project_dir/analysis/analysis.cif`. Update `Project.load()` to read +> from the new path (with fallback to old path). Update docs +> (`architecture.md`, `project.md`), tests, and console output messages. + +#### PR 8 — Add destination to extract_data_paths_from_zip (§ 9.4) + +> **Title:** `Add destination parameter to extract_data_paths_from_zip` +> +> **Description:** Add optional `destination` keyword to +> `extract_data_paths_from_zip`. When provided, extracts to the given +> directory instead of a temp dir. Enables clean two-step workflow: +> extract ZIP → pass directory to `fit_sequential()`. + +### Sequential-fitting core PRs + +#### PR 9 — Streaming sequential fit (max_workers=1) + +> **Title:** `Add fit_sequential() for streaming single-worker fitting` +> +> **Description:** Add `Analysis.fit_sequential(data_dir, ...)` method. +> Implements: `SequentialFitTemplate` dataclass, `_fit_worker()` plain +> function (called directly, no subprocess), CSV writing with +> `csv.DictWriter`, crash recovery (read CSV + resume), parameter +> propagation (last successful → next iteration). Include +> `extract_diffrn` callback support for metadata columns. Unit tests for +> CSV writing, crash recovery, parameter propagation. + +This is a sub-step breakdown if the PR proves too large: + +- **PR 9a:** `Add SequentialFitTemplate and _fit_worker function` — + dataclass, worker function, no CSV, no recovery. +- **PR 9b:** `Add CSV output and crash recovery to fit_sequential` — + CSV writing, reading, resumption logic. +- **PR 9c:** `Add parameter propagation and extract_diffrn callback` — + chunk-to-chunk seeding, diffrn metadata columns. 
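+
+For PR 9b, the resume logic from § 6.8 is plain stdlib; a sketch,
+assuming the CSV columns of § 6.7 and an illustrative helper name:
+
+```python
+import csv
+from pathlib import Path
+
+
+def read_resume_state(csv_path: Path) -> tuple[set[str], dict[str, str]]:
+    """Return already-fitted file paths and the last successful row."""
+    fitted: set[str] = set()
+    last_success: dict[str, str] = {}
+    if not csv_path.exists():
+        return fitted, last_success
+    with csv_path.open(newline='') as f:
+        for row in csv.DictReader(f):
+            fitted.add(row['file_path'])
+            if row['fit_success'] == 'True':
+                last_success = row
+    return fitted, last_success
+```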
+ +#### PR 10 — Update plot_param_series to read from CSV + +> **Title:** `Unify plot_param_series to always read from CSV` +> +> **Description:** Refactor `plot_param_series()` to read from +> `project_dir/analysis/results.csv` instead of in-memory +> `_parameter_snapshots`. Works for both `fit_sequential()` (Phase 1+) +> and existing `fit()` single-mode (Phase 4). Remove the old +> `_parameter_snapshots` dict. + +#### PR 11 — Parallel fitting (max_workers > 1) + +> **Title:** `Add multiprocessing support to fit_sequential` +> +> **Description:** Refactor `_fit_worker()` to be module-level picklable. +> Add `ProcessPoolExecutor` dispatch with `spawn` context. Handle worker +> failures (catch exceptions, mark as failed in CSV). Add +> `max_workers='auto'` support (`os.cpu_count()`). Integration test: +> parallel sequential fit (10 files, 2 workers). + +### Post-sequential PRs + +#### PR 12 — Dataset replay from CSV + +> **Title:** `Add apply_params_from_csv() for dataset replay` +> +> **Description:** Add `Project.apply_params_from_csv(row_index)` that +> loads a CSV row, overrides parameter values in the live project, and +> reloads data from the file path in that row. Enables +> `plot_meas_vs_calc()` for any previously fitted dataset. + +#### PR 13 — CSV output for existing single-fit mode + +> **Title:** `Write results.csv from existing single-fit mode` +> +> **Description:** Update the existing `Analysis.fit()` single-mode loop +> to write results to `analysis/results.csv` (same CSV format as +> `fit_sequential`). This gives `ed-17.py`-style workflows persistent CSV +> output and unified `plot_param_series()`. + +#### PR 14 (optional) — Parallel single-fit for pre-loaded experiments + +> **Title:** `Add max_workers to Analysis.fit() for pre-loaded experiments` +> +> **Description:** Add `max_workers` parameter to `Analysis.fit()`. When +> `fit_mode == 'single'` and `max_workers > 1`, serialise pre-loaded +> experiments and dispatch to the same worker pool used by +> `fit_sequential`. Reuses the same `_fit_worker` and +> `SequentialFitTemplate` infrastructure. + +### Dependency graph + +``` +PR 1 (issue #7: eliminate dummy Experiments) + └─► PR 2 (issue #4: singletons → instance-owned) + └─► PR 3 (issue #1: Project.load) + └─► PR 4 (alias unique_name) + └─► PR 5 (CIF truncation) + └─► PR 6 (CIF round-trip test) + ├─► PR 7 (analysis.cif → analysis/) + │ └─► PR 9 (streaming sequential fit) + │ ├─► PR 10 (plot from CSV) + │ │ └─► PR 13 (CSV for existing fit) + │ └─► PR 11 (parallel fitting) + │ └─► PR 14 (optional: parallel fit()) + └─► PR 8 (zip destination) + └─► PR 12 (dataset replay) +``` + +Note: PRs 4–8 are largely independent of each other and can be +parallelised or reordered as long as PRs 1–3 are done first and PRs 4–6 +are done before PR 9. --- @@ -1072,24 +1213,23 @@ are all stdlib. 
### Risks -| Risk | Mitigation | -| ------------------------------------------------ | -------------------------------------------------------- | -| CIF round-trip loses information | Prerequisite § 9.3 verifies and fixes before Phase 1 | -| CIF collection truncation at 20 rows | Prerequisite § 9.2 fixes before Phase 1 | -| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool | -| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields | -| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | - -### Related open issues - -- **Issue #1 (Project.load):** `fit_sequential` does not depend on - `load()`, but Phase 3 (dataset replay) benefits from it. -- **Issue #7 (dummy Experiments wrapper):** `fit_sequential` bypasses - this entirely — each worker creates its own `Experiments`. -- **Issue #4 (constraint refresh):** the worker's fresh project applies - constraints from scratch — no stale state. The prerequisite § 9.1 - (unique_name-based aliases) also improves constraint robustness in the - main process. +| Risk | Mitigation | +| ------------------------------------------------ | ------------------------------------------------------ | +| CIF round-trip loses information | PR 3 (load) + PR 6 (round-trip test) verify before PR 9| +| CIF collection truncation at 20 rows | PR 5 fixes before PR 9 | +| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool | +| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields| +| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | + +### Resolved open issues (now prerequisites) + +- **Issue #7 (dummy Experiments wrapper):** resolved in PR 1. The worker + uses the clean `Fitter.fit(structures, [experiment])` API. +- **Issue #4 (constraint refresh) + § 9.5 (singletons):** resolved in + PR 2. Instance-owned constraint engine eliminates stale state. +- **Issue #1 (Project.load):** resolved in PR 3. CIF round-trip + reliability is proven before workers depend on it. Dataset replay + (PR 12) uses `load()` directly. --- @@ -1112,4 +1252,4 @@ are all stdlib. 
| Project layout | `analysis.cif` moves into `analysis/` directory | | Singletons | Replace with instance-owned state (recommended prerequisite) | | New dependencies | None (stdlib only) | -| First step | Phase 0 (prerequisites) then Phase 1 (sequential, no parallel) | +| First step | PRs 1–3 (foundation issues), then PRs 4–8 (prerequisites), then PR 9+ | From 0e42f9f0fa0b3c4f12fc6341153bd715c59fcf04 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 2 Apr 2026 21:12:16 +0200 Subject: [PATCH 5/5] Accept list of experiments in Fitter.fit instead of collection --- .../architecture/sequential_fitting_design.md | 177 +++++++++--------- src/easydiffraction/analysis/analysis.py | 27 ++- .../analysis/fit_helpers/metrics.py | 16 +- src/easydiffraction/analysis/fitting.py | 77 +++----- .../analysis/fit_helpers/test_metrics.py | 6 +- .../easydiffraction/analysis/test_analysis.py | 3 + .../easydiffraction/analysis/test_fitting.py | 24 +-- 7 files changed, 153 insertions(+), 177 deletions(-) diff --git a/docs/architecture/sequential_fitting_design.md b/docs/architecture/sequential_fitting_design.md index a6ae83cb..fd10a104 100644 --- a/docs/architecture/sequential_fitting_design.md +++ b/docs/architecture/sequential_fitting_design.md @@ -259,14 +259,14 @@ def fit_sequential( ) -> None: ``` -| Parameter | Description | -| ---------------- | -------------------------------------------------------------------------------------------------- | -| `data_dir` | Path to directory containing data files. | -| `max_workers` | Number of parallel worker processes. `1` = sequential. `'auto'` = physical CPU count. | -| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | -| `file_pattern` | Glob pattern to filter files in `data_dir`. Default `'*'` (all non-hidden files). | -| `extract_diffrn` | User callback: `f(file_path) → {diffrn_field: value}`. Called per file. `None` = no diffrn metadata.| -| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | +| Parameter | Description | +| ---------------- | ---------------------------------------------------------------------------------------------------- | +| `data_dir` | Path to directory containing data files. | +| `max_workers` | Number of parallel worker processes. `1` = sequential. `'auto'` = physical CPU count. | +| `chunk_size` | Files per chunk. Default `None` → uses `max_workers`. | +| `file_pattern` | Glob pattern to filter files in `data_dir`. Default `'*'` (all non-hidden files). | +| `extract_diffrn` | User callback: `f(file_path) → {diffrn_field: value}`. Called per file. `None` = no diffrn metadata. | +| `verbosity` | `'full'`, `'short'`, `'silent'`. Default: project verbosity. | The CSV output path is **not** a parameter — it is determined by the project directory structure (see § 5.4). @@ -283,7 +283,7 @@ to directories like `structures/` and `experiments/`. Adding an level is confusing and violates the principle of least surprise. **Decision:** move `analysis.cif` into the `analysis/` directory. All -analysis-related artifacts live under one directory — settings *and* +analysis-related artifacts live under one directory — settings _and_ results: ``` @@ -310,11 +310,11 @@ This is a clean, self-consistent layout: snapshots) naturally live under `analysis/`. **Migration:** `Project.save()` writes `analysis.cif` to -`project_dir/analysis/analysis.cif` instead of `project_dir/analysis.cif`. -`Project.load()` (when implemented) reads from the new location. 
-Existing saved projects with `analysis.cif` at the top level can be -handled by a fallback in `load()` — check the new path first, then fall -back to the old path. +`project_dir/analysis/analysis.cif` instead of +`project_dir/analysis.cif`. `Project.load()` (when implemented) reads +from the new location. Existing saved projects with `analysis.cif` at +the top level can be handled by a fallback in `load()` — check the new +path first, then fall back to the old path. The CSV path is always `project_dir / 'analysis' / 'results.csv'`. This means: @@ -424,9 +424,9 @@ project.analysis.fit_sequential( If `extract_diffrn` is `None` (the default), the diffrn columns in the CSV are left empty. The callback is called once per file in the **main process** after worker results are collected (not inside the worker — -see § 6.2 for why). The returned dict keys must match `diffrn` descriptor -names (e.g. `'ambient_temperature'`, `'ambient_pressure'`). Unrecognised -keys are ignored with a warning. +see § 6.2 for why). The returned dict keys must match `diffrn` +descriptor names (e.g. `'ambient_temperature'`, `'ambient_pressure'`). +Unrecognised keys are ignored with a warning. This approach: @@ -476,7 +476,7 @@ This is a plain, picklable data object (no live references to of random `uid` for parameter identification. **`extract_diffrn` callback handling:** the user-provided callback is -*not* stored in the template because arbitrary callables (lambdas, +_not_ stored in the template because arbitrary callables (lambdas, closures) cannot be pickled reliably for multiprocessing. Instead: - In **single-worker mode** (`max_workers=1`), the callback is called @@ -847,8 +847,8 @@ here. 11. **Singletons (`UidMapHandler`, `ConstraintsHandler`)** → replace with instance-owned state on `Project` and `Analysis`. Fixes - notebook rerun issues, simplifies worker isolation, resolves - issue #4. See § 9.5. + notebook rerun issues, simplifies worker isolation, resolves issue + #4. See § 9.5. --- @@ -919,9 +919,9 @@ workflow (extract → fit_sequential). `ProcessPoolExecutor` creates fresh interpreters, so singletons are naturally isolated per worker. This is why multiprocessing works despite the singletons. However, the main process's singleton still - accumulates state across chunks and calls — not harmful in the current - design (workers don't touch the main singleton), but fragile if the - architecture evolves. + accumulates state across chunks and calls — not harmful in the + current design (workers don't touch the main singleton), but fragile + if the architecture evolves. 3. **Multiple projects.** If a user creates two `Project` instances in the same session (e.g. to compare fits), their constraints and UID @@ -932,10 +932,10 @@ workflow (extract → fit_sequential). 
Move the state owned by singletons into `Analysis` (for constraints) and `Project` (for the UID map): -| Current singleton | New owner | Lifetime | -| ----------------------- | ----------------------- | ------------------------- | -| `ConstraintsHandler` | `Analysis._constraints_engine` | Per-`Analysis` instance | -| `UidMapHandler` | `Project._uid_map` | Per-`Project` instance | +| Current singleton | New owner | Lifetime | +| -------------------- | ------------------------------ | ----------------------- | +| `ConstraintsHandler` | `Analysis._constraints_engine` | Per-`Analysis` instance | +| `UidMapHandler` | `Project._uid_map` | Per-`Project` instance | The objects are the same classes, just no longer singletons — they are instantiated in `__init__` and passed explicitly to the components that @@ -984,18 +984,17 @@ creates a naming conflict and a confusing layout. **Fix:** update `Project.save()` to write `analysis.cif` to `project_dir/analysis/analysis.cif`. Update `Project.load()` (when -implemented) to read from the new path, with a fallback to the old -path for backward compatibility with existing saved projects. Update -docs (`architecture.md`, `project.md`), tests, and the save output -messages. +implemented) to read from the new path, with a fallback to the old path +for backward compatibility with existing saved projects. Update docs +(`architecture.md`, `project.md`), tests, and the save output messages. --- ## 10. Implementation Plan The plan is structured as a sequence of pull requests. Each PR is -independently mergeable and testable. Foundation issues (#7, #4, #1) -are resolved first because they clean up the fitting internals that +independently mergeable and testable. Foundation issues (#7, #4, #1) are +resolved first because they clean up the fitting internals that `fit_sequential` builds on top of. ### Foundation PRs (resolve existing issues) @@ -1005,29 +1004,32 @@ are resolved first because they clean up the fitting internals that > **Title:** `Accept single Experiment in Fitter.fit()` > > **Description:** Refactor `Fitter.fit()` and `_residual_function()` to -> accept a plain list of `ExperimentBase` objects instead of requiring an -> `Experiments` collection. Remove the dummy `Experiments` wrapper and the -> `object.__setattr__` hack in `Analysis.fit()` single-mode loop. Update -> `_residual_function` to iterate over the list directly. Update all -> callers (single-fit, joint-fit). Update unit and integration tests. - -**Why first:** the current dummy-wrapper pattern is the exact antipattern -that `fit_sequential` workers would otherwise inherit. Fixing it now -gives the worker a clean `Fitter.fit(structures, [experiment])` call -without any collection ceremony. +> accept a plain list of `ExperimentBase` objects instead of requiring +> an `Experiments` collection. Remove the dummy `Experiments` wrapper +> and the `object.__setattr__` hack in `Analysis.fit()` single-mode +> loop. Update `_residual_function` to iterate over the list directly. +> Update all callers (single-fit, joint-fit). Update unit and +> integration tests. + +**Why first:** the current dummy-wrapper pattern is the exact +antipattern that `fit_sequential` workers would otherwise inherit. +Fixing it now gives the worker a clean +`Fitter.fit(structures, [experiment])` call without any collection +ceremony. 
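+
+Concretely, the single-mode loop body after this PR reduces to a
+plain-list call — a sketch mirroring the `analysis.py` changes later in
+this patch (the function name and arguments are illustrative):
+
+```python
+def fit_each_experiment(analysis, structures, experiments, verbosity):
+    # One independent fit per experiment: a one-element list replaces
+    # the old dummy Experiments wrapper and its parent-patching hack.
+    for expt_name in experiments.names:
+        experiment = experiments[expt_name]
+        analysis.fitter.fit(
+            structures,
+            [experiment],
+            analysis=analysis,
+            verbosity=verbosity,
+        )
+```
+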
#### PR 2 — Replace singletons with instance-owned state (issue #4 + § 9.5) -> **Title:** `Move ConstraintsHandler and UidMapHandler to instance scope` +> **Title:** +> `Move ConstraintsHandler and UidMapHandler to instance scope` > > **Description:** Replace the `SingletonBase` pattern for > `ConstraintsHandler` and `UidMapHandler` with per-project instances. > `Project.__init__` creates `_uid_map`; `Analysis.__init__` creates > `_constraints_engine`. Thread the references through to `Parameter` > and constraint resolution. Remove `SingletonBase` class if no longer -> used. Update all call sites that use `.get()`. This also fixes issue #4 -> (stale constraint state) as a side effect — the constraint engine is -> always in sync with its owning `Analysis`. +> used. Update all call sites that use `.get()`. This also fixes issue +> #4 (stale constraint state) as a side effect — the constraint engine +> is always in sync with its owning `Analysis`. **Why second:** removes the global mutable state that makes notebook reruns unreliable and multi-project sessions impossible. Sequential @@ -1046,15 +1048,15 @@ This is a sub-step breakdown if the PR proves too large: > > **Description:** Implement `Project.load(dir_path)` that reads > `project.cif`, `structures/*.cif`, `experiments/*.cif`, and -> `analysis/analysis.cif` from the project directory and reconstructs the -> full project state. Handle the old layout (`analysis.cif` at root) as a -> fallback. Add integration test: save → load → compare all parameter -> values. +> `analysis/analysis.cif` from the project directory and reconstructs +> the full project state. Handle the old layout (`analysis.cif` at root) +> as a fallback. Add integration test: save → load → compare all +> parameter values. **Why third:** the CIF round-trip reliability that `load()` proves is the same reliability that `fit_sequential` workers depend on (they -reconstruct a project from CIF strings). Implementing `load()` forces -us to fix any serialisation gaps before they become worker bugs. Phase 3 +reconstruct a project from CIF strings). Implementing `load()` forces us +to fix any serialisation gaps before they become worker bugs. Phase 3 (dataset replay) also directly uses `load()`. ### Sequential-fitting prerequisite PRs @@ -1075,8 +1077,8 @@ us to fix any serialisation gaps before they become worker bugs. Phase 3 > > **Description:** Remove `max_display=20` from > `category_collection_to_cif`. Add truncation only in display methods -> (`show_as_cif()`). Ensures experiments with many background/data points -> survive CIF round-trips. +> (`show_as_cif()`). Ensures experiments with many background/data +> points survive CIF round-trips. #### PR 6 — Verify CIF round-trip for experiments (§ 9.3) @@ -1124,8 +1126,8 @@ This is a sub-step breakdown if the PR proves too large: - **PR 9a:** `Add SequentialFitTemplate and _fit_worker function` — dataclass, worker function, no CSV, no recovery. -- **PR 9b:** `Add CSV output and crash recovery to fit_sequential` — - CSV writing, reading, resumption logic. +- **PR 9b:** `Add CSV output and crash recovery to fit_sequential` — CSV + writing, reading, resumption logic. - **PR 9c:** `Add parameter propagation and extract_diffrn callback` — chunk-to-chunk seeding, diffrn metadata columns. @@ -1143,9 +1145,9 @@ This is a sub-step breakdown if the PR proves too large: > **Title:** `Add multiprocessing support to fit_sequential` > -> **Description:** Refactor `_fit_worker()` to be module-level picklable. 
-> Add `ProcessPoolExecutor` dispatch with `spawn` context. Handle worker -> failures (catch exceptions, mark as failed in CSV). Add +> **Description:** Refactor `_fit_worker()` to be module-level +> picklable. Add `ProcessPoolExecutor` dispatch with `spawn` context. +> Handle worker failures (catch exceptions, mark as failed in CSV). Add > `max_workers='auto'` support (`os.cpu_count()`). Integration test: > parallel sequential fit (10 files, 2 workers). @@ -1166,12 +1168,13 @@ This is a sub-step breakdown if the PR proves too large: > > **Description:** Update the existing `Analysis.fit()` single-mode loop > to write results to `analysis/results.csv` (same CSV format as -> `fit_sequential`). This gives `ed-17.py`-style workflows persistent CSV -> output and unified `plot_param_series()`. +> `fit_sequential`). This gives `ed-17.py`-style workflows persistent +> CSV output and unified `plot_param_series()`. #### PR 14 (optional) — Parallel single-fit for pre-loaded experiments -> **Title:** `Add max_workers to Analysis.fit() for pre-loaded experiments` +> **Title:** +> `Add max_workers to Analysis.fit() for pre-loaded experiments` > > **Description:** Add `max_workers` parameter to `Analysis.fit()`. When > `fit_mode == 'single'` and `max_workers > 1`, serialise pre-loaded @@ -1213,13 +1216,13 @@ are all stdlib. ### Risks -| Risk | Mitigation | -| ------------------------------------------------ | ------------------------------------------------------ | -| CIF round-trip loses information | PR 3 (load) + PR 6 (round-trip test) verify before PR 9| -| CIF collection truncation at 20 rows | PR 5 fixes before PR 9 | -| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool | -| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields| -| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | +| Risk | Mitigation | +| ------------------------------------------------ | -------------------------------------------------------- | +| CIF round-trip loses information | PR 3 (load) + PR 6 (round-trip test) verify before PR 9 | +| CIF collection truncation at 20 rows | PR 5 fixes before PR 9 | +| Worker memory leak (large N, long-running pool) | Use `max_tasks_per_child=100` on the pool | +| Pickling failures for SequentialFitTemplate | Keep it a plain dataclass with only str/dict/list fields | +| crysfml Fortran global state in forked processes | Enforced `spawn` context avoids fork issues | ### Resolved open issues (now prerequisites) @@ -1235,21 +1238,21 @@ are all stdlib. ## 12. 
@@ -1235,21 +1238,21 @@ are all stdlib.

## 12. Summary

-| Aspect              | Decision                                                        |
-| ------------------- | -------------------------------------------------------------- |
-| Parallelism backend | `concurrent.futures.ProcessPoolExecutor` with `spawn`           |
-| Worker isolation    | Each worker creates a fresh `Project` — no shared state         |
-| Data source         | `data_dir` argument; ZIP → extract first                        |
-| Data flow           | Template CIF + data path → worker → result dict → CSV           |
-| Parameter IDs       | `unique_name` (deterministic), not `uid` (random)               |
-| Parameter seeding   | Last successful result in chunk → next chunk                    |
-| CSV location        | `project_dir/analysis/results.csv` (deterministic)              |
-| CSV contents        | Fit metrics + diffrn metadata + all free param values/uncert    |
-| Metadata extraction | User-provided `extract_diffrn` callback, not hidden in lib      |
-| Crash recovery      | Read existing CSV, skip fitted files, resume                    |
-| Plotting            | Unified `plot_param_series()` always reads from CSV             |
-| Configuration       | `max_workers` + `data_dir` on `fit_sequential()`                |
-| Project layout      | `analysis.cif` moves into `analysis/` directory                 |
-| Singletons          | Replace with instance-owned state (recommended prerequisite)    |
-| New dependencies    | None (stdlib only)                                              |
+| Aspect              | Decision                                                               |
+| ------------------- | ---------------------------------------------------------------------- |
+| Parallelism backend | `concurrent.futures.ProcessPoolExecutor` with `spawn`                  |
+| Worker isolation    | Each worker creates a fresh `Project` — no shared state                |
+| Data source         | `data_dir` argument; ZIP → extract first                               |
+| Data flow           | Template CIF + data path → worker → result dict → CSV                  |
+| Parameter IDs       | `unique_name` (deterministic), not `uid` (random)                      |
+| Parameter seeding   | Last successful result in chunk → next chunk                           |
+| CSV location        | `project_dir/analysis/results.csv` (deterministic)                     |
+| CSV contents        | Fit metrics + diffrn metadata + all free param values/uncert           |
+| Metadata extraction | User-provided `extract_diffrn` callback, not hidden in lib             |
+| Crash recovery      | Read existing CSV, skip fitted files, resume                           |
+| Plotting            | Unified `plot_param_series()` always reads from CSV                    |
+| Configuration       | `max_workers` + `data_dir` on `fit_sequential()`                       |
+| Project layout      | `analysis.cif` moves into `analysis/` directory                        |
+| Singletons          | Replace with instance-owned state (recommended prerequisite)           |
+| New dependencies    | None (stdlib only)                                                     |
| First step | PRs 1–3 (foundation issues), then PRs 4–8 (prerequisites), then PR 9+ |
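As a concrete reading of the crash-recovery and CSV decisions above,
the resume step might look like this stdlib-only sketch; the
`data_path` and `status` column names are assumptions about the final
CSV schema:

```python
import csv
from pathlib import Path


def read_resume_state(results_csv: Path) -> tuple[set, dict | None]:
    """Return fitted file paths to skip and the last row for seeding."""
    if not results_csv.exists():
        return set(), None
    with results_csv.open(newline='') as fh:
        rows = list(csv.DictReader(fh))
    # Skip only files that actually converged; failed ones get retried.
    fitted = {row['data_path'] for row in rows if row['status'] == 'success'}
    last_row = rows[-1] if rows else None
    return fitted, last_row
```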
diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py
index 41d9a861..c21db3e6 100644
--- a/src/easydiffraction/analysis/analysis.py
+++ b/src/easydiffraction/analysis/analysis.py
@@ -3,6 +3,7 @@

 from contextlib import suppress

+import numpy as np
 import pandas as pd

 from easydiffraction.analysis.categories.aliases.factory import AliasesFactory
@@ -18,7 +19,6 @@
 from easydiffraction.core.variable import NumericDescriptor
 from easydiffraction.core.variable import Parameter
 from easydiffraction.core.variable import StringDescriptor
-from easydiffraction.datablocks.experiment.collection import Experiments
 from easydiffraction.display.tables import TableRenderer
 from easydiffraction.io.cif.serialize import analysis_to_cif
 from easydiffraction.utils.enums import VerbosityEnum
@@ -627,10 +627,16 @@ def fit(self, verbosity: str | None = None) -> None:
             console.paragraph(
                 f"Using all experiments 🔬 {experiments.names} for '{mode.value}' fitting"
             )
+            # Resolve weights to a plain numpy array
+            experiments_list = list(experiments.values())
+            weights_list = [
+                self._joint_fit_experiments[name].weight.value for name in experiments.names
+            ]
+            weights_array = np.array(weights_list, dtype=np.float64)
             self.fitter.fit(
                 structures,
-                experiments,
-                weights=self._joint_fit_experiments,
+                experiments_list,
+                weights=weights_array,
                 analysis=self,
                 verbosity=verb,
             )
@@ -659,8 +665,6 @@
             console.print('📈 Goodness-of-fit (reduced χ²) per experiment:')
             short_display_handle = _make_display_handle()

-            # TODO: Find a better way without creating dummy
-            # experiments?
             for _idx, expt_name in enumerate(expt_names, start=1):
                 if verb is VerbosityEnum.FULL:
                     console.paragraph(
                     )

                 experiment = experiments[expt_name]
-                dummy_experiments = Experiments()  # TODO: Find a better name
-
-                # This is a workaround to set the parent project
-                # of the dummy experiments collection, so that
-                # parameters can be resolved correctly during fitting.
-                object.__setattr__(dummy_experiments, '_parent', self.project)  # noqa: PLC2801
-
-                dummy_experiments.add(experiment)
+                experiments_list = [experiment]
                 self.fitter.fit(
                     structures,
-                    dummy_experiments,
+                    experiments_list,
                     analysis=self,
                     verbosity=verb,
                 )
@@ -749,7 +746,7 @@ def show_fit_results(self) -> None:
             return

         structures = self.project.structures
-        experiments = self.project.experiments
+        experiments = list(self.project.experiments.values())

         self.fitter._process_fit_results(structures, experiments)
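A caller-side view of the new `Fitter.fit()` contract, mirroring the
hunk above — a sketch that assumes `project` and its configured
`Analysis` instance are already in scope:

```python
import numpy as np

# Experiments now go in as a plain list; weights as a 1-D float array
# aligned with that list, resolved by the caller rather than the fitter.
analysis = project.analysis
experiments_list = list(project.experiments.values())
weights_array = np.array(
    [analysis._joint_fit_experiments[name].weight.value for name in project.experiments.names],
    dtype=np.float64,
)
analysis.fitter.fit(
    project.structures,
    experiments_list,
    weights=weights_array,
)
```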
diff --git a/src/easydiffraction/analysis/fit_helpers/metrics.py b/src/easydiffraction/analysis/fit_helpers/metrics.py
index 381699da..61700006 100644
--- a/src/easydiffraction/analysis/fit_helpers/metrics.py
+++ b/src/easydiffraction/analysis/fit_helpers/metrics.py
@@ -1,11 +1,15 @@
 # SPDX-FileCopyrightText: 2026 EasyScience contributors
 # SPDX-License-Identifier: BSD-3-Clause

+from __future__ import annotations
+
+from typing import TYPE_CHECKING

 import numpy as np

-from easydiffraction.datablocks.experiment.collection import Experiments
-from easydiffraction.datablocks.structure.collection import Structures
+if TYPE_CHECKING:
+    from easydiffraction.datablocks.experiment.item.base import ExperimentBase
+    from easydiffraction.datablocks.structure.collection import Structures


 def calculate_r_factor(
@@ -146,7 +150,7 @@ def calculate_reduced_chi_square(

 def get_reliability_inputs(
     structures: Structures,
-    experiments: Experiments,
+    experiments: list[ExperimentBase],
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
     """
     Collect observed and calculated data for reliability calculations.

     Parameters
     ----------
     structures : Structures
         Collection of structures.
-    experiments : Experiments
-        Collection of experiments.
+    experiments : list[ExperimentBase]
+        List of experiments.

     Returns
     -------
@@ -170,7 +174,7 @@
     y_obs_all = []
     y_calc_all = []
     y_err_all = []
-    for experiment in experiments.values():
+    for experiment in experiments:
         for structure in structures:
             structure._update_categories()
             experiment._update_categories()
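The import churn in this hunk (and in `fitting.py` below) is the
standard `TYPE_CHECKING` pattern; in isolation it looks like the
following, where `count_experiments` is a hypothetical helper used
only to show the mechanics:

```python
from __future__ import annotations  # all annotations become strings

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only by type checkers; at runtime this import (and any
    # circular-import risk it carried) simply does not happen.
    from easydiffraction.datablocks.experiment.item.base import ExperimentBase


def count_experiments(experiments: list[ExperimentBase]) -> int:
    # The annotation is never evaluated at runtime, so the missing
    # runtime import is harmless.
    return len(experiments)
```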
diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py
index 73b7612b..aa281082 100644
--- a/src/easydiffraction/analysis/fitting.py
+++ b/src/easydiffraction/analysis/fitting.py
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: 2026 EasyScience contributors
 # SPDX-License-Identifier: BSD-3-Clause

+from __future__ import annotations
+
 from typing import TYPE_CHECKING
 from typing import Any

@@ -9,12 +11,12 @@
 from easydiffraction.analysis.fit_helpers.metrics import get_reliability_inputs
 from easydiffraction.analysis.minimizers.factory import MinimizerFactory
 from easydiffraction.core.variable import Parameter
-from easydiffraction.datablocks.experiment.collection import Experiments
-from easydiffraction.datablocks.structure.collection import Structures
 from easydiffraction.utils.enums import VerbosityEnum

 if TYPE_CHECKING:
     from easydiffraction.analysis.fit_helpers.reporting import FitResults
+    from easydiffraction.datablocks.experiment.item.base import ExperimentBase
+    from easydiffraction.datablocks.structure.collection import Structures


 class Fitter:
@@ -29,7 +31,7 @@ def __init__(self, selection: str = 'lmfit') -> None:
     def fit(
         self,
         structures: Structures,
-        experiments: Experiments,
+        experiments: list[ExperimentBase],
         weights: np.ndarray | None = None,
         analysis: object = None,
         verbosity: VerbosityEnum = VerbosityEnum.FULL,
@@ -45,17 +47,25 @@
         ----------
         structures : Structures
             Collection of structures.
-        experiments : Experiments
-            Collection of experiments.
+        experiments : list[ExperimentBase]
+            List of experiments to fit.
         weights : np.ndarray | None, default=None
-            Optional weights for joint fitting.
+            Per-experiment weights as a 1-D array (length must match
+            *experiments*). When ``None``, equal weights are used.
         analysis : object, default=None
             Optional Analysis object to update its categories during
             fitting.
         verbosity : VerbosityEnum, default=VerbosityEnum.FULL
             Console output verbosity.
         """
-        params = structures.free_parameters + experiments.free_parameters
+        expt_free_params: list[Parameter] = []
+        for expt in experiments:
+            expt_free_params.extend(
+                p
+                for p in expt.parameters
+                if isinstance(p, Parameter) and not p.constrained and p.free
+            )
+        params = structures.free_parameters + expt_free_params

         if not params:
             print('⚠️ No parameters selected for fitting.')
@@ -93,7 +103,7 @@ def objective_function(engine_params: dict[str, Any]) -> np.ndarray:
     def _process_fit_results(
         self,
         structures: Structures,
-        experiments: Experiments,
+        experiments: list[ExperimentBase],
     ) -> None:
         """
         Collect reliability inputs and display fit results.
@@ -107,8 +117,8 @@
         ----------
         structures : Structures
             Collection of structures.
-        experiments : Experiments
-            Collection of experiments.
+        experiments : list[ExperimentBase]
+            List of experiments.
         """
         y_obs, y_calc, y_err = get_reliability_inputs(
             structures,
@@ -127,35 +137,12 @@
             f_calc=f_calc,
         )

-    def _collect_free_parameters(
-        self,
-        structures: Structures,
-        experiments: Experiments,
-    ) -> list[Parameter]:
-        """
-        Collect free parameters from structures and experiments.
-
-        Parameters
-        ----------
-        structures : Structures
-            Collection of structures.
-        experiments : Experiments
-            Collection of experiments.
-
-        Returns
-        -------
-        list[Parameter]
-            List of free parameters.
-        """
-        free_params: list[Parameter] = structures.free_parameters + experiments.free_parameters
-        return free_params
-
     def _residual_function(
         self,
         engine_params: dict[str, Any],
         parameters: list[Parameter],
         structures: Structures,
-        experiments: Experiments,
+        experiments: list[ExperimentBase],
         weights: np.ndarray | None = None,
         analysis: object = None,
     ) -> np.ndarray:
@@ -173,10 +160,11 @@
             List of parameters being optimized.
         structures : Structures
             Collection of structures.
-        experiments : Experiments
-            Collection of experiments.
+        experiments : list[ExperimentBase]
+            List of experiments.
         weights : np.ndarray | None, default=None
-            Optional weights for joint fitting.
+            Per-experiment weights as a 1-D array. When ``None``, equal
+            weights are used.
         analysis : object, default=None
             Optional Analysis object to update its categories during
             fitting.
@@ -199,25 +187,18 @@
             analysis._update_categories(called_by_minimizer=True)

         # Prepare weights for joint fitting
-        num_expts: int = len(experiments.names)
-        if weights is None:
-            _weights = np.ones(num_expts)
-        else:
-            _weights_list: list[float] = []
-            for name in experiments.names:
-                _weight = weights[name].weight.value
-                _weights_list.append(_weight)
-            _weights = np.array(_weights_list, dtype=np.float64)
+        num_expts: int = len(experiments)
+        _weights = np.ones(num_expts) if weights is None else np.asarray(weights, dtype=np.float64)

         # Normalize weights so they sum to num_expts
         # We should obtain the same reduced chi_squared when a single
         # dataset is split into two parts and fit together. If weights
         # sum to one, then reduced chi_squared will be half as large as
         # expected.
-        _weights *= num_expts / np.sum(_weights)
+        _weights = _weights * (num_expts / np.sum(_weights))

         residuals: list[float] = []
-        for experiment, weight in zip(experiments.values(), _weights, strict=True):
+        for experiment, weight in zip(experiments, _weights, strict=True):
             # Update experiment-specific calculations
             experiment._update_categories(called_by_minimizer=True)
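The normalisation in `_residual_function` can be sanity-checked
numerically; a minimal sketch of the invariant the comment above
describes:

```python
import numpy as np

# User weights that sum to 1 rather than to the number of experiments.
weights = np.array([0.5, 0.5])
num_expts = len(weights)

normalized = weights * (num_expts / np.sum(weights))
assert np.allclose(normalized, [1.0, 1.0])
assert np.isclose(np.sum(normalized), num_expts)
# Residuals are later scaled per experiment by these weights, so a
# dataset split into two equally weighted halves keeps the same reduced
# chi-squared as the unsplit fit.
```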
diff --git a/tests/unit/easydiffraction/analysis/fit_helpers/test_metrics.py b/tests/unit/easydiffraction/analysis/fit_helpers/test_metrics.py
index 906989e1..eff28fc0 100644
--- a/tests/unit/easydiffraction/analysis/fit_helpers/test_metrics.py
+++ b/tests/unit/easydiffraction/analysis/fit_helpers/test_metrics.py
@@ -43,14 +43,10 @@ def __init__(self):
     def _update_categories(self, called_by_minimizer=False):
         pass

-    class Expts(dict):
-        def values(self):
-            return [Expt()]
-
     class DummyStructures(dict):
         pass

-    y_obs, y_calc, y_err = M.get_reliability_inputs(DummyStructures(), Expts())
+    y_obs, y_calc, y_err = M.get_reliability_inputs(DummyStructures(), [Expt()])
     assert y_obs.shape == (2,)
     assert y_calc.shape == (2,)
     assert y_err.shape == (2,)
diff --git a/tests/unit/easydiffraction/analysis/test_analysis.py b/tests/unit/easydiffraction/analysis/test_analysis.py
index 19bc3c2a..0b0a8944 100644
--- a/tests/unit/easydiffraction/analysis/test_analysis.py
+++ b/tests/unit/easydiffraction/analysis/test_analysis.py
@@ -147,6 +147,9 @@ class MockProject:
     class experiments_cls:
         names = []

+        def values(self):
+            return []
+
     experiments = experiments_cls()

     project = MockProject()
diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py
index ed9ea419..60bb794e 100644
--- a/tests/unit/easydiffraction/analysis/test_fitting.py
+++ b/tests/unit/easydiffraction/analysis/test_fitting.py
@@ -13,15 +13,11 @@ def test_module_import():
 def test_fitter_early_exit_when_no_params(capsys, monkeypatch):
     from easydiffraction.analysis.fitting import Fitter

-    class DummyCollection:
+    class DummyStructures:
         free_parameters = []

-        def __init__(self):
-            self._names = ['e1']
-
-        @property
-        def names(self):
-            return self._names
+    class DummyExperiment:
+        parameters = []

     class DummyMin:
         tracker = type('T', (), {'track': staticmethod(lambda a, b: a)})()

     def fit(self, params, obj, verbosity=None):

     f = Fitter()
     # Avoid creating a real minimizer
     f.minimizer = DummyMin()
-    f.fit(structures=DummyCollection(), experiments=DummyCollection())
+    f.fit(structures=DummyStructures(), experiments=[DummyExperiment()])
     out = capsys.readouterr().out
     assert 'No parameters selected for fitting' in out
@@ -50,15 +46,11 @@
     class DummyParam:
         value = 1.0
         _fit_start_value = None

-    class DummyCollection:
+    class DummyStructures:
         free_parameters = [DummyParam()]

-        def __init__(self):
-            self._names = ['e1']
-
-        @property
-        def names(self):
-            return self._names
+    class DummyExperiment:
+        parameters = []

     class MockFitResults:
         pass
@@ -84,7 +76,7 @@ def mock_process(*args, **kwargs):

     monkeypatch.setattr(f, '_process_fit_results', mock_process)

-    f.fit(structures=DummyCollection(), experiments=DummyCollection())
+    f.fit(structures=DummyStructures(), experiments=[DummyExperiment()])

     assert not process_called['called'], (
         'Fitter.fit() should not call _process_fit_results automatically. '