diff --git a/documentation/source/users/rmg/input.rst b/documentation/source/users/rmg/input.rst index 1780cca69e2..1db15076453 100644 --- a/documentation/source/users/rmg/input.rst +++ b/documentation/source/users/rmg/input.rst @@ -204,17 +204,85 @@ The last section is specifying that RMG is estimating kinetics of reactions from kineticsEstimator = 'rate rules' -The following is an example of a database block, based on above chosen libraries and options:: +.. _auto_library_selection: + +Automatic Library and Family Selection +-------------------------------------- +Instead of manually listing every library, you can let RMG choose the appropriate +thermo libraries, kinetics libraries, transport libraries, seed mechanisms, and +kinetics families automatically based on the species and reactor conditions in your +input file. Use the ``'auto'`` keyword in any library field:: database( - thermoLibraries = ['primaryThermoLibrary', 'GRI-Mech3.0'], - reactionLibraries = [('Glarborg/C3',False)], - seedMechanisms = ['GRI-Mech3.0'], + thermoLibraries = 'auto', + reactionLibraries = 'auto', + transportLibraries = 'auto', + seedMechanisms = 'auto', + kineticsFamilies = 'auto', kineticsDepositories = ['training'], - kineticsFamilies = 'defult', kineticsEstimator = 'rate rules', ) +When ``'auto'`` is specified, RMG analyzes the initial species and reactor +conditions to detect the chemistry present (e.g., nitrogen, sulfur, oxygen, +halogens, surface, liquid phase) and selects the relevant library sets. +The triggered sets and their corresponding libraries are logged at the start +of the RMG run. + +**Mixing manual and auto selection.** You can combine user-specified libraries +with ``'auto'`` in a list. The position of ``'auto'`` controls the priority: +libraries before it have higher priority, libraries after it have lower:: + + thermoLibraries = ['myCustomLib', 'auto'] + + thermoLibraries = ['auto', 'myFallbackLib'] + +If a library you listed manually also appears in the auto-selected set, it will +not be added twice. It keeps the position you gave it, and the auto-selected +copy is skipped. + +**PAH libraries and the ```` keyword.** +The auto-selection splits high-temperature C/H chemistry into two tiers: + +* **CH_pyrolysis_core** — fundamental high-T radical and small-molecule chemistry + (e.g., acetylene initiation, alkane cracking). Always included when carbon is + present and the maximum reactor temperature is at least 800 K. +* **PAH_formation** — aromatic ring formation, naphthalene pathways (CPD + HACA), + and larger PAH growth. This is a large set (~70 kinetics libraries) that can + significantly increase model size and generation time. + +For **pure C/H pyrolysis** (no oxygen in any input species), both tiers are +included automatically — PAH formation is expected in such systems. + +For **oxygenated systems** (any species contains O, including oxygenated fuels +like ethanol or DME), only CH_pyrolysis_core is included by default because +PAH chemistry is typically a minor pathway. If you know your system forms +significant amounts of aromatics (e.g., fuel-rich partial oxidation), you can +explicitly request the PAH libraries by adding the ``''`` keyword +to any library field:: + + database( + thermoLibraries = ['auto', ''], + reactionLibraries = ['auto', ''], + seedMechanisms = 'auto', + transportLibraries = 'auto', + kineticsFamilies = 'auto', + ) + +The ``''`` keyword is consumed during processing (it does not appear +in the final library list) — it only serves as a signal to include the +PAH_formation set. It can be placed anywhere in the list; its position does +not affect library priority. + +**Previewing the selection.** A Jupyter notebook is provided at +:file:`ipython/auto_library_selection.ipynb` that lets you preview exactly which +libraries and families RMG would choose for a given input file, without running +the full job. + +.. note:: + The ``'auto'`` keyword is opt-in. If you do not use it you must list every library explicitly. + + .. _species_list: List of species diff --git a/ipython/auto_library_selection.ipynb b/ipython/auto_library_selection.ipynb new file mode 100644 index 00000000000..4bbce229249 --- /dev/null +++ b/ipython/auto_library_selection.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "uak48ywvas", + "source": "# Auto Library & Family Selection Preview\n\nPreview what RMG's `'auto'` mode would select for thermo libraries, kinetics libraries,\ntransport libraries, seed mechanisms, and kinetics families — given an RMG input file.\n\nJust set `input_file_path` below and run all cells.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "5vx9v43udjl", + "source": "input_file_path = '../examples/rmg/superminimal/input.py'", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "vyt4k7q9as", + "source": "import os\n\nfrom rmgpy.rmg.main import RMG\nfrom rmgpy.rmg.input import read_input_file\nfrom rmgpy.data.auto_database import (\n AUTO,\n detect_chemistry,\n determine_chemistry_sets,\n determine_kinetics_families,\n expand_chemistry_sets,\n load_recommended_yml,\n merge_with_user_libraries,\n resolve_auto_kinetics_families,\n _has_pah_libs_keyword,\n)\n\n# Load the input file into an RMG object (without loading the database)\nrmg = RMG()\nread_input_file(os.path.expanduser(input_file_path), rmg)\n\nprint(f'Input file: {os.path.abspath(input_file_path)}')\nprint(f'Species: {[spec.label or spec.molecule[0].to_smiles() for spec in rmg.initial_species]}')\nprint(f'Reactors: {len(rmg.reaction_systems)}')\nprint(f'Solvent: {rmg.solvent or \"(none)\"}')\nprint()\nprint('Current database() settings (before auto-selection):')\nprint(f' thermoLibraries: {rmg.thermo_libraries}')\nprint(f' reactionLibraries: {rmg.reaction_libraries}')\nprint(f' seedMechanisms: {rmg.seed_mechanisms}')\nprint(f' transportLibraries: {rmg.transport_libraries}')\nprint(f' kineticsFamilies: {rmg.kinetics_families}')", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "if94i2lxggr", + "source": "## Chemistry Detection\n\nAnalyze the input species and reactor conditions to determine what chemistry is present.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "grunflwiug8", + "source": "profile = detect_chemistry(rmg.initial_species, rmg.reaction_systems, rmg.solvent)\npah_libs_requested = _has_pah_libs_keyword(rmg)\n\nprint(f'Elements: {\"/\".join(sorted(profile.elements_present))}')\nprint(f'Max T: {profile.max_temperature:.0f} K')\nprint(f'Nitrogen: {profile.has_nitrogen}')\nprint(f'Sulfur: {profile.has_sulfur}')\nprint(f'Oxygen: {profile.has_oxygen}')\nprint(f'Carbon: {profile.has_carbon}')\nprint(f'Halogens: {profile.has_halogens}')\nprint(f'Electrochem: {profile.has_electrochem}')\nprint(f'Surface: {profile.has_surface}')\nprint(f'Liquid: {profile.has_liquid}')\nprint(f': {pah_libs_requested}')", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "k9r657hlttp", + "source": "## Triggered Chemistry Sets & Kinetics Family Sets\n\nWhich named sets from `recommended_libraries.yml` and `recommended.py` would be activated.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "fkjvg12kwkj", + "source": "chem_sets = determine_chemistry_sets(profile, pah_libs_requested)\nfamily_sets = determine_kinetics_families(profile)\n\nprint('Chemistry sets (for thermo/kinetics/transport/seed libraries):')\nfor s in chem_sets:\n print(f' - {s.value}')\nprint()\nprint('Kinetics family sets:')\nfor s in family_sets:\n print(f' - {s.value}')", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "ldplsa4l6wa", + "source": "## What `'auto'` Would Select\n\nThe libraries and families that RMG would use if all fields were set to `'auto'`.\nThis always shows the auto-selected result, regardless of what the input file currently specifies.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "87ac142mi0b", + "source": "recommended_data = load_recommended_yml(rmg.database_directory)\nauto_thermo, auto_kinetics, auto_transport, auto_seeds = expand_chemistry_sets(\n recommended_data, chem_sets\n)\nauto_families = resolve_auto_kinetics_families(family_sets, rmg.database_directory)\n\ndef print_list(label, items):\n items = items or []\n print(f'\\n{label} ({len(items)}):')\n for item in items:\n print(f' {item}')\n\nprint_list('Thermo libraries', auto_thermo)\nprint_list('Reaction libraries', auto_kinetics)\nprint_list('Seed mechanisms', auto_seeds)\nprint_list('Transport libraries', auto_transport)\nprint_list('Kinetics families', auto_families)", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "w3ocgmc0ils", + "source": "## Actual Resolution of the Current Input File\n\nProcesses the input file's `database()` settings as RMG would at startup.\nIf the input file uses `'auto'`, `''`, or `['!family', 'auto']`,\nyou'll see the resolved result. If it uses manual library lists, they pass through unchanged.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "eft98a4ciwl", + "source": "from rmgpy.data.auto_database import auto_select_libraries, PAH_LIBS, to_reaction_library_tuples\n\n# Work on a fresh copy so we don't mutate the rmg object used above\nimport copy\nrmg2 = copy.deepcopy(rmg)\n\n# Run the same auto-selection that main.py would run\nauto_select_libraries(rmg2)\n\n# Convert reaction libraries to tuples (as main.py does before load_database)\nif isinstance(rmg2.reaction_libraries, list):\n output_edge = getattr(rmg2, 'reaction_libraries_output_edge', set())\n rmg2.reaction_libraries = to_reaction_library_tuples(rmg2.reaction_libraries, output_edge)\n\nhas_auto = any(\n getattr(rmg, attr, None) == 'auto'\n or (isinstance(getattr(rmg, attr, None), list) and 'auto' in getattr(rmg, attr))\n for attr in ('thermo_libraries', 'reaction_libraries', 'seed_mechanisms',\n 'transport_libraries', 'kinetics_families')\n)\n\nif not has_auto:\n print('The input file does not use \\'auto\\' in any database field.')\n print('The settings below are exactly what was specified in the input file.\\n')\n\nprint_list('Thermo libraries', rmg2.thermo_libraries)\n\nrxn_lib_names = [name for name, _ in rmg2.reaction_libraries] if isinstance(rmg2.reaction_libraries, list) else rmg2.reaction_libraries\nprint_list('Reaction libraries', rxn_lib_names or [])\n\nedge_libs = [name for name, flag in rmg2.reaction_libraries if flag] if isinstance(rmg2.reaction_libraries, list) else []\nif edge_libs:\n print(f'\\n (output unused edge reactions for: {\", \".join(edge_libs)})')\n\nprint_list('Seed mechanisms', rmg2.seed_mechanisms or [])\nprint_list('Transport libraries', rmg2.transport_libraries or [])\n\nif isinstance(rmg2.kinetics_families, list):\n print_list('Kinetics families', rmg2.kinetics_families)\nelse:\n print(f'\\nKinetics families: {rmg2.kinetics_families!r} (resolved at database load time)')", + "metadata": {}, + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": "", + "id": "7dba6e3be37a396b", + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/rmgpy/data/auto_database.py b/rmgpy/data/auto_database.py new file mode 100644 index 00000000000..5a4d4039ef5 --- /dev/null +++ b/rmgpy/data/auto_database.py @@ -0,0 +1,603 @@ +#!/usr/bin/env python3 + +############################################################################### +# # +# RMG - Reaction Mechanism Generator # +# # +# Copyright (c) 2002-2023 Prof. William H. Green (whgreen@mit.edu), # +# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # +# # +# Permission is hereby granted, free of charge, to any person obtaining a # +# copy of this software and associated documentation files (the 'Software'), # +# to deal in the Software without restriction, including without limitation # +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # +# and/or sell copies of the Software, and to permit persons to whom the # +# Software is furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in # +# all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # +# DEALINGS IN THE SOFTWARE. # +# # +############################################################################### + +""" +Automated kinetics library, kinetics family, thermo library and transport library selection for RMG +based on the chemistry detected in the input species and reactor conditions. +""" + +import logging +import os +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, List, Optional, Set, Tuple, Union + +import yaml + +from rmgpy.exceptions import InputError +from rmgpy.solver.liquid import LiquidReactor +from rmgpy.solver.surface import SurfaceReactor + +try: + from rmgpy.rmg.reactionmechanismsimulator_reactors import ( + ConstantTLiquidSurfaceReactor as RMSLiqSurf, + ConstantTVLiquidReactor as RMSLiq, + ) +except ImportError: + RMSLiqSurf = None + RMSLiq = None + +# Values used in input files to request auto-selection +AUTO = 'auto' +PAH_LIBS = '' + +# Temperature threshold (in K) above which CH pyrolysis libraries are included +CH_PYROLYSIS_T_THRESHOLD = 800.0 + +# Elements that trigger the halogens chemistry set +HALOGEN_ELEMENTS = {'F', 'Cl', 'Br', 'I'} +# Elements that trigger the metal/electrochem chemistry set (currently only Li) +ELECTROCHEM_ELEMENTS = {'Li'} + + +class ChemistrySet(str, Enum): + """Named chemistry sets defined in recommended_libraries.yml.""" + PRIMARY = 'primary' + NITROGEN = 'nitrogen' + SULFUR = 'sulfur' + OXIDATION = 'oxidation' + CH_PYROLYSIS_CORE = 'CH_pyrolysis_core' + PAH_FORMATION = 'PAH_formation' + LIQUID_OXIDATION = 'liquid_oxidation' + SURFACE = 'surface' + SURFACE_NITROGEN = 'surface_nitrogen' + HALOGENS = 'halogens' + ELECTROCHEM = 'electrochem' + + +class FamilySet(str, Enum): + """Named kinetics family sets defined in recommended.py.""" + DEFAULT = 'default' + CH_PYROLYSIS = 'ch_pyrolysis' + LIQUID_PEROXIDE = 'liquid_peroxide' + SURFACE = 'surface' + HALOGENS = 'halogens' + ELECTROCHEM = 'electrochem' + + +@dataclass +class ChemistryProfile: + """Detected chemistry characteristics from initial species and reactor conditions.""" + elements_present: Set[str] = field(default_factory=set) + has_nitrogen: bool = False + has_sulfur: bool = False + has_oxygen: bool = False + has_carbon: bool = False + has_halogens: bool = False + has_electrochem: bool = False + has_surface: bool = False + has_liquid: bool = False + max_temperature: float = 0.0 + + +def detect_chemistry(initial_species: list, + reaction_systems: list, + solvent: Optional[str], + ) -> ChemistryProfile: + """ + Analyze initial species and reactor conditions to build a ChemistryProfile. + + Args: + initial_species: list of Species objects from the RMG input. + reaction_systems: list of reactor system objects. + solvent: solvent name string, or None for gas phase. + + Returns: + ChemistryProfile with detected characteristics. + """ + profile = ChemistryProfile() + + for spc in initial_species: + if not spc.reactive: + continue + mol = spc.molecule[0] + element_counts = mol.get_element_count() + profile.elements_present.update(element_counts.keys()) + if mol.is_surface_site() or 'X' in element_counts: + profile.has_surface = True + + profile.has_nitrogen = 'N' in profile.elements_present + profile.has_sulfur = 'S' in profile.elements_present + profile.has_oxygen = 'O' in profile.elements_present + profile.has_carbon = 'C' in profile.elements_present + profile.has_halogens = bool(profile.elements_present & HALOGEN_ELEMENTS) # bool(set intersection) + profile.has_electrochem = bool(profile.elements_present & ELECTROCHEM_ELEMENTS) + + for reactor in reaction_systems: + if isinstance(reactor, LiquidReactor): + profile.has_liquid = True + if isinstance(reactor, SurfaceReactor): + profile.has_surface = True + # RMS reactor types (may not be installed) + if RMSLiq is not None: + if isinstance(reactor, (RMSLiq, RMSLiqSurf)): + profile.has_liquid = True + if isinstance(reactor, RMSLiqSurf): + profile.has_surface = True + + T = _get_reactor_max_temperature(reactor) + if T is not None and T > profile.max_temperature: + profile.max_temperature = T + + if solvent is not None: + profile.has_liquid = True + + return profile + + +def _get_reactor_max_temperature(reactor: Any) -> Optional[float]: + """ + Extract the maximum temperature (in K) from a reactor system. + Handles both single-value and range temperature specifications. + + Returns: + float temperature in K, or None if it cannot be determined. + """ + T = getattr(reactor, 'T', None) + if T is not None: + if isinstance(T, list): + return max(t.value_si for t in T) + return T.value_si + + # SimpleReactor stores Trange when a temperature range is given + Trange = getattr(reactor, 'Trange', None) + if Trange is not None and isinstance(Trange, list) and len(Trange) > 0: + return max(t.value_si for t in Trange) + + # RMS reactors store T in initial_conditions + ic = getattr(reactor, 'initial_conditions', None) + if ic is not None and isinstance(ic, dict): + if 'T' in ic: + return float(ic['T']) + # Nested dict for liquid-surface reactors + for phase_key in ('liquid', 'gas', 'Default'): + if isinstance(ic.get(phase_key), dict) and 'T' in ic[phase_key]: + return float(ic[phase_key]['T']) + + return None + + +def _has_pah_libs_keyword(rmg: Any) -> bool: + """Check whether the user included the keyword in any library field.""" + for attr in ('thermo_libraries', 'reaction_libraries', 'seed_mechanisms', 'transport_libraries'): + val = getattr(rmg, attr, None) + if isinstance(val, list): + for item in val: + name = item[0] if isinstance(item, tuple) else item + if name == PAH_LIBS: + return True + return False + + +def determine_chemistry_sets(profile: ChemistryProfile, + pah_libs_requested: bool = False, + ) -> List[ChemistrySet]: + """ + Determine which chemistry sets to activate based on the detected profile. + + CH pyrolysis logic: + - CH_pyrolysis_core is always added when C present AND T >= 800 K. + - PAH_formation is added when: + (a) C + T >= 800 K + no O in species (pure C/H pyrolysis), OR + (b) C + T >= 800 K + keyword requested by user. + + Args: + profile: ChemistryProfile instance. + pah_libs_requested: bool, True if user included keyword. + + Returns: + List of ChemistrySet values in priority order. + """ + sets = [ChemistrySet.PRIMARY] + + if profile.has_nitrogen: + sets.append(ChemistrySet.NITROGEN) + + if profile.has_sulfur: + sets.append(ChemistrySet.SULFUR) + + if profile.has_oxygen: + sets.append(ChemistrySet.OXIDATION) + + high_T_carbon = profile.has_carbon and profile.max_temperature >= CH_PYROLYSIS_T_THRESHOLD + + if high_T_carbon: + sets.append(ChemistrySet.CH_PYROLYSIS_CORE) + + if not profile.has_oxygen or pah_libs_requested: + sets.append(ChemistrySet.PAH_FORMATION) + + if profile.has_liquid and profile.has_oxygen: + sets.append(ChemistrySet.LIQUID_OXIDATION) + + if profile.has_surface: + sets.append(ChemistrySet.SURFACE) + + if profile.has_surface and profile.has_nitrogen: + sets.append(ChemistrySet.SURFACE_NITROGEN) + + if profile.has_halogens: + sets.append(ChemistrySet.HALOGENS) + + if profile.has_electrochem: + sets.append(ChemistrySet.ELECTROCHEM) + + return sets + + +def determine_kinetics_families(profile: ChemistryProfile) -> List[FamilySet]: + """ + Determine which kinetics family sets to activate based on the detected profile. + + These correspond to named sets in RMG-database/input/kinetics/families/recommended.py. + + Args: + profile: ChemistryProfile instance. + + Returns: + List of FamilySet values to combine. + """ + family_sets = [FamilySet.DEFAULT] + + if profile.has_carbon and profile.max_temperature >= CH_PYROLYSIS_T_THRESHOLD: + family_sets.append(FamilySet.CH_PYROLYSIS) + + if profile.has_liquid and profile.has_oxygen: + family_sets.append(FamilySet.LIQUID_PEROXIDE) + + if profile.has_surface: + family_sets.append(FamilySet.SURFACE) + + if profile.has_halogens: + family_sets.append(FamilySet.HALOGENS) + + if profile.has_electrochem: + family_sets.append(FamilySet.ELECTROCHEM) + + return family_sets + + +def load_recommended_yml(database_directory: str) -> dict: + """ + Load the recommended_libraries.yml file from the RMG database. + + Args: + database_directory: path to the RMG database 'input' directory. + + Returns: + dict parsed from YAML. + """ + yml_path = os.path.join(database_directory, 'recommended_libraries.yml') + if not os.path.isfile(yml_path): + raise InputError(f"Could not find recommended_libraries.yml at {yml_path}. " + f"This file is required for 'auto' library selection.") + with open(yml_path, 'r') as f: + return yaml.safe_load(f) + + +def expand_chemistry_sets(recommended_data: dict, + set_names: List[Union[ChemistrySet, str]], + ) -> Tuple[List[str], List[str], List[str], List[str]]: + """ + Expand named chemistry sets into concrete library lists. + + Args: + recommended_data: dict from recommended_libraries.yml. + set_names: list of chemistry set names to expand. + + Returns: + Tuple of (thermo_libraries, kinetics_libraries, transport_libraries, seed_libraries) + where each is a list of library name strings. + """ + # Primary must always be expanded first so its libraries have highest priority. + # Use == which works for both enum members and plain strings (str,Enum == 'value' is True). + has_primary = any(s == ChemistrySet.PRIMARY for s in set_names) + other_sets = [s for s in set_names if s != ChemistrySet.PRIMARY] + set_names = ([ChemistrySet.PRIMARY] if has_primary else []) + other_sets + + thermo, kinetics, transport, seed = [], [], [], [] + + for set_name in set_names: + if set_name not in recommended_data: + raise InputError(f"Chemistry set '{set_name}' not found in recommended_libraries.yml. " + f"Available sets: {list(recommended_data.keys())}") + set_data = recommended_data[set_name] + + for entry in set_data.get('thermo', []): + name = entry if isinstance(entry, str) else entry['name'] + if name not in thermo: + thermo.append(name) + + for entry in set_data.get('kinetics', []): + if isinstance(entry, str): + if entry not in kinetics: + kinetics.append(entry) + elif isinstance(entry, dict): + name = entry['name'] + if entry.get('seed', False): + if name not in seed: + seed.append(name) + else: + if name not in kinetics: + kinetics.append(name) + + for entry in set_data.get('transport', []): + name = entry if isinstance(entry, str) else entry['name'] + if name not in transport: + transport.append(name) + + return thermo, kinetics, transport, seed + + +def merge_with_user_libraries(user_spec: Any, auto_libs: List[str]) -> Union[List[str], None]: + """ + Merge user-specified libraries with auto-selected libraries, + respecting the position of the 'auto' token. tokens + are silently removed (they've already been used as a signal). + + Args: + user_spec: the user's library specification. Can be: + - 'auto' (string): fully replace with auto_libs + - list containing 'auto' token: replace token in-place with auto_libs + - list without 'auto': return as-is (with stripped) + - None or []: return as-is + auto_libs: list of auto-selected library names. + + Returns: + Resolved list of library names. + """ + if user_spec == AUTO: + return list(auto_libs) + + if not isinstance(user_spec, list): + return user_spec + + # Collect all user-specified library names (excluding special tokens) + user_lib_names = set() + for item in user_spec: + if item not in (AUTO, PAH_LIBS): + name = item[0] if isinstance(item, tuple) else item + user_lib_names.add(name) + + # Filter auto libs to exclude any already specified by user + filtered_auto = [lib for lib in auto_libs if lib not in user_lib_names] + + # Replace tokens in-place + result = [] + for item in user_spec: + if item == AUTO: + result.extend(filtered_auto) + elif item == PAH_LIBS: + continue + else: + result.append(item) + + return result + + +def to_reaction_library_tuples(reaction_libraries: List[str], + output_edge: set, + ) -> List[Tuple[str, bool]]: + """ + Convert a plain list of reaction library names back to (name, bool) tuples. + + The bool indicates whether unused edge reactions from this library should + be appended to the chemkin output file. + + Args: + reaction_libraries: list of library name strings. + output_edge: set of library names that had the True option. + + Returns: + List of (name, bool) tuples. + """ + return [(name, name in output_edge) for name in reaction_libraries] + + +def resolve_auto_kinetics_families(family_set_names: List[Union[FamilySet, str]], + database_directory: str, + ) -> List[str]: + """ + Resolve 'auto' kinetics families by combining the named sets from recommended.py. + + Reads the recommended.py file from the database and combines all requested sets + into a single list of family names. + + Args: + family_set_names: list of family set names (e.g., ['default', 'surface']). + database_directory: path to the RMG database root. + + Returns: + List of family name strings. + """ + recommended_path = os.path.join(database_directory, 'kinetics', 'families', 'recommended.py') + if not os.path.isfile(recommended_path): + raise InputError(f"Could not find recommended.py at {recommended_path}. " + f"This file is required for 'auto' kinetics families selection.") + + # Execute the recommended.py file to get the family sets. + # Use restricted globals (no builtins) since this file should only define plain sets. + local_context = {} + with open(recommended_path, 'r') as f: + exec(f.read(), {'__builtins__': {}}, local_context) + + combined = [] + for set_name in family_set_names: + if set_name not in local_context: + raise InputError(f"Kinetics family set '{set_name}' not found in recommended.py. " + f"Available sets: {[k for k in local_context if not k.startswith('_')]}") + family_set = local_context[set_name] + for family in family_set: + if family not in combined: + combined.append(family) + + return combined + + +def _log_lib_list(label: str, libs: Optional[list], width: int = 80) -> None: + """Log a library list with wrapping so lines stay under `width` chars.""" + if libs is None: + libs = [] + indent = ' ' + header = f' {label} ({len(libs)}): ' + if not libs: + logging.info(f'{header}(none)') + return + lines = [header] + current_line = indent + for i, name in enumerate(libs): + entry = name if i == 0 else f', {name}' + if len(current_line) + len(entry) > width and current_line != indent: + lines.append(current_line) + current_line = indent + name + else: + current_line += entry + lines.append(current_line) + logging.info('\n'.join(lines)) + + +def auto_select_libraries(rmg): + """ + Main entry point for auto library selection. + + Inspects the RMG object's initial_species, reaction_systems, and solvent + to detect the chemistry, then resolves any 'auto' tokens in the library + specifications. + + Modifies the rmg object in-place. + + Args: + rmg: the RMG job object with populated initial_species, reaction_systems, etc. + """ + # Check if any field uses 'auto' or '' + has_special = False + for attr in ('thermo_libraries', 'reaction_libraries', 'seed_mechanisms', 'transport_libraries', 'kinetics_families'): + val = getattr(rmg, attr, None) + if val in (AUTO, PAH_LIBS): + has_special = True + break + if isinstance(val, list): + for item in val: + name = item[0] if isinstance(item, tuple) else item + if name in (AUTO, PAH_LIBS): + has_special = True + break + if has_special: + break + + if not has_special: + return + + # Check for keyword before we strip it + pah_libs_requested = _has_pah_libs_keyword(rmg) + + # Detect chemistry + profile = detect_chemistry(rmg.initial_species, rmg.reaction_systems, rmg.solvent) + + # Build a compact fingerprint of what we detected + flags = [] + for symbol in sorted(profile.elements_present): + flags.append(symbol) + tags = [] + if profile.has_surface: + tags.append('surface') + if profile.has_liquid: + tags.append('liquid') + if pah_libs_requested: + tags.append('PAH_libs') + phase_str = f' | {", ".join(tags)}' if tags else '' + fingerprint = f'[{"/".join(flags)}] T_max={profile.max_temperature:.0f} K{phase_str}' + + logging.info('') + logging.info('~' * 80) + logging.info(f' Auto-selecting libraries for: {fingerprint}') + logging.info('~' * 80) + + # Determine chemistry sets + set_names = determine_chemistry_sets(profile, pah_libs_requested) + logging.info(f' Chemistry sets triggered: {", ".join(s.value for s in set_names)}') + + # Load and expand recommended_libraries.yml + recommended_data = load_recommended_yml(rmg.database_directory) + auto_thermo, auto_kinetics, auto_transport, auto_seeds = expand_chemistry_sets( + recommended_data, set_names + ) + + # Resolve each library field + rmg.thermo_libraries = merge_with_user_libraries(rmg.thermo_libraries, auto_thermo) + rmg.reaction_libraries = merge_with_user_libraries(rmg.reaction_libraries, auto_kinetics) + rmg.seed_mechanisms = merge_with_user_libraries(rmg.seed_mechanisms, auto_seeds) + rmg.transport_libraries = merge_with_user_libraries(rmg.transport_libraries, auto_transport) + + _log_lib_list('Thermo libraries', rmg.thermo_libraries) + _log_lib_list('Reaction libraries', rmg.reaction_libraries) + _log_lib_list('Seed mechanisms', rmg.seed_mechanisms) + _log_lib_list('Transport libraries', rmg.transport_libraries) + + # Resolve kinetics families + needs_auto_families = ( + rmg.kinetics_families == AUTO + or (isinstance(rmg.kinetics_families, list) and AUTO in rmg.kinetics_families) + ) + if needs_auto_families: + family_set_names = determine_kinetics_families(profile) + auto_families = resolve_auto_kinetics_families( + family_set_names, rmg.database_directory + ) + logging.info(f' Kinetics family sets: {", ".join(s.value for s in family_set_names)}') + + if rmg.kinetics_families == AUTO: + rmg.kinetics_families = auto_families + else: + # List with 'auto' + possible !exclusions and explicit additions + exclusions = {item[1:] for item in rmg.kinetics_families + if isinstance(item, str) and item.startswith('!')} + additions = [item for item in rmg.kinetics_families + if isinstance(item, str) and item != AUTO and not item.startswith('!')] + rmg.kinetics_families = [f for f in auto_families if f not in exclusions] + for fam in additions: + if fam not in rmg.kinetics_families: + rmg.kinetics_families.append(fam) + if exclusions: + logging.info(f' Families excluded by user: {", ".join(sorted(exclusions))}') + + _log_lib_list('Kinetics families', rmg.kinetics_families) + + logging.info('~' * 80) + logging.info('') diff --git a/rmgpy/rmg/input.py b/rmgpy/rmg/input.py index de2ddf0c331..e159dfa72ef 100644 --- a/rmgpy/rmg/input.py +++ b/rmgpy/rmg/input.py @@ -63,6 +63,7 @@ TerminationRateRatio, TerminationTime, ) +from rmgpy.data.auto_database import AUTO, PAH_LIBS from rmgpy.util import as_list ################################################################################ @@ -86,14 +87,55 @@ def database( # We don't actually load the database until after we're finished reading # the input file rmg.database_directory = settings['database.directory'] - rmg.thermo_libraries = as_list(thermoLibraries, default=[]) - rmg.transport_libraries = as_list(transportLibraries, default=None) - # Modify reaction library list such that all entries are tuples - reaction_libraries = as_list(reactionLibraries, default=[]) - rmg.reaction_libraries = [(name, False) if not isinstance(name, tuple) else name for name in reaction_libraries] + # Handle 'auto' token: pass through for later resolution by auto_select_libraries(). + # '' is only valid as a token inside a list, not as a standalone value. + for field_name, field_val in [('thermoLibraries', thermoLibraries), + ('transportLibraries', transportLibraries), + ('reactionLibraries', reactionLibraries), + ('seedMechanisms', seedMechanisms)]: + if field_val == PAH_LIBS: + raise InputError(f"'{PAH_LIBS}' cannot be used as a standalone value for {field_name}. " + f"Use it as a token inside a list, e.g. ['{AUTO}', '{PAH_LIBS}'].") + + if thermoLibraries == AUTO: + rmg.thermo_libraries = AUTO + else: + rmg.thermo_libraries = as_list(thermoLibraries, default=[]) + + if transportLibraries == AUTO: + rmg.transport_libraries = AUTO + else: + rmg.transport_libraries = as_list(transportLibraries, default=None) + + # Store reaction libraries as plain strings; remember which ones had True option + # (the bool indicates "also output unused edge reactions to chemkin file") + if reactionLibraries == AUTO: + rmg.reaction_libraries = AUTO + rmg.reaction_libraries_output_edge = set() + else: + reaction_libraries = as_list(reactionLibraries, default=[]) + rmg.reaction_libraries = [] + rmg.reaction_libraries_output_edge = set() + for item in reaction_libraries: + if isinstance(item, tuple): + name, option = item + if name == PAH_LIBS: + raise InputError(f"'{PAH_LIBS}' cannot be used as a tuple entry in reactionLibraries. " + f"Use it as a plain string token, e.g. ['{AUTO}', '{PAH_LIBS}'].") + rmg.reaction_libraries.append(name) + if option: + rmg.reaction_libraries_output_edge.add(name) + elif item in (AUTO, PAH_LIBS): + rmg.reaction_libraries.append(item) + else: + rmg.reaction_libraries.append(item) + + if seedMechanisms == AUTO: + rmg.seed_mechanisms = AUTO + else: + rmg.seed_mechanisms = as_list(seedMechanisms, default=[]) - rmg.seed_mechanisms = as_list(seedMechanisms, default=[]) rmg.statmech_libraries = as_list(frequenciesLibraries, default=[]) rmg.kinetics_estimator = kineticsEstimator @@ -107,12 +149,12 @@ def database( "['training','PrIMe'].") rmg.kinetics_depositories = kineticsDepositories - if kineticsFamilies in ('default', 'all', 'none'): + if kineticsFamilies in ('default', 'all', 'none', 'auto'): rmg.kinetics_families = kineticsFamilies else: if not isinstance(kineticsFamilies, list): - raise InputError("kineticsFamilies should be either 'default', 'all', 'none', or a list of names eg. " - "['H_Abstraction','R_Recombination'] or ['!Intra_Disproportionation'].") + raise InputError("kineticsFamilies should be either 'default', 'all', 'none', 'auto', or a list of names " + "eg. ['H_Abstraction','R_Recombination'] or ['!Intra_Disproportionation'].") rmg.kinetics_families = kineticsFamilies rmg.adsorption_groups = adsorptionGroups diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 66a827e8890..7fe1ae03650 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -54,6 +54,7 @@ from rmgpy import settings from rmgpy.chemkin import ChemkinWriter from rmgpy.constraints import fails_species_constraints +from rmgpy.data.auto_database import auto_select_libraries, to_reaction_library_tuples from rmgpy.data.base import Entry from rmgpy.data.kinetics.library import KineticsLibrary from rmgpy.data.rmg import RMGDatabase @@ -182,6 +183,7 @@ def clear(self): self.thermo_libraries = None self.transport_libraries = None self.reaction_libraries = None + self.reaction_libraries_output_edge = set() self.statmech_libraries = None self.seed_mechanisms = None self.kinetics_families = None @@ -562,6 +564,15 @@ def initialize(self, **kwargs): ) ) + # Auto-select libraries if any field uses 'auto' or '' + auto_select_libraries(self) + + # Convert reaction libraries from plain strings to (name, bool) tuples + # (the bool controls whether unused edge reactions are written to the chemkin output) + if isinstance(self.reaction_libraries, list): + output_edge = getattr(self, 'reaction_libraries_output_edge', set()) + self.reaction_libraries = to_reaction_library_tuples(self.reaction_libraries, output_edge) + # Load databases self.load_database() diff --git a/test/rmgpy/data/autoDatabaseTest.py b/test/rmgpy/data/autoDatabaseTest.py new file mode 100644 index 00000000000..cfbd6cf37a5 --- /dev/null +++ b/test/rmgpy/data/autoDatabaseTest.py @@ -0,0 +1,685 @@ +#!/usr/bin/env python3 + +############################################################################### +# # +# RMG - Reaction Mechanism Generator # +# # +# Copyright (c) 2002-2023 Prof. William H. Green (whgreen@mit.edu), # +# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # +# # +# Permission is hereby granted, free of charge, to any person obtaining a # +# copy of this software and associated documentation files (the 'Software'), # +# to deal in the Software without restriction, including without limitation # +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # +# and/or sell copies of the Software, and to permit persons to whom the # +# Software is furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in # +# all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # +# DEALINGS IN THE SOFTWARE. # +# # +############################################################################### + +"""Tests for the automated library and family selection module (auto_database.py).""" + +import logging +import os +import unittest + +from rmgpy import settings +from rmgpy.data.auto_database import ( + AUTO, + PAH_LIBS, + ChemistryProfile, + ChemistrySet, + FamilySet, + _get_reactor_max_temperature, + _has_pah_libs_keyword, + _log_lib_list, + to_reaction_library_tuples, + auto_select_libraries, + detect_chemistry, + determine_chemistry_sets, + determine_kinetics_families, + expand_chemistry_sets, + load_recommended_yml, + merge_with_user_libraries, + resolve_auto_kinetics_families, +) +from rmgpy.molecule import Molecule +from rmgpy.quantity import Quantity +from rmgpy.rmg.main import RMG +from rmgpy.solver.liquid import LiquidReactor +from rmgpy.solver.simple import SimpleReactor +from rmgpy.solver.surface import SurfaceReactor +from rmgpy.species import Species + + +def _simple_reactor(T_K: float) -> SimpleReactor: + spc = Species().from_smiles('C') + return SimpleReactor(T=Quantity(T_K, 'K'), P=Quantity(1e5, 'Pa'), + initial_mole_fractions={spc: 1.0}, n_sims=1, termination=[]) + + +def _liquid_reactor(T_K: float) -> LiquidReactor: + spc = Species().from_smiles('C') + return LiquidReactor(T=Quantity(T_K, 'K'), initial_concentrations={spc: 0.1}, + n_sims=1, termination=[]) + + +def _surface_reactor(T_K: float) -> SurfaceReactor: + gas_spc = Species().from_smiles('[H][H]') + x_mol = Molecule().from_adjacency_list('1 X u0 p0 c0') + surf_spc = Species(molecule=[x_mol]) + return SurfaceReactor(T=Quantity(T_K, 'K'), P_initial=Quantity(1e5, 'Pa'), + initial_gas_mole_fractions={gas_spc: 1.0}, + initial_surface_coverages={surf_spc: 1.0}, + surface_volume_ratio=Quantity(1e1, 'm^-1'), + surface_site_density=Quantity(2.72e-9, 'mol/cm^2'), + n_sims=1, termination=[]) + + +def _rmg_with_auto(species_smiles, T_K, solvent=None, pah=False): + """Build an RMG object with 'auto' library settings.""" + rmg = RMG() + rmg.initial_species = [Species().from_smiles(s) for s in species_smiles] + rmg.reaction_systems = [_simple_reactor(T_K)] + rmg.solvent = solvent + rmg.database_directory = settings['database.directory'] + rmg.thermo_libraries = AUTO + rmg.reaction_libraries = [AUTO, PAH_LIBS] if pah else AUTO + rmg.seed_mechanisms = AUTO + rmg.transport_libraries = AUTO + rmg.kinetics_families = AUTO + rmg.reaction_libraries_output_edge = set() + return rmg + + +class TestGetReactorMaxTemperature(unittest.TestCase): + + def test_simple_reactor(self): + self.assertAlmostEqual(_get_reactor_max_temperature(_simple_reactor(1200.0)), 1200.0) + + def test_liquid_reactor(self): + self.assertAlmostEqual(_get_reactor_max_temperature(_liquid_reactor(350.0)), 350.0) + + def test_surface_reactor(self): + self.assertAlmostEqual(_get_reactor_max_temperature(_surface_reactor(600.0)), 600.0) + + def test_temperature_range(self): + spc = Species().from_smiles('C') + reactor = SimpleReactor(T=[Quantity(800, 'K'), Quantity(1500, 'K')], + P=Quantity(1e5, 'Pa'), + initial_mole_fractions={spc: 1.0}, n_sims=1, termination=[]) + self.assertAlmostEqual(_get_reactor_max_temperature(reactor), 1500.0) + + def test_no_T_attribute(self): + self.assertIsNone(_get_reactor_max_temperature(object())) + + +class TestHasPahLibsKeyword(unittest.TestCase): + + def test_in_thermo(self): + rmg = RMG() + rmg.thermo_libraries = ['auto', PAH_LIBS] + rmg.reaction_libraries = [] + rmg.seed_mechanisms = [] + rmg.transport_libraries = [] + self.assertTrue(_has_pah_libs_keyword(rmg)) + + def test_in_reaction_libraries(self): + rmg = RMG() + rmg.thermo_libraries = [] + rmg.reaction_libraries = ['myLib', ''] + rmg.seed_mechanisms = [] + rmg.transport_libraries = [] + self.assertTrue(_has_pah_libs_keyword(rmg)) + + def test_absent(self): + rmg = RMG() + rmg.thermo_libraries = ['auto'] + rmg.reaction_libraries = ['auto'] + rmg.seed_mechanisms = ['auto'] + rmg.transport_libraries = ['auto'] + self.assertFalse(_has_pah_libs_keyword(rmg)) + + def test_string_field_not_iterated(self): + rmg = RMG() + rmg.thermo_libraries = 'auto' + rmg.reaction_libraries = 'auto' + rmg.seed_mechanisms = 'auto' + rmg.transport_libraries = 'auto' + self.assertFalse(_has_pah_libs_keyword(rmg)) + + +class TestDetectChemistry(unittest.TestCase): + + def test_hydrocarbon(self): + species = [Species().from_smiles('C'), Species().from_smiles('[H][H]')] + profile = detect_chemistry(species, [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_carbon) + self.assertFalse(profile.has_nitrogen) + self.assertFalse(profile.has_oxygen) + self.assertAlmostEqual(profile.max_temperature, 500.0) + + def test_oxygenated_fuel(self): + species = [Species().from_smiles('CCO'), Species().from_smiles('N#N')] + profile = detect_chemistry(species, [_simple_reactor(1000.0)], solvent=None) + self.assertTrue(profile.has_oxygen) + self.assertTrue(profile.has_carbon) + self.assertTrue(profile.has_nitrogen) + + def test_O2(self): + species = [Species().from_smiles('C'), Species().from_smiles('[O][O]')] + profile = detect_chemistry(species, [_simple_reactor(1500.0)], solvent=None) + self.assertTrue(profile.has_oxygen) + + def test_nitrogen(self): + profile = detect_chemistry([Species().from_smiles('N')], [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_nitrogen) + + def test_sulfur(self): + profile = detect_chemistry([Species().from_smiles('S')], [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_sulfur) + + def test_halogens(self): + profile = detect_chemistry([Species().from_smiles('CCl')], [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_halogens) + + def test_liquid_reactor(self): + profile = detect_chemistry([Species().from_smiles('C')], [_liquid_reactor(300.0)], solvent=None) + self.assertTrue(profile.has_liquid) + + def test_surface_reactor(self): + profile = detect_chemistry([Species().from_smiles('[H][H]')], [_surface_reactor(600.0)], solvent=None) + self.assertTrue(profile.has_surface) + + def test_solvent(self): + profile = detect_chemistry([Species().from_smiles('C')], [_simple_reactor(300.0)], solvent='water') + self.assertTrue(profile.has_liquid) + + def test_max_T_multiple_reactors(self): + profile = detect_chemistry([Species().from_smiles('C')], + [_simple_reactor(500.0), _simple_reactor(1200.0)], solvent=None) + self.assertAlmostEqual(profile.max_temperature, 1200.0) + + def test_surface_species(self): + x_mol = Molecule().from_adjacency_list('1 X u0 p0 c0') + profile = detect_chemistry([Species(molecule=[x_mol])], [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_surface) + + def test_mixed_reactors(self): + profile = detect_chemistry([Species().from_smiles('C')], + [_simple_reactor(800.0), _liquid_reactor(350.0)], solvent=None) + self.assertTrue(profile.has_liquid) + self.assertAlmostEqual(profile.max_temperature, 800.0) + + def test_no_reactors(self): + profile = detect_chemistry([Species().from_smiles('CCO')], [], solvent=None) + self.assertTrue(profile.has_oxygen) + self.assertAlmostEqual(profile.max_temperature, 0.0) + + def test_nonreactive_species_ignored(self): + reactive = Species().from_smiles('C') + bath_gas = Species(reactive=False).from_smiles('N#N') + profile = detect_chemistry([reactive, bath_gas], [_simple_reactor(500.0)], solvent=None) + self.assertTrue(profile.has_carbon) + self.assertFalse(profile.has_nitrogen) + + +class TestDetermineChemistrySets(unittest.TestCase): + + def test_primary_always(self): + self.assertEqual(determine_chemistry_sets(ChemistryProfile(), False), [ChemistrySet.PRIMARY]) + + def test_nitrogen(self): + self.assertIn(ChemistrySet.NITROGEN, determine_chemistry_sets(ChemistryProfile(has_nitrogen=True), False)) + + def test_sulfur(self): + self.assertIn(ChemistrySet.SULFUR, determine_chemistry_sets(ChemistryProfile(has_sulfur=True), False)) + + def test_oxidation(self): + self.assertIn(ChemistrySet.OXIDATION, determine_chemistry_sets(ChemistryProfile(has_oxygen=True), False)) + + def test_ch_pyrolysis_core(self): + self.assertIn(ChemistrySet.CH_PYROLYSIS_CORE, + determine_chemistry_sets(ChemistryProfile(has_carbon=True, max_temperature=900.0), False)) + + def test_ch_pyrolysis_core_low_T(self): + self.assertNotIn(ChemistrySet.CH_PYROLYSIS_CORE, + determine_chemistry_sets(ChemistryProfile(has_carbon=True, max_temperature=500.0), False)) + + def test_pah_no_oxygen(self): + self.assertIn(ChemistrySet.PAH_FORMATION, + determine_chemistry_sets(ChemistryProfile(has_carbon=True, max_temperature=1000.0), False)) + + def test_pah_blocked_by_oxygen(self): + self.assertNotIn(ChemistrySet.PAH_FORMATION, + determine_chemistry_sets( + ChemistryProfile(has_carbon=True, has_oxygen=True, max_temperature=1000.0), False)) + + def test_pah_with_keyword(self): + self.assertIn(ChemistrySet.PAH_FORMATION, + determine_chemistry_sets(ChemistryProfile(has_carbon=True, has_oxygen=True, max_temperature=1000.0), True)) + + def test_liquid_oxidation(self): + self.assertIn(ChemistrySet.LIQUID_OXIDATION, + determine_chemistry_sets(ChemistryProfile(has_liquid=True, has_oxygen=True), False)) + + def test_surface(self): + sets = determine_chemistry_sets(ChemistryProfile(has_surface=True), False) + self.assertIn(ChemistrySet.SURFACE, sets) + self.assertNotIn(ChemistrySet.SURFACE_NITROGEN, sets) + + def test_surface_nitrogen(self): + sets = determine_chemistry_sets(ChemistryProfile(has_surface=True, has_nitrogen=True), False) + self.assertIn(ChemistrySet.SURFACE, sets) + self.assertIn(ChemistrySet.SURFACE_NITROGEN, sets) + + def test_halogens(self): + self.assertIn(ChemistrySet.HALOGENS, + determine_chemistry_sets(ChemistryProfile(has_halogens=True), False)) + + def test_electrochem(self): + self.assertIn(ChemistrySet.ELECTROCHEM, + determine_chemistry_sets(ChemistryProfile(has_electrochem=True), False)) + + def test_ordering(self): + profile = ChemistryProfile(has_nitrogen=True, has_sulfur=True, has_oxygen=True, + has_carbon=True, max_temperature=1200.0) + sets = determine_chemistry_sets(profile, False) + self.assertEqual(sets[0], ChemistrySet.PRIMARY) + self.assertLess(sets.index(ChemistrySet.NITROGEN), sets.index(ChemistrySet.SULFUR)) + + def test_full_combo(self): + profile = ChemistryProfile( + has_nitrogen=True, has_sulfur=True, has_oxygen=True, has_carbon=True, + has_halogens=True, has_electrochem=True, has_surface=True, has_liquid=True, + max_temperature=1200.0) + sets = determine_chemistry_sets(profile, True) + for expected in ChemistrySet: + self.assertIn(expected, sets) + + +class TestDetermineKineticsFamilies(unittest.TestCase): + + def test_default(self): + self.assertEqual(determine_kinetics_families(ChemistryProfile()), [FamilySet.DEFAULT]) + + def test_ch_pyrolysis(self): + self.assertIn(FamilySet.CH_PYROLYSIS, + determine_kinetics_families(ChemistryProfile(has_carbon=True, max_temperature=1000.0))) + + def test_ch_pyrolysis_low_T(self): + self.assertNotIn(FamilySet.CH_PYROLYSIS, + determine_kinetics_families(ChemistryProfile(has_carbon=True, max_temperature=500.0))) + + def test_liquid_peroxide(self): + self.assertIn(FamilySet.LIQUID_PEROXIDE, + determine_kinetics_families(ChemistryProfile(has_liquid=True, has_oxygen=True))) + + def test_surface(self): + self.assertIn(FamilySet.SURFACE, + determine_kinetics_families(ChemistryProfile(has_surface=True))) + + def test_halogens(self): + self.assertIn(FamilySet.HALOGENS, + determine_kinetics_families(ChemistryProfile(has_halogens=True))) + + def test_electrochem(self): + self.assertIn(FamilySet.ELECTROCHEM, + determine_kinetics_families(ChemistryProfile(has_electrochem=True))) + + +class TestLoadRecommendedYml(unittest.TestCase): + + def setUp(self): + self.db_dir = settings['database.directory'] + if not os.path.isfile(os.path.join(self.db_dir, 'recommended_libraries.yml')): + self.skipTest('recommended_libraries.yml not found') + + def test_loads(self): + data = load_recommended_yml(self.db_dir) + self.assertIsInstance(data, dict) + self.assertIn('primary', data) + + def test_all_chemistry_sets_present(self): + data = load_recommended_yml(self.db_dir) + for s in ChemistrySet: + self.assertIn(s.value, data) + + def test_required_keys(self): + data = load_recommended_yml(self.db_dir) + for name, entry in data.items(): + self.assertIn('thermo', entry, f'{name} missing thermo') + self.assertIn('kinetics', entry, f'{name} missing kinetics') + + def test_invalid_path(self): + with self.assertRaises(Exception): + load_recommended_yml('/nonexistent') + + +class TestExpandChemistrySets(unittest.TestCase): + + def setUp(self): + self.db_dir = settings['database.directory'] + if not os.path.isfile(os.path.join(self.db_dir, 'recommended_libraries.yml')): + self.skipTest('recommended_libraries.yml not found') + self.rec = load_recommended_yml(self.db_dir) + + def test_primary(self): + thermo, _, transport, seeds = expand_chemistry_sets(self.rec, ['primary']) + self.assertIn('primaryThermoLibrary', thermo) + self.assertIn('primaryH2O2', seeds) + self.assertIn('PrimaryTransportLibrary', transport) + + def test_nitrogen(self): + thermo, kinetics, _, _ = expand_chemistry_sets(self.rec, ['nitrogen']) + self.assertIn('NH3', thermo) + self.assertIn('primaryNitrogenLibrary', kinetics) + + def test_ch_pyrolysis_core(self): + thermo, kinetics, _, _ = expand_chemistry_sets(self.rec, ['CH_pyrolysis_core']) + self.assertIn('CurranPentane', thermo) + self.assertIn('C2H2_init', kinetics) + + def test_pah_formation(self): + thermo, kinetics, _, _ = expand_chemistry_sets(self.rec, ['PAH_formation']) + self.assertIn('naphthalene_H', thermo) + self.assertIn('Mebel_C6H5_C2H2', kinetics) + + def test_surface_nitrogen(self): + _, kinetics, _, _ = expand_chemistry_sets(self.rec, ['surface_nitrogen']) + self.assertIn('Surface/Ammonia/Schneider_Pt111', kinetics) + self.assertIn('Surface/DOC/Nitrogen', kinetics) + + def test_no_duplicates(self): + thermo, kinetics, _, seeds = expand_chemistry_sets( + self.rec, ['primary', 'nitrogen', 'oxidation', 'CH_pyrolysis_core']) + self.assertEqual(len(thermo), len(set(thermo))) + self.assertEqual(len(kinetics), len(set(kinetics))) + + def test_primary_always_first(self): + thermo, _, _, _ = expand_chemistry_sets(self.rec, ['nitrogen', 'primary']) + self.assertEqual(thermo[0], 'primaryThermoLibrary') + + def test_all_primary_before_other(self): + primary_thermo = self.rec['primary']['thermo'] + thermo, _, _, _ = expand_chemistry_sets(self.rec, ['primary', 'oxidation', 'CH_pyrolysis_core']) + last_primary = max(thermo.index(lib) for lib in primary_thermo) + non_primary = [lib for lib in thermo if lib not in primary_thermo] + if non_primary: + first_other = min(thermo.index(lib) for lib in non_primary) + self.assertLess(last_primary, first_other) + + def test_invalid_set(self): + with self.assertRaises(Exception): + expand_chemistry_sets(self.rec, ['nonexistent']) + + def test_empty(self): + thermo, kinetics, transport, seeds = expand_chemistry_sets(self.rec, []) + self.assertEqual(thermo, []) + + +class TestMergeWithUserLibraries(unittest.TestCase): + + def test_auto_string(self): + self.assertEqual(merge_with_user_libraries('auto', ['a', 'b']), ['a', 'b']) + + def test_no_auto(self): + self.assertEqual(merge_with_user_libraries(['x', 'y'], ['a']), ['x', 'y']) + + def test_auto_middle(self): + self.assertEqual(merge_with_user_libraries(['x', 'auto', 'y'], ['a', 'b']), ['x', 'a', 'b', 'y']) + + def test_auto_start(self): + self.assertEqual(merge_with_user_libraries(['auto', 'y'], ['a', 'b']), ['a', 'b', 'y']) + + def test_auto_end(self): + self.assertEqual(merge_with_user_libraries(['x', 'auto'], ['a']), ['x', 'a']) + + def test_dedup(self): + self.assertEqual(merge_with_user_libraries(['x', 'auto'], ['x', 'a']), ['x', 'a']) + + def test_none(self): + self.assertIsNone(merge_with_user_libraries(None, ['a'])) + + def test_empty(self): + self.assertEqual(merge_with_user_libraries([], ['a']), []) + + def test_pah_stripped(self): + self.assertEqual(merge_with_user_libraries(['x', PAH_LIBS, 'auto'], ['a']), ['x', 'a']) + + def test_pah_no_auto(self): + self.assertEqual(merge_with_user_libraries(['x', PAH_LIBS], ['a']), ['x']) + + def test_auto_empty_libs(self): + self.assertEqual(merge_with_user_libraries('auto', []), []) + + +class TestToReactionLibraryTuples(unittest.TestCase): + + def test_all_false(self): + self.assertEqual(to_reaction_library_tuples(['a', 'b'], set()), [('a', False), ('b', False)]) + + def test_some_true(self): + self.assertEqual(to_reaction_library_tuples(['a', 'b', 'c'], {'b'}), [('a', False), ('b', True), ('c', False)]) + + def test_empty(self): + self.assertEqual(to_reaction_library_tuples([], set()), []) + + def test_all_true(self): + self.assertEqual(to_reaction_library_tuples(['a', 'b'], {'a', 'b'}), [('a', True), ('b', True)]) + + +class TestResolveAutoKineticsFamilies(unittest.TestCase): + + def setUp(self): + self.db_dir = settings['database.directory'] + if not os.path.isfile(os.path.join(self.db_dir, 'kinetics', 'families', 'recommended.py')): + self.skipTest('recommended.py not found') + + def test_default(self): + families = resolve_auto_kinetics_families([FamilySet.DEFAULT], self.db_dir) + self.assertIn('H_Abstraction', families) + self.assertIn('R_Recombination', families) + + def test_default_plus_surface(self): + families = resolve_auto_kinetics_families([FamilySet.DEFAULT, FamilySet.SURFACE], self.db_dir) + self.assertIn('Surface_Adsorption_Single', families) + + def test_no_duplicates(self): + families = resolve_auto_kinetics_families( + [FamilySet.DEFAULT, FamilySet.SURFACE, FamilySet.HALOGENS], self.db_dir) + self.assertEqual(len(families), len(set(families))) + + def test_invalid_set(self): + with self.assertRaises(Exception): + resolve_auto_kinetics_families(['nonexistent'], self.db_dir) + + def test_invalid_path(self): + with self.assertRaises(Exception): + resolve_auto_kinetics_families([FamilySet.DEFAULT], '/nonexistent') + + def test_returns_strings(self): + for f in resolve_auto_kinetics_families([FamilySet.DEFAULT], self.db_dir): + self.assertIsInstance(f, str) + + +class TestLogLibList(unittest.TestCase): + + def test_none(self): + with self.assertLogs(level=logging.INFO) as cm: + _log_lib_list('Test', None) + self.assertIn('(none)', cm.output[0]) + + def test_empty(self): + with self.assertLogs(level=logging.INFO) as cm: + _log_lib_list('Test', []) + self.assertIn('(none)', cm.output[0]) + + def test_short(self): + with self.assertLogs(level=logging.INFO) as cm: + _log_lib_list('Libs', ['a', 'b']) + self.assertIn('Libs (2)', cm.output[0]) + + def test_wrapping(self): + libs = [f'very_long_library_name_{i}' for i in range(10)] + with self.assertLogs(level=logging.INFO) as cm: + _log_lib_list('Libs', libs, width=60) + self.assertIn('\n', cm.output[0]) + + +class TestAutoSelectLibraries(unittest.TestCase): + + def setUp(self): + if not os.path.isfile(os.path.join(settings['database.directory'], 'recommended_libraries.yml')): + self.skipTest('recommended_libraries.yml not found') + + def test_noop_without_auto(self): + rmg = RMG() + rmg.initial_species = [Species().from_smiles('C')] + rmg.reaction_systems = [_simple_reactor(500.0)] + rmg.solvent = None + rmg.database_directory = settings['database.directory'] + rmg.thermo_libraries = ['myLib'] + rmg.reaction_libraries = ['myKinLib'] + rmg.seed_mechanisms = [] + rmg.transport_libraries = None + rmg.kinetics_families = 'default' + auto_select_libraries(rmg) + self.assertEqual(rmg.thermo_libraries, ['myLib']) + + def test_methane_oxidation(self): + rmg = _rmg_with_auto(['C', '[O][O]'], 1500.0) + auto_select_libraries(rmg) + self.assertIn('primaryThermoLibrary', rmg.thermo_libraries) + self.assertIn('FFCM1(-)', rmg.thermo_libraries) + self.assertIn('H_Abstraction', rmg.kinetics_families) + + def test_ethane_pyrolysis_includes_pah(self): + rmg = _rmg_with_auto(['CC'], 1200.0) + auto_select_libraries(rmg) + self.assertIn('naphthalene_H', rmg.thermo_libraries) + self.assertIn('Mebel_C6H5_C2H2', rmg.reaction_libraries) + + def test_ethanol_no_pah(self): + rmg = _rmg_with_auto(['CCO'], 1000.0) + auto_select_libraries(rmg) + self.assertNotIn('naphthalene_H', rmg.thermo_libraries) + self.assertIn('FFCM1(-)', rmg.thermo_libraries) + + def test_ethanol_with_pah(self): + rmg = _rmg_with_auto(['CCO'], 1000.0, pah=True) + auto_select_libraries(rmg) + self.assertIn('naphthalene_H', rmg.thermo_libraries) + + def test_families_resolved(self): + rmg = _rmg_with_auto(['C'], 500.0) + auto_select_libraries(rmg) + self.assertIsInstance(rmg.kinetics_families, list) + self.assertIn('H_Abstraction', rmg.kinetics_families) + + def test_families_auto_with_exclusion(self): + """['!H_Abstraction', 'auto'] should resolve auto families minus H_Abstraction.""" + rmg = _rmg_with_auto(['C'], 500.0) + rmg.kinetics_families = ['!H_Abstraction', AUTO] + auto_select_libraries(rmg) + self.assertIsInstance(rmg.kinetics_families, list) + self.assertNotIn('H_Abstraction', rmg.kinetics_families) + self.assertIn('R_Recombination', rmg.kinetics_families) + + def test_families_auto_with_multiple_exclusions(self): + rmg = _rmg_with_auto(['C'], 500.0) + rmg.kinetics_families = ['!H_Abstraction', '!Disproportionation', AUTO] + auto_select_libraries(rmg) + self.assertNotIn('H_Abstraction', rmg.kinetics_families) + self.assertNotIn('Disproportionation', rmg.kinetics_families) + self.assertIn('R_Recombination', rmg.kinetics_families) + + def test_families_auto_with_addition(self): + """['auto', 'MyCustomFamily'] should include auto families + the custom one.""" + rmg = _rmg_with_auto(['C'], 500.0) + rmg.kinetics_families = [AUTO, 'MyCustomFamily'] + auto_select_libraries(rmg) + self.assertIn('H_Abstraction', rmg.kinetics_families) + self.assertIn('MyCustomFamily', rmg.kinetics_families) + + def test_families_auto_with_exclusion_and_addition(self): + """['!H_Abstraction', 'auto', 'MyCustomFamily'] should work.""" + rmg = _rmg_with_auto(['C'], 500.0) + rmg.kinetics_families = ['!H_Abstraction', AUTO, 'MyCustomFamily'] + auto_select_libraries(rmg) + self.assertNotIn('H_Abstraction', rmg.kinetics_families) + self.assertIn('R_Recombination', rmg.kinetics_families) + self.assertIn('MyCustomFamily', rmg.kinetics_families) + + +class TestEndToEnd(unittest.TestCase): + + def test_methane_oxidation(self): + species = [Species().from_smiles('C'), Species().from_smiles('[O][O]')] + profile = detect_chemistry(species, [_simple_reactor(1500.0)], solvent=None) + sets = determine_chemistry_sets(profile, False) + self.assertIn(ChemistrySet.OXIDATION, sets) + self.assertIn(ChemistrySet.CH_PYROLYSIS_CORE, sets) + self.assertNotIn(ChemistrySet.PAH_FORMATION, sets) + + def test_ethane_pyrolysis(self): + species = [Species().from_smiles('CC')] + profile = detect_chemistry(species, [_simple_reactor(1200.0)], solvent=None) + sets = determine_chemistry_sets(profile, False) + self.assertIn(ChemistrySet.PAH_FORMATION, sets) + self.assertNotIn(ChemistrySet.OXIDATION, sets) + + def test_liquid_pentane_oxidation(self): + species = [Species().from_smiles('CCCCC'), Species().from_smiles('[O][O]')] + profile = detect_chemistry(species, [_liquid_reactor(400.0)], solvent='pentane') + sets = determine_chemistry_sets(profile, False) + self.assertIn(ChemistrySet.LIQUID_OXIDATION, sets) + self.assertIn(FamilySet.LIQUID_PEROXIDE, determine_kinetics_families(profile)) + + def test_nitrogen_sulfur(self): + species = [Species().from_smiles('N'), Species().from_smiles('S')] + profile = detect_chemistry(species, [_simple_reactor(500.0)], solvent=None) + sets = determine_chemistry_sets(profile, False) + self.assertIn(ChemistrySet.NITROGEN, sets) + self.assertIn(ChemistrySet.SULFUR, sets) + self.assertNotIn(ChemistrySet.OXIDATION, sets) + + def test_inert_with_nitrogen(self): + species = [Species().from_smiles('[Ar]'), Species().from_smiles('N#N')] + profile = detect_chemistry(species, [_simple_reactor(300.0)], solvent=None) + sets = determine_chemistry_sets(profile, False) + self.assertEqual(sets, [ChemistrySet.PRIMARY, ChemistrySet.NITROGEN]) + + def test_oxyfuel_with_inert_bath_gas(self): + """ + Formic acid (OC=O) with non-reactive N2 bath gas at 1000 K. + N2 is inert so nitrogen should NOT be triggered. + Oxygen is in the fuel so oxidation + CH_pyrolysis_core should trigger. + PAH_formation should NOT trigger (oxygen present, no ). + """ + formic_acid = Species().from_smiles('OC=O') + n2_bath = Species(reactive=False).from_smiles('N#N') + profile = detect_chemistry([formic_acid, n2_bath], [_simple_reactor(1000.0)], solvent=None) + self.assertTrue(profile.has_oxygen) + self.assertTrue(profile.has_carbon) + self.assertFalse(profile.has_nitrogen) # N2 is non-reactive, should be ignored + sets = determine_chemistry_sets(profile, pah_libs_requested=False) + self.assertIn(ChemistrySet.PRIMARY, sets) + self.assertIn(ChemistrySet.OXIDATION, sets) + self.assertIn(ChemistrySet.CH_PYROLYSIS_CORE, sets) + self.assertNotIn(ChemistrySet.PAH_FORMATION, sets) + self.assertNotIn(ChemistrySet.NITROGEN, sets) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/rmgpy/rmg/inputTest.py b/test/rmgpy/rmg/inputTest.py index 00160d974b9..f9b083822aa 100644 --- a/test/rmgpy/rmg/inputTest.py +++ b/test/rmgpy/rmg/inputTest.py @@ -30,6 +30,7 @@ from unittest.mock import patch import rmgpy.rmg.input as inp +from rmgpy.exceptions import InputError from rmgpy.rmg.main import RMG from rmgpy.rmg.model import CoreEdgeReactionModel from rmgpy.ml.estimator import ADMONITION @@ -66,33 +67,123 @@ def teardown_class(self): def test_importing_database_reaction_libraries_from_string(self): """ - Test that we can import Reaction Libraries using the non-tuple form. + Test that reaction libraries given as plain strings are stored as strings. """ global rmg - # add database properties to RMG inp.database(reactionLibraries=["test"]) - assert isinstance(rmg.reaction_libraries[0], tuple) - assert not rmg.reaction_libraries[0][1] + assert rmg.reaction_libraries == ["test"] + assert "test" not in rmg.reaction_libraries_output_edge def test_importing_database_reaction_libraries_from_false_tuple(self): """ - Test that we can import Reaction Libraries using the Tuple False form. + Test that (name, False) tuples are stored as plain strings without output_edge. """ global rmg - # add database properties to RMG inp.database(reactionLibraries=[("test", False)]) - assert isinstance(rmg.reaction_libraries[0], tuple) - assert not rmg.reaction_libraries[0][1] + assert rmg.reaction_libraries == ["test"] + assert "test" not in rmg.reaction_libraries_output_edge def test_importing_database_reaction_libraries_from_true_tuple(self): """ - Test that we can import Reaction Libraries using the Tuple True form. + Test that (name, True) tuples are stored as plain strings with the name in output_edge. """ global rmg - # add database properties to RMG inp.database(reactionLibraries=[("test", True)]) - assert isinstance(rmg.reaction_libraries[0], tuple) - assert rmg.reaction_libraries[0][1] + assert rmg.reaction_libraries == ["test"] + assert "test" in rmg.reaction_libraries_output_edge + +class TestInputDatabaseAutoSelection: + """Tests for 'auto' and '' token handling in database().""" + + def test_thermo_auto_string(self): + global rmg + inp.database(thermoLibraries='auto') + assert rmg.thermo_libraries == 'auto' + + def test_thermo_auto_in_list(self): + global rmg + inp.database(thermoLibraries=['myLib', 'auto']) + assert rmg.thermo_libraries == ['myLib', 'auto'] + + def test_thermo_pah_libs_in_list(self): + global rmg + inp.database(thermoLibraries=['auto', '']) + assert rmg.thermo_libraries == ['auto', ''] + + def test_transport_auto_string(self): + global rmg + inp.database(transportLibraries='auto') + assert rmg.transport_libraries == 'auto' + + def test_reaction_libraries_auto_string(self): + global rmg + inp.database(reactionLibraries='auto') + assert rmg.reaction_libraries == 'auto' + + def test_reaction_libraries_auto_in_list(self): + global rmg + inp.database(reactionLibraries=['auto', '']) + assert 'auto' in rmg.reaction_libraries + assert '' in rmg.reaction_libraries + + def test_reaction_libraries_mixed_auto_and_tuple(self): + global rmg + inp.database(reactionLibraries=[('myLib', True), 'auto']) + assert rmg.reaction_libraries == ['myLib', 'auto'] + assert 'myLib' in rmg.reaction_libraries_output_edge + + def test_seed_mechanisms_auto(self): + global rmg + inp.database(seedMechanisms='auto') + assert rmg.seed_mechanisms == 'auto' + + def test_kinetics_families_auto(self): + global rmg + inp.database(kineticsFamilies='auto') + assert rmg.kinetics_families == 'auto' + + def test_kinetics_families_auto_with_exclusion(self): + global rmg + inp.database(kineticsFamilies=['!H_Abstraction', 'auto']) + assert rmg.kinetics_families == ['!H_Abstraction', 'auto'] + + def test_default_no_auto(self): + """Without 'auto', fields should behave as before.""" + global rmg + inp.database( + thermoLibraries=['primaryThermoLibrary'], + reactionLibraries=[('lib1', False)], + seedMechanisms=[], + transportLibraries=None, + kineticsFamilies='default', + ) + assert rmg.thermo_libraries == ['primaryThermoLibrary'] + assert rmg.reaction_libraries == ['lib1'] + assert 'lib1' not in rmg.reaction_libraries_output_edge + assert rmg.seed_mechanisms == [] + assert rmg.transport_libraries is None + assert rmg.kinetics_families == 'default' + + def test_pah_libs_standalone_thermo_raises(self): + with pytest.raises(InputError): + inp.database(thermoLibraries='') + + def test_pah_libs_standalone_reaction_raises(self): + with pytest.raises(InputError): + inp.database(reactionLibraries='') + + def test_pah_libs_standalone_transport_raises(self): + with pytest.raises(InputError): + inp.database(transportLibraries='') + + def test_pah_libs_standalone_seeds_raises(self): + with pytest.raises(InputError): + inp.database(seedMechanisms='') + + def test_pah_libs_tuple_in_reaction_libs_raises(self): + with pytest.raises(InputError): + inp.database(reactionLibraries=[('', True)]) + @pytest.mark.skip(reason=ADMONITION) class TestInputMLEstimator: