ReactionMechanismGenerator · alongd · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
diff --git a/documentation/source/users/rmg/input.rst b/documentation/source/users/rmg/input.rst
@@ -204,17 +204,85 @@ The last section is specifying that RMG is estimating kinetics of reactions from
 	kineticsEstimator = 'rate rules'
 
 
-The following is an example of a database block, based on above chosen libraries and options::
+.. _auto_library_selection:
+
+Automatic Library and Family Selection
+--------------------------------------
+Instead of manually listing every library, you can let RMG choose the appropriate
+thermo libraries, kinetics libraries, transport libraries, seed mechanisms, and
+kinetics families automatically based on the species and reactor conditions in your
+input file. Use the ``'auto'`` keyword in any of the library or family fields::
 
 	database(
-		thermoLibraries = ['primaryThermoLibrary', 'GRI-Mech3.0'],
-		reactionLibraries = [('Glarborg/C3',False)],
-		seedMechanisms = ['GRI-Mech3.0'],
+		thermoLibraries = 'auto',
+		reactionLibraries = 'auto',
+		transportLibraries = 'auto',
+		seedMechanisms = 'auto',
+		kineticsFamilies = 'auto',
 		kineticsDepositories = ['training'],
-		kineticsFamilies = 'defult',
 		kineticsEstimator = 'rate rules',
 	)
 
+When ``'auto'`` is specified, RMG analyzes the initial species and reactor
+conditions to detect the chemistry present (e.g., nitrogen, sulfur, oxygen,
+halogens, surface, liquid phase) and selects the relevant library sets.
+The triggered sets and their corresponding libraries are logged at the start
+of the RMG run.
+
+**Mixing manual and auto selection.** You can combine user-specified libraries
+with ``'auto'`` in a list. The position of ``'auto'`` controls the priority:
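+libraries listed before ``'auto'`` outrank the auto-selected ones; those after it rank below them::
+
+	thermoLibraries = ['myCustomLib', 'auto']  # myCustomLib outranks the auto-selected libraries
+
+	thermoLibraries = ['auto', 'myFallbackLib']  # myFallbackLib ranks below the auto-selected libraries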
+
+If a library you listed manually also appears in the auto-selected set, it will
+not be added twice. It keeps the position you gave it, and the auto-selected
+copy is skipped.
+
+**PAH libraries and the '<PAH_libs>' keyword.**
+The auto-selection splits high-temperature C/H chemistry into two tiers:
+
+* **CH_pyrolysis_core** — fundamental high-T radical and small-molecule chemistry
+  (e.g., acetylene initiation, alkane cracking). Always included when carbon is
+  present and the maximum reactor temperature is at least 800 K.
+* **PAH_formation** — aromatic ring formation, naphthalene pathways (CPD + HACA),
+  and larger PAH growth. This is a large set (~70 kinetics libraries) that can
+  significantly increase model size and generation time.
+
+For **pure C/H pyrolysis** (no oxygen in any input species), both tiers are
+included automatically — PAH formation is expected in such systems.
+
+For **oxygenated systems** (any species contains O, including oxygenated fuels
+like ethanol or DME), only CH_pyrolysis_core is included by default because
+PAH chemistry is typically a minor pathway. If you know your system forms
+significant amounts of aromatics (e.g., fuel-rich partial oxidation), you can
+explicitly request the PAH libraries by adding the ``'<PAH_libs>'`` keyword
+to any library field::
+
+	database(
+		thermoLibraries = ['auto', '<PAH_libs>'],
+		reactionLibraries = ['auto', '<PAH_libs>'],
+		seedMechanisms = 'auto',
+		transportLibraries = 'auto',
+		kineticsFamilies = 'auto',
+	)
+
+The ``'<PAH_libs>'`` keyword is consumed during processing (it does not appear
+in the final library list) — it only serves as a signal to include the
+PAH_formation set. It can be placed anywhere in the list; its position does
+not affect library priority.
+
+**Previewing the selection.** A Jupyter notebook is provided at
+:file:`ipython/auto_library_selection.ipynb` that lets you preview exactly which
+libraries and families RMG would choose for a given input file, without running
+the full job.
+
+.. note::
+	The ``'auto'`` keyword is opt-in. If you do not use it, you must list every library explicitly.
+
+
 .. _species_list:
 
 List of species

diff --git a/ipython/auto_library_selection.ipynb b/ipython/auto_library_selection.ipynb
@@ -0,0 +1,103 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "uak48ywvas",
+   "source": "# Auto Library & Family Selection Preview\n\nPreview what RMG's `'auto'` mode would select for thermo libraries, kinetics libraries,\ntransport libraries, seed mechanisms, and kinetics families — given an RMG input file.\n\nJust set `input_file_path` below and run all cells.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "5vx9v43udjl",
+   "source": "input_file_path = '../examples/rmg/superminimal/input.py'",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "vyt4k7q9as",
+   "source": "import os\n\nfrom rmgpy.rmg.main import RMG\nfrom rmgpy.rmg.input import read_input_file\nfrom rmgpy.data.auto_database import (\n    AUTO,\n    detect_chemistry,\n    determine_chemistry_sets,\n    determine_kinetics_families,\n    expand_chemistry_sets,\n    load_recommended_yml,\n    merge_with_user_libraries,\n    resolve_auto_kinetics_families,\n    _has_pah_libs_keyword,\n)\n\n# Load the input file into an RMG object (without loading the database)\nrmg = RMG()\nread_input_file(os.path.expanduser(input_file_path), rmg)\n\nprint(f'Input file: {os.path.abspath(input_file_path)}')\nprint(f'Species:    {[spec.label or spec.molecule[0].to_smiles() for spec in rmg.initial_species]}')\nprint(f'Reactors:   {len(rmg.reaction_systems)}')\nprint(f'Solvent:    {rmg.solvent or \"(none)\"}')\nprint()\nprint('Current database() settings (before auto-selection):')\nprint(f'  thermoLibraries:    {rmg.thermo_libraries}')\nprint(f'  reactionLibraries:  {rmg.reaction_libraries}')\nprint(f'  seedMechanisms:     {rmg.seed_mechanisms}')\nprint(f'  transportLibraries: {rmg.transport_libraries}')\nprint(f'  kineticsFamilies:   {rmg.kinetics_families}')",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "if94i2lxggr",
+   "source": "## Chemistry Detection\n\nAnalyze the input species and reactor conditions to determine what chemistry is present.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "grunflwiug8",
+   "source": "profile = detect_chemistry(rmg.initial_species, rmg.reaction_systems, rmg.solvent)\npah_libs_requested = _has_pah_libs_keyword(rmg)\n\nprint(f'Elements:     {\"/\".join(sorted(profile.elements_present))}')\nprint(f'Max T:        {profile.max_temperature:.0f} K')\nprint(f'Nitrogen:     {profile.has_nitrogen}')\nprint(f'Sulfur:       {profile.has_sulfur}')\nprint(f'Oxygen:       {profile.has_oxygen}')\nprint(f'Carbon:       {profile.has_carbon}')\nprint(f'Halogens:     {profile.has_halogens}')\nprint(f'Electrochem:  {profile.has_electrochem}')\nprint(f'Surface:      {profile.has_surface}')\nprint(f'Liquid:       {profile.has_liquid}')\nprint(f'<PAH_libs>:   {pah_libs_requested}')",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "k9r657hlttp",
+   "source": "## Triggered Chemistry Sets & Kinetics Family Sets\n\nThis section lists the named sets from `recommended_libraries.yml` and `recommended.py` that would be activated.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "fkjvg12kwkj",
+   "source": "chem_sets = determine_chemistry_sets(profile, pah_libs_requested)\nfamily_sets = determine_kinetics_families(profile)\n\nprint('Chemistry sets (for thermo/kinetics/transport/seed libraries):')\nfor s in chem_sets:\n    print(f'  - {s.value}')\nprint()\nprint('Kinetics family sets:')\nfor s in family_sets:\n    print(f'  - {s.value}')",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ldplsa4l6wa",
+   "source": "## What `'auto'` Would Select\n\nThe libraries and families that RMG would use if all fields were set to `'auto'`.\nThis always shows the auto-selected result, regardless of what the input file currently specifies.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "87ac142mi0b",
+   "source": "recommended_data = load_recommended_yml(rmg.database_directory)\nauto_thermo, auto_kinetics, auto_transport, auto_seeds = expand_chemistry_sets(\n    recommended_data, chem_sets\n)\nauto_families = resolve_auto_kinetics_families(family_sets, rmg.database_directory)\n\ndef print_list(label, items):\n    items = items or []\n    print(f'\\n{label} ({len(items)}):')\n    for item in items:\n        print(f'  {item}')\n\nprint_list('Thermo libraries', auto_thermo)\nprint_list('Reaction libraries', auto_kinetics)\nprint_list('Seed mechanisms', auto_seeds)\nprint_list('Transport libraries', auto_transport)\nprint_list('Kinetics families', auto_families)",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "w3ocgmc0ils",
+   "source": "## Actual Resolution of the Current Input File\n\nProcesses the input file's `database()` settings as RMG would at startup.\nIf the input file uses `'auto'`, `'<PAH_libs>'`, or `['!family', 'auto']`,\nyou'll see the resolved result. If it uses manual library lists, they pass through unchanged.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "eft98a4ciwl",
+   "source": "from rmgpy.data.auto_database import auto_select_libraries, PAH_LIBS, to_reaction_library_tuples\n\n# Work on a fresh copy so we don't mutate the rmg object used above\nimport copy\nrmg2 = copy.deepcopy(rmg)\n\n# Run the same auto-selection that main.py would run\nauto_select_libraries(rmg2)\n\n# Convert reaction libraries to tuples (as main.py does before load_database)\nif isinstance(rmg2.reaction_libraries, list):\n    output_edge = getattr(rmg2, 'reaction_libraries_output_edge', set())\n    rmg2.reaction_libraries = to_reaction_library_tuples(rmg2.reaction_libraries, output_edge)\n\nhas_auto = any(\n    getattr(rmg, attr, None) == 'auto'\n    or (isinstance(getattr(rmg, attr, None), list) and 'auto' in getattr(rmg, attr))\n    for attr in ('thermo_libraries', 'reaction_libraries', 'seed_mechanisms',\n                 'transport_libraries', 'kinetics_families')\n)\n\nif not has_auto:\n    print('The input file does not use \\'auto\\' in any database field.')\n    print('The settings below are exactly what was specified in the input file.\\n')\n\nprint_list('Thermo libraries', rmg2.thermo_libraries)\n\nrxn_lib_names = [name for name, _ in rmg2.reaction_libraries] if isinstance(rmg2.reaction_libraries, list) else rmg2.reaction_libraries\nprint_list('Reaction libraries', rxn_lib_names or [])\n\nedge_libs = [name for name, flag in rmg2.reaction_libraries if flag] if isinstance(rmg2.reaction_libraries, list) else []\nif edge_libs:\n    print(f'\\n  (output unused edge reactions for: {\", \".join(edge_libs)})')\n\nprint_list('Seed mechanisms', rmg2.seed_mechanisms or [])\nprint_list('Transport libraries', rmg2.transport_libraries or [])\n\nif isinstance(rmg2.kinetics_families, list):\n    print_list('Kinetics families', rmg2.kinetics_families)\nelse:\n    print(f'\\nKinetics families: {rmg2.kinetics_families!r} (resolved at database load time)')",
+   "metadata": {},
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "7dba6e3be37a396b",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.9.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}