Video_chunking/app.py at main · Gambitnl/Video_chunking · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Gradio web UI for D&D Session Processor - Modern UI"""
import os
import json
import socket
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent))
import subprocess
from pathlib import Path
from threading import Event, RLock

import gradio as gr
import requests

from typing import Any, Dict, List, Optional, Tuple

from src.pipeline import DDSessionProcessor
from src.exceptions import CancelledError
from src.config import Config
from src.diarizer import SpeakerProfileManager
from src.character_profile import CharacterProfileManager
from src.logger import (
    get_log_file_path,
    get_logger,
    get_console_log_level,
    set_console_log_level,
    LOG_LEVEL_CHOICES,
)
from src.audit import log_audit_event
from src.party_config import PartyConfigManager, CampaignManager
from src.knowledge_base import CampaignKnowledgeBase
from src.preflight import PreflightIssue
from src.ui.constants import StatusIndicators
from src.ui.helpers import StatusMessages, UIComponents
from src.ui.state_store import UIStateStore
from src.campaign_dashboard import CampaignDashboard
from src.story_notebook import StoryNotebookManager
from src.artifact_counter import CampaignArtifactCounter

# Modern UI imports
from src.ui.theme import create_modern_theme, MODERN_CSS
from src.ui.process_session_tab_modern import create_process_session_tab_modern
from src.ui.api_key_manager import load_api_keys, save_api_keys
from src.ui.campaign_tab_modern import create_campaign_tab_modern
from src.ui.characters_tab_modern import (
    create_characters_tab_modern,
    character_tab_snapshot,
)
from src.ui.party_management_tab import create_party_management_tab
from src.ui.stories_output_tab_modern import create_stories_output_tab_modern
from src.ui.settings_tools_tab_modern import create_settings_tools_tab_modern
from src.ui.session_artifacts_tab import create_session_artifacts_tab, refresh_sessions as refresh_artifact_sessions
from src.ui.search_tab import create_search_tab
from src.ui.analytics_tab import create_analytics_tab
from src.ui.character_analytics_tab import create_character_analytics_tab
from src.ui.diagnostics_helpers import (
    run_health_check,
    export_diagnostics,
    list_conversations,
    clear_all_conversations,
)
from src.ui.campaign_dashboard_helpers import (
    campaign_overview_markdown,
    knowledge_summary_markdown,
    session_library_markdown,
    narrative_hint_markdown,
    diagnostics_markdown,
    chat_status_markdown,
    build_campaign_manifest,
)

from src.google_drive_auth import (
    get_auth_url,
    exchange_code_for_token,
    get_document_content,
    is_authenticated,
    revoke_credentials,
    authenticate_automatically
)
from src.restart_manager import RestartManager
from src.lock_manager import LockManager, LockTimeoutError
from src.ui.setup_wizard import create_setup_wizard

# Registry of cancel events for processing sessions that are currently running.
# Key: session_id, Value: threading.Event
# process_session() adds an entry for the duration of a run and removes it
# afterwards; cancel_processing() looks the event up and sets it.
_active_cancel_events: Dict[str, Event] = {}

# Historical note: campaign modifications used to be serialized with a single
# module-level RLock. LockManager now fills that role; the old declaration is
# left below (disabled) for reference only.
# campaign_modification_lock = RLock()

def ui_load_api_keys() -> Tuple[str, str, str, str]:
    """Fetch stored API keys plus a status banner for the settings UI.

    Returns:
        A ``(groq_key, openai_key, hf_key, status)`` tuple. A key that is
        absent from the loaded mapping is returned as an empty string.
    """
    stored = load_api_keys()
    groq_key, openai_key, hf_key = (
        stored.get(env_name, "")
        for env_name in ("GROQ_API_KEY", "OPENAI_API_KEY", "HUGGING_FACE_API_KEY")
    )

    # Any non-empty key counts as "keys already configured".
    if any((groq_key, openai_key, hf_key)):
        status = StatusMessages.success("API Keys", "Existing API keys loaded from `.env`.")
    else:
        status = StatusMessages.info("API Keys", "Enter your API keys to enable cloud services.")
    return groq_key, openai_key, hf_key, status

def _save_config_helper(group_name: str, allow_empty: bool = False, **kwargs) -> str:
    """
    Validate a group of configuration values and persist them.

    Args:
        group_name: Display name for the configuration group (e.g., "API Keys");
            used as the title of every returned status message.
        allow_empty: Accepted for caller compatibility. An empty validated
            result yields the same "No changes to save." message whether or
            not this flag is set.
        **kwargs: Configuration parameters forwarded to
            ``ConfigManager.validate_config``.

    Returns:
        Status message for UI display (error, info, or success).
    """
    try:
        # Imported lazily, as in the rest of this helper's history.
        from src.ui.config_manager import ConfigManager

        validated, errors = ConfigManager.validate_config(**kwargs)

        if errors:
            problems = '; '.join(errors)
            return StatusMessages.error(group_name, f"Validation failed: {problems}")

        if validated:
            ConfigManager.save_config(validated)
            return StatusMessages.success(
                group_name,
                f"{group_name} saved successfully. Restart the application to apply changes."
            )

        # Nothing survived validation, so there is nothing to write.
        return StatusMessages.info(group_name, "No changes to save.")

    except Exception as e:
        logger.exception("Failed to save %s from UI", group_name)
        return StatusMessages.error(group_name, f"Failed to save: {e}")


def ui_save_api_keys(
    groq_api_key: str,
    openai_api_key: str,
    hugging_face_api_key: str
) -> str:
    """Persist API keys entered in the Gradio form.

    Empty inputs are forwarded as ``None`` rather than as empty strings.
    Returns the status message produced by ``_save_config_helper``.
    """
    return _save_config_helper(
        "API Keys",
        allow_empty=True,
        groq_api_key=groq_api_key or None,
        openai_api_key=openai_api_key or None,
        hugging_face_api_key=hugging_face_api_key or None,
    )


def ui_save_model_config(
    whisper_backend: str,
    whisper_model: str,
    whisper_language: str,
    diarization_backend: str,
    llm_backend: str,
) -> str:
    """Persist the model/backend selection made in the settings UI.

    Returns the status message produced by ``_save_config_helper``.
    """
    model_settings = {
        "whisper_backend": whisper_backend,
        "whisper_model": whisper_model,
        "whisper_language": whisper_language,
        "diarization_backend": diarization_backend,
        "llm_backend": llm_backend,
    }
    return _save_config_helper("Model Configuration", **model_settings)


def ui_save_processing_config(
    chunk_length: int,
    chunk_overlap: int,
    sample_rate: int,
    clean_stale: bool,
    save_intermediate: bool,
) -> str:
    """Persist audio chunking/processing settings from the settings UI.

    The short UI parameter names are mapped onto the configuration keys
    expected by ``_save_config_helper``; its status message is returned.
    """
    processing_settings = {
        "chunk_length_seconds": chunk_length,
        "chunk_overlap_seconds": chunk_overlap,
        "audio_sample_rate": sample_rate,
        "clean_stale_clips": clean_stale,
        "save_intermediate_outputs": save_intermediate,
    }
    return _save_config_helper("Processing Settings", **processing_settings)


def ui_save_ollama_config(
    ollama_model: str,
    ollama_fallback: str,
    ollama_url: str,
) -> str:
    """Persist Ollama settings from the settings UI.

    Empty inputs are forwarded as ``None`` rather than as empty strings.
    Returns the status message produced by ``_save_config_helper``.
    """
    return _save_config_helper(
        "Ollama Settings",
        allow_empty=True,
        ollama_model=ollama_model or None,
        ollama_fallback_model=ollama_fallback or None,
        ollama_base_url=ollama_url or None,
    )


def ui_save_advanced_config(
    groq_max_calls: int,
    groq_rate_period: float,
    groq_rate_burst: int,
    colab_poll: int,
    colab_timeout: int,
) -> str:
    """Persist advanced (rate-limit and Colab polling) settings from the UI.

    The UI parameter names are mapped onto the configuration keys expected by
    ``_save_config_helper``; its status message is returned.
    """
    advanced_settings = {
        "groq_max_calls_per_second": groq_max_calls,
        "groq_rate_limit_period_seconds": groq_rate_period,
        "groq_rate_limit_burst": groq_rate_burst,
        "colab_poll_interval": colab_poll,
        "colab_timeout": colab_timeout,
    }
    return _save_config_helper("Advanced Settings", **advanced_settings)


def ui_restart_application() -> str:
    """Handle the UI "restart" action.

    Logs and audits the request, asks ``RestartManager`` to restart the
    application with a 2-second delay, and returns a status message describing
    whether the restart could be initiated.
    """
    logger.info("Restart requested from UI")
    log_audit_event(
        "ui.app.restart",
        actor="ui",
        source="gradio",
        metadata={"restart_method": "ui_button"},
    )

    restarted = RestartManager.restart_application(delay_seconds=2.0)

    if not restarted:
        # Fall back to telling the user how to restart by hand.
        return StatusMessages.error(
            "Restart Failed",
            "Could not initiate automatic restart.",
            RestartManager.get_restart_instructions()
        )

    return StatusMessages.success(
        "Restart Initiated",
        "Application is restarting... The page will reload automatically.",
        "Wait a few seconds, then refresh your browser if needed."
    )


# Directory containing this file.
PROJECT_ROOT = Path(__file__).resolve().parent
# Module-wide notebook context string; read by _notebook_status() and rebound
# by _set_notebook_context().
NOTEBOOK_CONTEXT = ""
# Shared manager instances used by the helper functions in this module.
story_manager = StoryNotebookManager()
character_profile_manager = CharacterProfileManager()
speaker_profile_manager = SpeakerProfileManager()
# Module logger. Functions defined earlier in the file reference `logger` too;
# that works because the name is only looked up when they are called.
logger = get_logger(__name__)

# Create global artifact counter with 5-minute cache
_artifact_counter = CampaignArtifactCounter(
    output_dir=Config.OUTPUT_DIR,
    cache_ttl_seconds=300,
    logger=logger
)


def cancel_processing(session_id: str) -> str:
    """
    Request cancellation of a running processing session.

    Looks up the session's cancel event in ``_active_cancel_events`` and sets
    it when present.

    Args:
        session_id: The session ID to cancel

    Returns:
        A human-readable message stating whether a cancellation request was
        issued or no active session was found.
    """
    event = _active_cancel_events.get(session_id)
    if not event:
        logger.warning(f"No active processing found for session '{session_id}'")
        return f"No active processing found for session '{session_id}'. It may have already completed or not started yet."

    event.set()
    logger.info(f"Cancellation requested for session '{session_id}'")
    return f"Cancellation requested for session '{session_id}'. The pipeline will stop at the next checkpoint."


def _notebook_status() -> str:
    """Return the notebook status text for the current ``NOTEBOOK_CONTEXT``."""
    status_text = StoryNotebookManager.format_notebook_status(NOTEBOOK_CONTEXT)
    return status_text


def _set_notebook_context(value: str) -> None:
    """Rebind the module-level ``NOTEBOOK_CONTEXT`` to ``value``."""
    global NOTEBOOK_CONTEXT
    NOTEBOOK_CONTEXT = value


def _persist_active_campaign(campaign_id: Optional[str]) -> None:
    """Save the active campaign selection via ``ui_state_store``.

    This is best-effort: any exception raised while saving is logged as a
    warning and not propagated, so a persistence failure never breaks the UI.
    """
    try:
        ui_state_store.save_active_campaign(campaign_id)
    except Exception as error:  # pragma: no cover - defensive logging
        logger.warning("Failed to persist active campaign: %s", error)


# Shared campaign manager used by the campaign helper functions below.
campaign_manager = CampaignManager()
# Store used by _persist_active_campaign() to save the active campaign.
ui_state_store = UIStateStore()
# Cached campaign name map, iterated elsewhere as (campaign_id, display name)
# pairs; rebound by _refresh_campaign_names().
campaign_names = campaign_manager.get_campaign_names()


def _refresh_campaign_names() -> Dict[str, str]:
    """Reload campaigns through the manager and return the refreshed name map.

    The reload runs under the global "campaign_list" lock (10 second timeout).
    If the lock cannot be acquired, a warning is logged and the previously
    cached ``campaign_names`` mapping is returned unchanged.
    """
    global campaign_names
    try:
        list_lock = LockManager.lock(
            "global", "campaign_list", timeout=10.0, owner_id="refresh_campaigns"
        )
        # Hold the global lock so a half-written campaign list is never read.
        with list_lock:
            # Calling _load_campaigns() directly replaces whatever the manager
            # currently holds in memory, which matters right after a campaign
            # has been created or deleted.
            campaign_manager.campaigns = campaign_manager._load_campaigns()
            refreshed = campaign_manager.get_campaign_names()
            campaign_names = refreshed
            logger.debug(f"Refreshed campaign names: {list(refreshed.values())}")
            return refreshed
    except LockTimeoutError:
        logger.warning("Could not acquire global campaign list lock, returning cached names.")
        return campaign_names


def _campaign_id_from_name(display_name: Optional[str], campaign_names_map: Optional[Dict[str, str]] = None) -> Optional[str]:
    """Look up the campaign ID whose display name equals ``display_name``.

    Args:
        display_name: Campaign name as shown in the UI; falsy values yield None.
        campaign_names_map: Optional ``{campaign_id: name}`` mapping to search.
            When omitted (``None``), the map is obtained from
            ``_refresh_campaign_names()``.

    Returns:
        The first matching campaign ID in mapping order, or ``None`` if no
        campaign has that name.
    """
    if not display_name:
        return None

    if campaign_names_map is None:
        lookup = _refresh_campaign_names()
    else:
        lookup = campaign_names_map

    matches = (cid for cid, name in lookup.items() if name == display_name)
    return next(matches, None)


def _format_campaign_badge(campaign_id: Optional[str]) -> str:
    """Build the campaign badge markdown shown on the Process tab.

    Returns a warning when no campaign is selected, an error when the campaign
    cannot be found, and otherwise a success badge naming the campaign and
    summarising its linked party.
    """
    if not campaign_id:
        return StatusMessages.warning(
            "Campaign",
            "No campaign selected. Use the Campaign Launcher to choose one."
        )

    campaign = campaign_manager.get_campaign(campaign_id)
    if not campaign:
        return StatusMessages.error(
            "Campaign",
            f"Campaign `{campaign_id}` was not found in campaigns.json."
        )

    if campaign.party_id:
        party = party_manager.get_party(campaign.party_id)
        party_line = (
            f"Party `{campaign.party_id}`: {len(party.characters)} characters."
            if party
            else f"Party `{campaign.party_id}` not found. Update party configuration."
        )
    else:
        party_line = "No party assigned yet."

    return StatusMessages.success(
        "Campaign Active",
        f"Working in **{campaign.name}** (`{campaign_id}`).",
        party_line,
    )


def _build_campaign_manifest(campaign_id: Optional[str]) -> str:
    """Build the campaign manifest using this module's shared managers.

    Delegates to ``build_campaign_manifest`` with the module-level campaign
    manager, party manager and artifact counter.
    """
    manifest = build_campaign_manifest(
        campaign_id, campaign_manager, party_manager, _artifact_counter
    )
    return manifest


def _campaign_summary_message(campaign_id: Optional[str], *, is_new: bool = False) -> str:
    """Build the summary markdown for the active campaign.

    Args:
        campaign_id: ID of the active campaign, or a falsy value if none.
        is_new: When True the message is prefixed with "New campaign created."
            instead of "Campaign ready.".

    Returns:
        A warning (no campaign), error (campaign not loadable) or success
        message that includes a one-sentence note about the linked party.
    """
    if not campaign_id:
        return StatusMessages.warning(
            "No Campaign Selected",
            "Choose an existing campaign or create a new one to begin."
        )

    campaign = campaign_manager.get_campaign(campaign_id)
    if not campaign:
        return StatusMessages.error(
            "Campaign Not Found",
            f"Campaign `{campaign_id}` could not be loaded."
        )

    # Exactly one party sentence is produced, whichever branch is taken.
    if not campaign.party_id:
        party_note = "No party linked yet."
    else:
        party = party_manager.get_party(campaign.party_id)
        if party:
            party_note = f"Party `{campaign.party_id}` with {len(party.characters)} characters."
        else:
            party_note = f"Party `{campaign.party_id}` is missing."

    if is_new:
        prefix = "New campaign created."
    else:
        prefix = "Campaign ready."

    return StatusMessages.success(
        "Campaign Loaded",
        f"{prefix} Active campaign: **{campaign.name}** (`{campaign_id}`).",
        party_note,
    )


def _process_defaults_for_campaign(campaign_id: Optional[str]) -> Dict[str, Any]:
    """Compute the Process tab's default settings for a campaign.

    Built-in defaults are returned when ``campaign_id`` is falsy or the
    campaign cannot be found; otherwise every value is taken from the
    campaign (its ``party_id`` and its ``settings`` attributes).
    """
    defaults: Dict[str, Any] = dict(
        party_selection="Manual Entry",
        num_speakers=4,
        skip_diarization=False,
        skip_classification=False,
        skip_snippets=True,
        skip_knowledge=False,
    )

    campaign = campaign_manager.get_campaign(campaign_id) if campaign_id else None
    if not campaign:
        return defaults

    # A campaign without a linked party falls back to manual entry.
    defaults["party_selection"] = campaign.party_id or "Manual Entry"
    settings = campaign.settings
    for option in (
        "num_speakers",
        "skip_diarization",
        "skip_classification",
        "skip_snippets",
        "skip_knowledge",
    ):
        defaults[option] = getattr(settings, option)
    return defaults


def _campaign_overview_markdown(campaign_id: Optional[str]) -> str:
    """Render the campaign overview via the dashboard helper, using the shared manager and artifact counter."""
    overview = campaign_overview_markdown(campaign_id, campaign_manager, _artifact_counter)
    return overview


def _knowledge_summary_markdown(campaign_id: Optional[str]) -> str:
    """Render the knowledge summary for ``campaign_id`` via the dashboard helper."""
    summary = knowledge_summary_markdown(campaign_id)
    return summary


def _character_overview_placeholder() -> str:
    """Return the info message shown before any character has been selected."""
    title = "Character Overview"
    prompt = "Select a character to view their profile summary."
    return StatusMessages.info(title, prompt)


def _character_tab_updates(
    campaign_id: Optional[str]
) -> Tuple[Any, Any, Any, Any, Any]:
    """Build the Gradio updates that refresh the Characters tab.

    Returns:
        ``(status, table, dropdown, dropdown, overview)`` updates. The same
        dropdown update object appears twice, and the overview is reset to the
        placeholder message. The dropdown preselects the first character when
        the snapshot lists any.
    """
    snapshot = character_tab_snapshot(campaign_id)
    names = snapshot["characters"]

    selector = gr.update(choices=names, value=names[0] if names else None)
    table = gr.update(value=snapshot["table"])
    status = gr.update(value=snapshot["status"])
    overview = gr.update(value=_character_overview_placeholder())
    return status, table, selector, selector, overview


def _character_profiles_markdown(campaign_id: Optional[str]) -> str:
    """Render a markdown bullet list of the campaign's character profiles.

    When no campaign is given or the snapshot lists no characters, the
    snapshot's status text is returned instead. Characters for which no
    profile can be loaded are left out of the list.
    """
    snapshot = character_tab_snapshot(campaign_id)
    names = snapshot["characters"]
    if not (campaign_id and names):
        return snapshot["status"]

    profiles = CharacterProfileManager()
    bullets = []
    for character in names:
        profile = profiles.get_profile(character)
        if profile:
            # Substitute readable placeholders for missing fields.
            class_name = profile.class_name or "Unknown class"
            level = "n/a" if profile.level is None else profile.level
            last_updated = profile.last_updated or "unknown"
            bullets.append(
                f"- **{profile.name}** ({class_name}, level {level}) - last updated {last_updated}"
            )
    return "\n".join(["### Characters", *bullets])


def _extract_party_dropdown_update(campaign_id: Optional[str]) -> Any:
    """Build the Gradio update for the party dropdown.

    Choices are every listed party except "Manual Entry". The campaign's
    linked party is preselected when it is among the choices; otherwise the
    first choice is used, or ``None`` when there are no choices.
    """
    selectable = [name for name in party_manager.list_parties() if name != "Manual Entry"]

    campaign = campaign_manager.get_campaign(campaign_id) if campaign_id else None
    linked_party = campaign.party_id if campaign else None

    if linked_party and linked_party in selectable:
        chosen = linked_party
    elif selectable:
        chosen = selectable[0]
    else:
        chosen = None
    return gr.update(choices=selectable, value=chosen)


def _session_library_markdown(campaign_id: Optional[str]) -> str:
    """Render the session library via the dashboard helper, using the shared story and campaign managers."""
    library = session_library_markdown(campaign_id, story_manager, campaign_manager)
    return library


def _narrative_hint_markdown(campaign_id: Optional[str]) -> str:
    """Render the narrative hint via the dashboard helper, using the shared story manager."""
    hint = narrative_hint_markdown(campaign_id, story_manager)
    return hint


def _diagnostics_markdown(campaign_id: Optional[str]) -> str:
    """Render the diagnostics markdown for ``campaign_id`` via the dashboard helper."""
    report = diagnostics_markdown(campaign_id)
    return report


def _chat_status_markdown(campaign_id: Optional[str]) -> str:
    """Render the chat status markdown for ``campaign_id`` via the dashboard helper."""
    chat_status = chat_status_markdown(campaign_id)
    return chat_status


def _resolve_audio_path(audio_file: Any) -> Path:
    """
    Normalize Gradio file input into a filesystem path with validation.

    Handles multiple input types from different Gradio versions and deployment
    environments: strings, Path objects, dicts, temporary file wrappers, and lists.

    Args:
        audio_file: Audio file input from Gradio (str, Path, dict, list, or object with .name)

    Returns:
        Path: Validated path to audio file

    Raises:
        ValueError: If no audio file is provided (None, an empty list, or a
            blank string) or the path doesn't exist
        TypeError: If audio file type is unsupported
    """
    if audio_file is None:
        raise ValueError("No audio file provided")

    candidate = audio_file

    # Handle list input (batch upload or multi-file selection): use the first entry.
    if isinstance(candidate, list):
        if not candidate:
            raise ValueError("Empty audio input list")
        candidate = candidate[0]

    # Handle dict input (some Gradio versions return dicts): take the first
    # non-empty value among the known path keys.
    if isinstance(candidate, dict):
        for key in ("name", "path", "tempfile"):
            value = candidate.get(key)
            if value:
                candidate = value
                break

    # Handle objects with .name attribute (file-like objects)
    # BUT skip Path objects which also have .name (returns just filename)
    if hasattr(candidate, "name") and not isinstance(candidate, (str, os.PathLike)):
        candidate = candidate.name

    # Validate type before converting to Path
    if not isinstance(candidate, (str, os.PathLike)):
        raise TypeError(f"Unsupported audio file type: {type(candidate)}")

    # Path("") is the current directory, which always "exists"; a blank string
    # must therefore be rejected explicitly instead of passing the check below.
    if isinstance(candidate, str) and not candidate.strip():
        raise ValueError("No audio file provided")

    # Convert to Path and validate existence
    resolved_path = Path(candidate)
    if not resolved_path.exists():
        raise ValueError(f"Audio file not found: {resolved_path}")

    return resolved_path


def _create_processor_for_context(
    session_id: str,
    party_selection: Optional[str],
    character_names: str,
    player_names: str,
    num_speakers: Optional[int],
    language: Optional[str],
    campaign_id: Optional[str],
    transcription_backend: str,
    diarization_backend: str,
    classification_backend: str,
    *,
    allow_empty_names: bool = False,
    enable_audit_mode: bool = False,
    redact_prompts: bool = False,
    generate_scenes: bool = True,
    scene_summary_mode: str = "template",
) -> DDSessionProcessor:
    """Build a ``DDSessionProcessor`` from either a party or manual name entry.

    A ``party_selection`` other than "Manual Entry" is passed on as
    ``party_id``. Otherwise ``character_names`` / ``player_names`` are parsed
    as comma-separated lists (trimmed, blanks dropped, duplicates removed with
    first occurrence kept) and passed on when non-empty.

    A falsy ``num_speakers`` defaults to 4 and a falsy ``language`` to "en".

    Raises:
        ValueError: In manual mode when no character names are given and
            ``allow_empty_names`` is False.
    """

    def _unique_names(raw: Optional[str]) -> List[str]:
        # Split on commas, trim, drop blanks, then de-duplicate in order.
        trimmed = [piece.strip() for piece in (raw or "").split(',') if piece.strip()]
        return list(dict.fromkeys(trimmed))

    processor_args: Dict[str, Any] = dict(
        session_id=session_id,
        campaign_id=campaign_id,
        num_speakers=int(num_speakers) if num_speakers else 4,
        language=language or "en",
        transcription_backend=transcription_backend,
        diarization_backend=diarization_backend,
        classification_backend=classification_backend,
        enable_audit_mode=enable_audit_mode,
        redact_prompts=redact_prompts,
        generate_scenes=generate_scenes,
        scene_summary_mode=scene_summary_mode,
    )

    manual_entry = not party_selection or party_selection == "Manual Entry"
    if not manual_entry:
        processor_args["party_id"] = party_selection
        return DDSessionProcessor(**processor_args)

    characters = _unique_names(character_names)
    players = _unique_names(player_names)

    if not (characters or allow_empty_names):
        raise ValueError("Character names are required when using Manual Entry")

    if characters:
        processor_args["character_names"] = characters
    if players:
        processor_args["player_names"] = players

    return DDSessionProcessor(**processor_args)


def _format_highlighted_transcript(classification_file_path: Optional[str]) -> Optional[List[Tuple[str, str]]]:
    """
    Convert a classification JSON file into ``(text, label)`` pairs for
    Gradio's HighlightedText component.

    Each segment becomes ``"<actor>: <text>\\n"`` labelled with its
    ``classification_type`` (falling back to ``classification``, then
    "UNKNOWN"). The actor is the character name, else the speaker name, else
    "???".

    Returns:
        The list of pairs, or ``None`` when the path is empty, the file does
        not exist, or reading/formatting fails (the failure is logged).
    """
    if not classification_file_path:
        return None
    if not Path(classification_file_path).exists():
        return None

    def _as_pair(segment):
        # Prefer the granular type; fall back to the primary classification.
        label = segment.get("classification_type", segment.get("classification", "UNKNOWN"))
        actor = segment.get("character_name") or segment.get("speaker_name", "???")
        body = segment.get("text", "")
        # Trailing newline keeps consecutive segments visually separated.
        return (f'{actor}: {body}\n', label)

    try:
        with open(classification_file_path, 'r', encoding='utf-8') as handle:
            segments = json.load(handle)
        return [_as_pair(segment) for segment in segments]
    except Exception as e:
        logger.error(f"Failed to format highlighted transcript: {e}", exc_info=True)
        return None


def process_session(
    audio_file,
    session_id: str,
    party_selection: Optional[str],
    character_names: str,
    player_names: str,
    num_speakers: int,
    language: str,
    skip_diarization: bool,
    skip_classification: bool,
    skip_snippets: bool,
    skip_knowledge: bool,
    transcription_backend: str,
    diarization_backend: str,
    classification_backend: str,
    campaign_id: Optional[str] = None,
    enable_audit_mode: bool = False,
    redact_prompts: bool = False,
    generate_scenes: bool = True,
    scene_summary_mode: str = "template",
    progress=gr.Progress(),
) -> Dict:
    """Main session processing function with progress tracking.

    Builds a processor for the given party/manual inputs, runs the pipeline
    under a per-session lock, and packages the pipeline outputs for the UI.
    Audit events are logged at start and on completion, error, or cancellation.

    Args:
        audio_file: Uploaded audio input as delivered by Gradio; ``None``
            yields an error result.
        session_id: Session identifier; a falsy value falls back to "session".
        party_selection, character_names, player_names, num_speakers, language,
        campaign_id, transcription_backend, diarization_backend,
        classification_backend, enable_audit_mode, redact_prompts,
        generate_scenes, scene_summary_mode: Forwarded to
            ``_create_processor_for_context``.
        skip_diarization, skip_classification, skip_snippets, skip_knowledge:
            Forwarded to the processor's ``process`` call.
        progress: Gradio progress tracker, updated through the pipeline's
            progress callback.

    Returns:
        A dict whose ``status`` is "success", "error" or "cancelled". Success
        results carry the transcript texts, highlighted transcript, statistics,
        snippet locations, knowledge extraction data and output file map;
        other results carry ``message`` and (usually) ``details``.
    """
    try:
        if audio_file is None:
            return {"status": "error", "message": "Please upload an audio file."}

        resolved_session_id = session_id or "session"

        log_audit_event(
            "ui.session.process.start",
            actor="ui",
            source="gradio",
            metadata={
                "session_id": resolved_session_id,
                "party_selection": party_selection,
                "num_speakers": num_speakers,
                "skip_diarization": skip_diarization,
                "skip_classification": skip_classification,
                "skip_snippets": skip_snippets,
                "skip_knowledge": skip_knowledge,
                "transcription_backend": transcription_backend,
                "diarization_backend": diarization_backend,
                "classification_backend": classification_backend,
                "campaign_id": campaign_id,
                "enable_audit_mode": enable_audit_mode,
                "redact_prompts": redact_prompts,
                "generate_scenes": generate_scenes,
                "scene_summary_mode": scene_summary_mode,
            },
        )

        # Determine if using party config or manual entry
        processor = _create_processor_for_context(
            resolved_session_id,
            party_selection,
            character_names,
            player_names,
            num_speakers,
            language,
            campaign_id,
            transcription_backend,
            diarization_backend,
            classification_backend,
            allow_empty_names=False,
            enable_audit_mode=enable_audit_mode,
            redact_prompts=redact_prompts,
            generate_scenes=generate_scenes,
            scene_summary_mode=scene_summary_mode,
        )

        # Cancel event for this run; it is published in the registry only once
        # the session lock is held (see below).
        cancel_event = Event()

        # Create progress callback for Gradio progress tracker
        def progress_callback(progress_value: float, message: str):
            """Update Gradio progress bar."""
            progress(progress_value, desc=message)

        try:
            # The session lock ensures no two runs process the SAME session ID.
            # The short timeout makes a duplicate request fail fast instead of
            # queueing behind a long-running pipeline.
            with LockManager.lock("session", resolved_session_id, timeout=1.0, owner_id="process_session"):
                # We also might want to "read lock" the campaign so it isn't deleted while processing
                # But we don't have read/write locks yet.

                # Register the cancel event only while holding the lock. A
                # duplicate request that fails to get the lock must never
                # replace or remove the running session's event, otherwise the
                # running session could no longer be cancelled.
                _active_cancel_events[resolved_session_id] = cancel_event
                try:
                    pipeline_result = processor.process(
                        input_file=_resolve_audio_path(audio_file),
                        skip_diarization=skip_diarization,
                        skip_classification=skip_classification,
                        skip_snippets=skip_snippets,
                        skip_knowledge=skip_knowledge,
                        cancel_event=cancel_event,
                        progress_callback=progress_callback
                    )
                finally:
                    # Clean up regardless of outcome, but only our own entry.
                    if _active_cancel_events.get(resolved_session_id) is cancel_event:
                        _active_cancel_events.pop(resolved_session_id, None)
        except LockTimeoutError:
            return {
                "status": "error",
                "message": f"Session '{resolved_session_id}' is already being processed or locked.",
                "details": "Please wait for the current operation to finish or choose a different session ID."
            }

        if not isinstance(pipeline_result, dict):
            log_audit_event(
                "ui.session.process.error",
                actor="ui",
                source="gradio",
                status="error",
                metadata={
                    "session_id": resolved_session_id,
                    "campaign_id": campaign_id,
                    "error": "Non-dict pipeline response",
                },
            )
            return {
                "status": "error",
                "message": "Pipeline did not return a result. Check preflight checks and logs for details.",
                "details": f"Unexpected pipeline response: {type(pipeline_result).__name__}",
            }

        output_files = pipeline_result.get("output_files") or {}

        def _read_output_file(key: str) -> str:
            """Return the text of the named output file, or "" if unavailable."""
            path = output_files.get(key)
            if not path:
                return ""
            try:
                return Path(path).read_text(encoding="utf-8")
            except Exception as read_error:
                # Best effort: a missing/unreadable output must not fail the
                # whole result, but leave a trace for troubleshooting.
                logger.warning("Could not read '%s' output file: %s", key, read_error)
                return ""

        snippet_export = pipeline_result.get("audio_segments") or {}
        snippet_payload = {
            "segments_dir": str(snippet_export.get("segments_dir")) if snippet_export.get("segments_dir") else None,
            "manifest": str(snippet_export.get("manifest")) if snippet_export.get("manifest") else None,
        }

        # Format the classification output for the highlighted transcript component
        highlighted_transcript = _format_highlighted_transcript(
            output_files.get("classification_json")
        )

        log_audit_event(
            "ui.session.process.complete",
            actor="ui",
            source="gradio",
            status="success",
            metadata={
                "session_id": resolved_session_id,
                "campaign_id": campaign_id,
                "outputs": list(output_files.keys()),
                "has_snippets": bool(snippet_payload["segments_dir"]),
            },
        )

        # Clear artifact cache for this campaign to ensure fresh counts on next UI refresh
        if campaign_id:
            _artifact_counter.clear_cache(campaign_id)
            logger.debug(f"Cleared artifact cache for campaign '{campaign_id}' after processing session '{resolved_session_id}'")

        return {
            "status": "success",
            "message": "Session processed successfully.",
            "full": _read_output_file("full"),
            "ic": _read_output_file("ic_only"),
            "ooc": _read_output_file("ooc_only"),
            "highlighted_transcript": highlighted_transcript,
            "stats": pipeline_result.get("statistics") or {},
            "snippet": snippet_payload,
            "knowledge": pipeline_result.get("knowledge_extraction") or {},
            "output_files": output_files,
        }

    except CancelledError as e:
        logger.info(f"Session processing cancelled for '{resolved_session_id}': {e}")
        log_audit_event(
            "ui.session.process.cancelled",
            actor="user",
            source="gradio",
            status="cancelled",
            metadata={
                "session_id": resolved_session_id,
                "campaign_id": campaign_id,
            },
        )
        return {
            "status": "cancelled",
            "message": "Processing was cancelled by user.",
            "details": str(e),
        }

    except Exception as e:
        logger.exception("Error during session processing in Gradio UI")

        # Sanitize error message to avoid leaking internal paths
        error_msg = StatusMessages._sanitize_text(str(e))

        log_audit_event(
            "ui.session.process.error",
            actor="ui",
            source="gradio",
            status="error",
            metadata={
                # resolved_session_id may not be bound yet if the failure
                # happened very early, so recompute it here.
                "session_id": session_id or "session",
                "campaign_id": campaign_id,
                "error": error_msg,
            },
        )
        return {
            "status": "error",
            "message": error_msg,
            "details": f"See log for details: {get_log_file_path()}",
        }


def run_preflight_checks(
    party_selection,
    character_names,
    player_names,
    num_speakers,
    language,
    skip_diarization,
    skip_classification,
    transcription_backend,
    diarization_backend,
    classification_backend,
    campaign_id,
):
    """Run dependency checks before long-running processing.

    Builds a processor from the current UI selections (under the fixed
    session id ``"preflight_check"``), flags it as a test run, asks it to
    run only its preflight checks, and renders the reported issues as a
    Markdown status message.

    Args:
        party_selection: Forwarded unchanged to the processor factory.
        character_names: Forwarded to the factory; falsy values become ``""``.
        player_names: Forwarded to the factory; falsy values become ``""``.
        num_speakers: Forwarded unchanged to the processor factory.
        language: Forwarded unchanged to the processor factory.
        skip_diarization: Coerced to ``bool`` and passed to the preflight run.
        skip_classification: Coerced to ``bool`` and passed to the preflight run.
        transcription_backend: Forwarded unchanged to the processor factory.
        diarization_backend: Forwarded unchanged to the processor factory.
        classification_backend: Forwarded unchanged to the processor factory.
        campaign_id: Forwarded unchanged to the processor factory.

    Returns:
        The result of ``StatusMessages.success(...)`` when no issues are
        reported; a "Preflight Failed" Markdown string when at least one
        issue reports ``is_error()``; otherwise a "Preflight Warnings"
        Markdown string. Any unexpected exception is logged with its
        traceback and converted into ``StatusMessages.error(...)`` instead
        of being propagated to the caller.
    """
    try:
        processor = _create_processor_for_context(
            session_id="preflight_check",
            party_selection=party_selection,
            character_names=character_names or "",
            player_names=player_names or "",
            num_speakers=num_speakers,
            language=language,
            campaign_id=campaign_id,
            transcription_backend=transcription_backend,
            diarization_backend=diarization_backend,
            classification_backend=classification_backend,
            allow_empty_names=True,
        )
        processor.is_test_run = True
        issues = processor.run_preflight_checks_only(
            skip_diarization=bool(skip_diarization),
            skip_classification=bool(skip_classification),
        )

        if not issues:
            return StatusMessages.success(
                "Preflight Checks",
                "All systems ready for processing."
            )

        # Split issues by severity so errors are always listed before warnings.
        error_issues = [issue for issue in issues if issue.is_error()]
        warning_issues = [issue for issue in issues if not issue.is_error()]

        def _format_section(title: str, entries: List[PreflightIssue]) -> List[str]:
            """Render one titled Markdown bullet list; empty input yields no lines."""
            if not entries:
                return []
            lines = [f"**{title}:**"]
            lines.extend(
                f"- `{issue.component}`: {issue.message}" for issue in entries
            )
            return lines

        sections: List[str] = []
        sections.extend(_format_section("Errors", error_issues))
        sections.extend(_format_section("Warnings", warning_issues))
        details = "\n".join(sections)

        # Any blocking error turns the whole report into a failure, even if
        # warnings are present as well.
        if error_issues:
            return "\n".join([
                f"### {StatusIndicators.ERROR} Preflight Failed",
                "",
                "Resolve the blocking issues below before starting processing.",
                "",
                details,
            ])

        return "\n".join([
            f"### {StatusIndicators.WARNING} Preflight Warnings",
            "",
            "Processing can continue, but review the warnings below:",
            "",
            details,
        ])
    except Exception as exc:
        # Top-level UI boundary: keep the traceback in the log so the failure
        # can be diagnosed, then surface a friendly message to the user.
        logger.exception("Error during preflight checks in Gradio UI")
        return StatusMessages.error(
            "Preflight Failed",
            "An unexpected error occurred while running the checks.",
            str(exc),
        )


# ============================================================================
# Pipeline Stage Control Functions
# ============================================================================

def update_skip_flags_from_stage(run_until_stage):
    """
    Derive the skip-checkbox values implied by the 'Run Until Stage' choice.

    Args:
        run_until_stage: Key of the selected stage ("full", "stage_4",
            "stage_5", "stage_6" or "stage_7"). Any other value is treated
            like "full", i.e. nothing is skipped.

    Returns:
        Tuple of four ``gr.update(value=...)`` objects, in the order
        (skip_diarization, skip_classification, skip_snippets, skip_knowledge).
    """
    run_everything = (False, False, False, False)

    # Flag order: (skip_diarization, skip_classification, skip_snippets, skip_knowledge)
    flags_by_stage = {
        "full": run_everything,                 # Run all stages
        "stage_4": (True, True, True, True),    # Stop after transcription
        "stage_5": (False, True, True, True),   # Stop after diarization
        "stage_6": (False, False, True, True),  # Stop after classification
        "stage_7": (False, False, True, True),  # Stop after outputs (skip snippets & knowledge)
    }

    selected_flags = flags_by_stage.get(run_until_stage, run_everything)

    # One Gradio update per checkbox, preserving the flag order above.
    return tuple(gr.update(value=flag) for flag in selected_flags)


# ============================================================================
# Resume from Intermediate Outputs Functions