"""
Sentence-level sentiment scoring for clinical text
====================================================
Provides a VADER-based ``SentimentScorer`` that avoids the full-text
saturation problem specific to clinical discharge notes, and a utility for
Z-score normalising a column of raw scores across a sample dataset.

Background
----------
Standard full-text VADER compound scoring saturates at −1.0 for >94 % of
clinical discharge summaries because clinical language is lexically negative
("pain", "failure", "respiratory distress"). After Z-scoring a saturated
distribution every patient receives the same score, making the metric useless
for discrimination.

Sentence-level averaging (score each sentence independently, take the mean)
avoids saturation and closely approximates the word-averaged approach used by
the ``pattern.en`` library in the original Boag et al. 2018 implementation.

Reference
---------
Boag et al. "Racial Disparities and Mistrust in End-of-Life Care."
MLHC 2018. https://arxiv.org/abs/1808.03827
"""

from typing import Dict, List, Optional

import numpy as np


class SentimentScorer:
    """Sentence-level VADER sentiment scorer for clinical text.

    Scores a document by:
    1. Tokenising into sentences with NLTK's ``sent_tokenize``.
    2. Computing the VADER compound score for each sentence.
    3. Returning the mean compound score across all sentences.

    This avoids the full-text VADER saturation problem that affects clinical
    discharge notes (>94 % saturate at −1.0).

    The scorer is intentionally stateless and thread-safe after initialisation.

    Args:
        language: Reserved for future multilingual support. Only ``"english"``
            is currently supported (NLTK sentence tokeniser).

    Examples:
        >>> scorer = SentimentScorer()
        >>> scorer.score("The patient is calm and alert. No acute distress.")
        0.412
        >>> scorer.score("Patient unresponsive, severe respiratory failure.")
        -0.613
    """

    def __init__(self, language: str = "english") -> None:
        # nltk is imported lazily here so that merely importing this module
        # does not require the optional dependency to be installed.
        try:
            from nltk.sentiment.vader import SentimentIntensityAnalyzer
            from nltk.tokenize import sent_tokenize
        except ImportError as e:
            raise ImportError(
                "nltk is required for SentimentScorer. "
                "Install with: pip install nltk && python -c "
                "\"import nltk; nltk.download('vader_lexicon'); "
                "nltk.download('punkt_tab')\""
            ) from e

        self._sid = SentimentIntensityAnalyzer()
        self._sent_tokenize = sent_tokenize
        self.language = language

    def score(self, text: str) -> float:
        """Compute the mean sentence-level VADER compound score for a document.

        Args:
            text: Raw document text. Empty or whitespace-only text returns 0.0.

        Returns:
            Mean VADER compound score in [−1.0, +1.0].
            Higher values indicate more positive sentiment.
        """
        if not text or not text.strip():
            return 0.0
        sentences = self._sent_tokenize(text)
        if not sentences:
            return 0.0
        scores = [
            self._sid.polarity_scores(s)["compound"]
            for s in sentences
        ]
        return float(np.mean(scores))

    def score_batch(self, texts: List[str]) -> List[float]:
        """Score a list of documents.

        Args:
            texts: List of raw document strings.

        Returns:
            List of mean sentence-level compound scores.
        """
        return [self.score(t) for t in texts]

    def negate_and_zscore(self, raw_scores: Dict) -> Dict:
        """Negate and Z-score a dict of {key: raw_score} values.

        Applies the normalisation from Boag et al. 2018:

            neg_score[key] = -(raw_score[key] - μ) / σ

        Higher output value → more negative sentiment → more mistrust signal.

        Args:
            raw_scores: Dict mapping any key to a raw compound score.

        Returns:
            Dict with the same keys mapped to negated Z-scores. If the raw
            scores have zero variance, every key maps to 0.0.
        """
        vals = np.array(list(raw_scores.values()), dtype=np.float64)
        mu, sigma = vals.mean(), vals.std()
        if sigma == 0.0:
            # Zero variance: every value equals the mean, so every Z-score is 0.
            return {k: 0.0 for k in raw_scores}
        return {k: float(-(v - mu) / sigma) for k, v in raw_scores.items()}


def normalize_sentiment_scores(sample_dataset, feature_key: str = "neg_sentiment") -> None:
    """Z-score normalise a raw sentiment feature column in-place across all samples.

    The ``MistrustSentimentMIMIC3`` task stores the *raw* negated sentiment
    score (``-mean_sentence_compound``) per sample. Because Z-scoring requires
    the global mean and standard deviation across all patients, it cannot be
    done inside ``__call__`` (which processes one patient at a time). Call
    this utility **after** ``dataset.set_task()`` to complete the normalisation.

    The composite transformation (task negation + this centering/scaling) is
    identical to Boag et al. 2018:

        neg_score = -(raw_score - μ_all) / σ_all

    Note that the negation is already baked into the stored raw values, so
    this function applies only ``(raw - μ) / σ``.

    If the raw scores have zero variance, every sample's feature is set to
    ``[0.0]`` (each value equals the mean, so each Z-score is 0) — the column
    always ends up on the Z-score scale.

    Args:
        sample_dataset: A PyHealth ``SampleDataset`` produced by
            ``base_dataset.set_task(MistrustSentimentMIMIC3(...))``.
        feature_key: Name of the sentiment feature in each sample.
            Defaults to ``"neg_sentiment"``.

    Example:
        >>> sample_dataset = base_dataset.set_task(MistrustSentimentMIMIC3(...))
        >>> normalize_sentiment_scores(sample_dataset)
        >>> # neg_sentiment values are now Z-scored across the full dataset
    """
    raw_vals = np.array(
        [s[feature_key][0] for s in sample_dataset.samples],
        dtype=np.float64,
    )
    mu, sigma = raw_vals.mean(), raw_vals.std()
    if sigma == 0.0:
        # Degenerate distribution: write the zeros out instead of leaving the
        # raw values in place, so callers always get Z-scored values
        # (consistent with SentimentScorer.negate_and_zscore).
        for sample in sample_dataset.samples:
            sample[feature_key] = [0.0]
        return
    for sample in sample_dataset.samples:
        raw = sample[feature_key][0]
        sample[feature_key] = [float((raw - mu) / sigma)]
Negate: ``raw_neg_score = -mean_sentence_polarity`` + (higher → more negative → more mistrust signal). + +Z-score normalisation (``neg_score = -(raw - μ) / σ``) requires global +statistics and must be applied **after** ``set_task()`` using the provided +``normalize_sentiment_scores`` utility from ``pyhealth.nlp``. + +Output feature +-------------- +``neg_sentiment`` — a single-element list ``[float]`` stored as a ``"tensor"`` +feature. The one-element list satisfies PyHealth's TensorProcessor which +expects an iterable of numerics. + +Usage +----- + >>> from pyhealth.datasets import MIMIC3Dataset + >>> from pyhealth.tasks import MistrustSentimentMIMIC3 + >>> from pyhealth.nlp import normalize_sentiment_scores + >>> from pyhealth.models import LogisticRegression + >>> + >>> base_dataset = MIMIC3Dataset( + ... root="/path/to/mimic-iii/1.4", + ... tables=["NOTEEVENTS"], + ... ) + >>> task = MistrustSentimentMIMIC3() + >>> sample_dataset = base_dataset.set_task(task) + >>> normalize_sentiment_scores(sample_dataset) # Z-score in-place + >>> model = LogisticRegression(dataset=sample_dataset) +""" + +from typing import Any, Dict, List, Optional + +from pyhealth.tasks.base_task import BaseTask + + +class MistrustSentimentMIMIC3(BaseTask): + """Compute negative-sentiment mistrust proxy from MIMIC-III discharge notes. + + For each hospital admission the task produces one sample: + + - ``neg_sentiment``: a one-element list ``[float]`` — the raw negated + mean sentence-level VADER compound score across all discharge summaries + for this admission (schema: ``"tensor"``). Values are negated so that + higher = more negative sentiment = more mistrust signal. Call + ``pyhealth.nlp.normalize_sentiment_scores(sample_dataset)`` after + ``set_task()`` to complete the Z-score normalisation step. + + - ``noncompliance``: ``1`` if any note for this admission contains + ``"noncompliant"``, else ``0`` (schema: ``"binary"``). 
This matches + the output label of ``MistrustNoncomplianceMIMIC3``, enabling direct + comparison of the three mistrust proxies on the same task. + + Args: + min_notes: Minimum number of discharge summary notes required for a + sample to be included. Defaults to 1. + output_label: Column name and key of the binary output label. + Change to ``"autopsy_consent"`` to align with + ``MistrustAutopsyMIMIC3``. Defaults to ``"noncompliance"``. + + Examples: + >>> from pyhealth.datasets import MIMIC3Dataset + >>> from pyhealth.tasks import MistrustSentimentMIMIC3 + >>> from pyhealth.nlp import normalize_sentiment_scores + >>> + >>> base_dataset = MIMIC3Dataset( + ... root="/path/to/mimic-iii/1.4", + ... tables=["NOTEEVENTS"], + ... ) + >>> task = MistrustSentimentMIMIC3() + >>> sample_dataset = base_dataset.set_task(task) + >>> normalize_sentiment_scores(sample_dataset) + >>> len(sample_dataset) + 52726 + """ + + task_name: str = "MistrustSentimentMIMIC3" + input_schema: Dict[str, str] = {"neg_sentiment": "tensor"} + output_schema: Dict[str, str] = {"noncompliance": "binary"} + + def __init__( + self, + min_notes: int = 1, + output_label: str = "noncompliance", + ) -> None: + self.min_notes = min_notes + self.output_label = output_label + # output_schema is a class attribute; update it to reflect output_label + self.output_schema = {output_label: "binary"} + + # Lazy-initialise scorer to avoid importing nltk at module load time + self._scorer: Optional[Any] = None + + def _get_scorer(self): + """Lazily initialise SentimentScorer on first use.""" + if self._scorer is None: + from pyhealth.nlp import SentimentScorer + self._scorer = SentimentScorer() + return self._scorer + + @staticmethod + def _noncompliance_label(noteevents: List[Any]) -> int: + """Return 1 if any note contains 'noncompliant', else 0.""" + for ev in noteevents: + if "noncompliant" in str(getattr(ev, "text", "") or "").lower(): + return 1 + return 0 + + @staticmethod + def _autopsy_label(noteevents: 
List[Any]) -> Optional[int]: + """Return autopsy consent label (1/0) or None if absent/ambiguous.""" + consented = declined = False + for ev in noteevents: + text = str(getattr(ev, "text", "") or "").lower() + if "autopsy" not in text: + continue + for line in text.split("\n"): + if "autopsy" not in line: + continue + if any(w in line for w in ("decline", "not consent", "refuse", "denied")): + declined = True + if any(w in line for w in ("consent", "agree", "request")): + consented = True + if consented and declined: + return None + if consented: + return 1 + if declined: + return 0 + return None + + def __call__(self, patient: Any) -> List[Dict[str, Any]]: + """Process a single patient into negative-sentiment classification samples. + + Args: + patient: a PyHealth Patient object with ``noteevents`` loaded. + + Returns: + List of dicts, one per admission that has ≥ ``min_notes`` discharge + summaries, each containing: + - ``patient_id`` + - ``visit_id`` (hadm_id) + - ``neg_sentiment`` (list of one float — raw negated score) + - output label (``noncompliance`` or ``autopsy_consent``) + """ + scorer = self._get_scorer() + samples = [] + admissions = patient.get_events(event_type="admissions") + + for admission in admissions: + hadm_id = admission.hadm_id + + noteevents = patient.get_events( + event_type="noteevents", + filters=[("hadm_id", "==", hadm_id)], + ) + + # Extract discharge summaries only + discharge_notes = [ + ev for ev in noteevents + if str(getattr(ev, "category", "") or "").strip().lower() + == "discharge summary" + ] + + if len(discharge_notes) < self.min_notes: + continue + + # Score each note; average across notes for this admission + note_scores = [ + scorer.score(str(getattr(ev, "text", "") or "")) + for ev in discharge_notes + ] + raw_mean = float(sum(note_scores) / len(note_scores)) + + # Negate: higher value = more negative sentiment = more mistrust + raw_neg = -raw_mean + + # Derive output label + if self.output_label == "noncompliance": + 
label = self._noncompliance_label(noteevents) + elif self.output_label == "autopsy_consent": + label = self._autopsy_label(noteevents) + if label is None: + continue # exclude ambiguous/absent autopsy signal + else: + raise ValueError( + f"output_label must be 'noncompliance' or 'autopsy_consent', " + f"got '{self.output_label}'" + ) + + samples.append( + { + "patient_id": patient.patient_id, + "visit_id": hadm_id, + "neg_sentiment": [raw_neg], # 1-element list for TensorProcessor + self.output_label: label, + } + ) + + return samples