diff --git a/pyrit/datasets/seed_datasets/remote/__init__.py b/pyrit/datasets/seed_datasets/remote/__init__.py
index 121a35dcf..8892c8591 100644
--- a/pyrit/datasets/seed_datasets/remote/__init__.py
+++ b/pyrit/datasets/seed_datasets/remote/__init__.py
@@ -43,6 +43,9 @@
 from pyrit.datasets.seed_datasets.remote.harmful_qa_dataset import (
     _HarmfulQADataset,
 )  # noqa: F401
+from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import (
+    _JailBreakV28KDataset,
+)  # noqa: F401
 from pyrit.datasets.seed_datasets.remote.jbb_behaviors_dataset import (
     _JBBBehaviorsDataset,
 )  # noqa: F401
@@ -123,6 +126,7 @@
     "_HarmBenchDataset",
     "_HarmBenchMultimodalDataset",
     "_HarmfulQADataset",
+    "_JailBreakV28KDataset",
     "_JBBBehaviorsDataset",
     "_LibrAIDoNotAnswerDataset",
     "_LLMLatentAdversarialTrainingDataset",
diff --git a/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py
new file mode 100644
index 000000000..3f6bd78eb
--- /dev/null
+++ b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py
@@ -0,0 +1,111 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+from typing import Literal
+
+from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
+    _RemoteDatasetLoader,
+)
+from pyrit.models import SeedDataset, SeedPrompt
+
+logger = logging.getLogger(__name__)
+
+
+class _JailBreakV28KDataset(_RemoteDatasetLoader):
+    """
+    Loader for the JailBreakV_28K dataset (CSV Version V0.2).
+
+    JailBreakV_28K is a large-scale benchmark for jailbreaking multimodal LLMs.
+    This loader parses the CSV version of the dataset, extracting jailbreak queries
+    and mapping them to safety policies.
+
+    Reference: https://github.com/SaFo-Lab/JailBreakV_28K
+    """
+
+    # Mapping SaFo-Lab policy abbreviations to descriptive categories
+    POLICY_MAP = {
+        "P1": "Somatic Safety",
+        "P2": "Propriety",
+        "P3": "Privacy",
+        "P4": "Public Interest",
+        "P5": "Mental Health",
+    }
+
+    def __init__(
+        self,
+        *,
+        source: str = (
+            "https://raw.githubusercontent.com/SaFo-Lab/JailBreakV_28K/V0.2/JailBreakV_28K/JailBreakV_28K.csv"
+        ),
+        source_type: Literal["public_url", "file"] = "public_url",
+    ):
+        """
+        Initialize the JailBreakV_28K dataset loader.
+
+        Args:
+            source: URL to the JailBreakV_28K CSV file.
+            source_type: The type of source ('public_url' or 'file').
+        """
+        self.source = source
+        self.source_type: Literal["public_url", "file"] = source_type
+
+    @property
+    def dataset_name(self) -> str:
+        """Return the dataset name."""
+        return "jailbreakv_28k"
+
+    async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
+        """
+        Fetch JailBreakV_28K dataset and return as SeedDataset.
+
+        Args:
+            cache: Whether to cache the fetched dataset. Defaults to True.
+
+        Returns:
+            SeedDataset: A SeedDataset containing the JailBreakV_28K prompts.
+
+        Raises:
+            ValueError: If no valid prompts could be loaded from the source.
+        """
+        logger.info(f"Loading JailBreakV_28K CSV dataset from {self.source}")
+
+        # The base loader infers CSV format from the .csv extension in the URL
+        examples = self._fetch_from_url(
+            source=self.source,
+            source_type=self.source_type,
+            cache=cache,
+        )
+
+        seed_prompts = []
+        for example in examples:
+            query = example.get("jailbreak_query", "")
+
+            # Safety check: Skip empty prompts or those containing Jinja2 syntax
+            if not query or "{{" in query or "{%" in query:
+                continue
+
+            seed_prompts.append(
+                SeedPrompt(
+                    value=str(query),
+                    data_type="text",
+                    dataset_name=self.dataset_name,
+                    harm_categories=[
+                        str(self.POLICY_MAP.get(example.get("policy"), "Unknown Policy")),
+                        str(example.get("category", "Uncategorized")),
+                    ],
+                    metadata={
+                        "policy_code": str(example.get("policy")),
+                        "raw_category": str(example.get("category")),
+                    },
+                    description=f"JailBreakV_28K prompt (Policy: {example.get('policy')})",
+                    source=self.source,
+                )
+            )
+
+        if not seed_prompts:
+            raise ValueError(f"No valid prompts could be loaded from {self.source}")
+
+        logger.info(f"Successfully loaded {len(seed_prompts)} prompts from JailBreakV_28K")
+
+        return SeedDataset(seeds=seed_prompts, dataset_name=self.dataset_name)
diff --git a/tests/unit/datasets/test_jailbreakv_28k_dataset.py b/tests/unit/datasets/test_jailbreakv_28k_dataset.py
new file mode 100644
index 000000000..c4be44819
--- /dev/null
+++ b/tests/unit/datasets/test_jailbreakv_28k_dataset.py
@@ -0,0 +1,98 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from unittest.mock import patch
+
+import pytest
+
+from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import _JailBreakV28KDataset
+from pyrit.models import SeedDataset
+
+
+@pytest.fixture
+def mock_jailbreakv_csv_data():
+    """Mock data representing rows from the JailBreakV CSV."""
+    return [
+        {
+            "jailbreak_query": "How to bypass security?",
+            "policy": "P1",
+            "category": "Technology",
+        },
+        {
+            "jailbreak_query": "A prompt with Unicode 🐉",
+            "policy": "P2",
+            "category": "Deception",
+        },
+    ]
+
+
+class TestJailBreakV28KDataset:
+    """Test the JailBreakV_28K dataset loader with edge cases."""
+
+    def test_dataset_name(self):
+        """Test dataset_name property."""
+        loader = _JailBreakV28KDataset()
+        assert loader.dataset_name == "jailbreakv_28k"
+
+    @pytest.mark.asyncio
+    async def test_fetch_dataset_happy_path(self, mock_jailbreakv_csv_data):
+        """Test standard successful fetching and mapping."""
+        loader = _JailBreakV28KDataset()
+
+        with patch.object(loader, "_fetch_from_url", return_value=mock_jailbreakv_csv_data) as mock_fetch:
+            dataset = await loader.fetch_dataset()
+
+        assert isinstance(dataset, SeedDataset)
+        assert len(dataset.seeds) == 2
+
+        # Verify Mapping (P1 -> Somatic Safety)
+        assert "Somatic Safety" in dataset.seeds[0].harm_categories
+        assert dataset.seeds[0].metadata["policy_code"] == "P1"
+
+        # Verify Unicode handling
+        assert "🐉" in dataset.seeds[1].value
+
+        # Verify correct call to base loader (no unexpected file_type argument)
+        mock_fetch.assert_called_once_with(source=loader.source, source_type=loader.source_type, cache=True)
+
+    @pytest.mark.asyncio
+    async def test_fetch_dataset_skips_malformed_and_risky(self):
+        """Test that missing fields and Jinja2 syntax risks are skipped."""
+        loader = _JailBreakV28KDataset()
+        risky_data = [
+            {"jailbreak_query": "Valid prompt", "policy": "P1", "category": "Safety"},
+            {"jailbreak_query": "Risky {{ template }}", "policy": "P1"},
+            {"policy": "P2", "category": "Missing Query"},
+            {"jailbreak_query": "", "policy": "P3"},
+        ]
+
+        with patch.object(loader, "_fetch_from_url", return_value=risky_data):
+            dataset = await loader.fetch_dataset()
+        assert len(dataset.seeds) == 1
+        assert dataset.seeds[0].value == "Valid prompt"
+
+    @pytest.mark.asyncio
+    async def test_fetch_dataset_handles_unknown_policy_codes(self):
+        """Test that unknown policy codes fall back gracefully."""
+        loader = _JailBreakV28KDataset()
+        unknown_data = [{"jailbreak_query": "Test", "policy": "P99", "category": "Test"}]
+
+        with patch.object(loader, "_fetch_from_url", return_value=unknown_data):
+            dataset = await loader.fetch_dataset()
+        assert "Unknown Policy" in dataset.seeds[0].harm_categories
+        assert dataset.seeds[0].metadata["policy_code"] == "P99"
+
+    @pytest.mark.asyncio
+    async def test_fetch_dataset_empty_source_raises(self):
+        """Test that an empty response raises a ValueError."""
+        loader = _JailBreakV28KDataset()
+
+        with patch.object(loader, "_fetch_from_url", return_value=[]):
+            with pytest.raises(ValueError, match="No valid prompts could be loaded"):
+                await loader.fetch_dataset()
+
+    def test_policy_map_is_complete(self):
+        """Verify the internal POLICY_MAP contains codes P1-P5."""
+        loader = _JailBreakV28KDataset()
+        for i in range(1, 6):
+            assert f"P{i}" in loader.POLICY_MAP