Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyrit/datasets/seed_datasets/remote/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
from pyrit.datasets.seed_datasets.remote.harmful_qa_dataset import (
_HarmfulQADataset,
) # noqa: F401
from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import (
_JailBreakV28KDataset,
) # noqa: F401
from pyrit.datasets.seed_datasets.remote.jbb_behaviors_dataset import (
_JBBBehaviorsDataset,
) # noqa: F401
Expand Down Expand Up @@ -123,6 +126,7 @@
"_HarmBenchDataset",
"_HarmBenchMultimodalDataset",
"_HarmfulQADataset",
"_JailBreakV28KDataset",
"_JBBBehaviorsDataset",
"_LibrAIDoNotAnswerDataset",
"_LLMLatentAdversarialTrainingDataset",
Expand Down
111 changes: 111 additions & 0 deletions pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import Literal

from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
_RemoteDatasetLoader,
)
from pyrit.models import SeedDataset, SeedPrompt

logger = logging.getLogger(__name__)


class _JailBreakV28KDataset(_RemoteDatasetLoader):
    """
    Loader for the JailBreakV_28K dataset (CSV Version V0.2).

    JailBreakV_28K is a large-scale benchmark for jailbreaking multimodal LLMs.
    This loader parses the CSV version of the dataset, extracting jailbreak queries
    and mapping them to safety policies.

    Reference: https://github.com/SaFo-Lab/JailBreakV_28K
    """

    # Mapping SaFo-Lab policy abbreviations to descriptive categories
    POLICY_MAP = {
        "P1": "Somatic Safety",
        "P2": "Propriety",
        "P3": "Privacy",
        "P4": "Public Interest",
        "P5": "Mental Health",
    }

    def __init__(
        self,
        *,
        source: str = (
            "https://raw.githubusercontent.com/SaFo-Lab/JailBreakV_28K/V0.2/JailBreakV_28K/JailBreakV_28K.csv"
        ),
        source_type: Literal["public_url", "file"] = "public_url",
    ):
        """
        Initialize the JailBreakV_28K dataset loader.

        Args:
            source: URL to the JailBreakV_28K CSV file.
            source_type: The type of source ('public_url' or 'file').
        """
        self.source = source
        self.source_type: Literal["public_url", "file"] = source_type

    @property
    def dataset_name(self) -> str:
        """Return the dataset name."""
        return "jailbreakv_28k"

    async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
        """
        Fetch JailBreakV_28K dataset and return as SeedDataset.

        Args:
            cache: Whether to cache the fetched dataset. Defaults to True.

        Returns:
            SeedDataset: A SeedDataset containing the JailBreakV_28K prompts.

        Raises:
            ValueError: If no valid prompts could be loaded from the source.
        """
        logger.info(f"Loading JailBreakV_28K CSV dataset from {self.source}")

        # The base loader infers CSV format from the .csv extension in the URL
        examples = self._fetch_from_url(
            source=self.source,
            source_type=self.source_type,
            cache=cache,
        )

        seed_prompts = []
        for example in examples:
            query = example.get("jailbreak_query", "")

            # Safety check: Skip empty prompts or those containing Jinja2 syntax
            if not query or "{{" in query or "{%" in query:
                continue

            # Coalesce missing keys to "" so metadata/description never contain
            # the literal string "None" (str(None) would otherwise leak through).
            policy = example.get("policy") or ""
            category = example.get("category") or ""

            seed_prompts.append(
                SeedPrompt(
                    value=str(query),
                    data_type="text",
                    dataset_name=self.dataset_name,
                    harm_categories=[
                        # Unknown/missing policy codes fall back to a sentinel label.
                        self.POLICY_MAP.get(policy, "Unknown Policy"),
                        str(category) if category else "Uncategorized",
                    ],
                    metadata={
                        "policy_code": str(policy),
                        "raw_category": str(category),
                    },
                    description=f"JailBreakV_28K prompt (Policy: {policy})",
                    source=self.source,
                )
            )

        if not seed_prompts:
            raise ValueError(f"No valid prompts could be loaded from {self.source}")

        logger.info(f"Successfully loaded {len(seed_prompts)} prompts from JailBreakV_28K")

        return SeedDataset(seeds=seed_prompts, dataset_name=self.dataset_name)
98 changes: 98 additions & 0 deletions tests/unit/datasets/test_jailbreakv_28k_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from unittest.mock import patch

import pytest

from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import _JailBreakV28KDataset
from pyrit.models import SeedDataset


@pytest.fixture
def mock_jailbreakv_csv_data():
    """Sample rows mimicking the structure of the JailBreakV CSV file."""
    first_row = {
        "jailbreak_query": "How to bypass security?",
        "policy": "P1",
        "category": "Technology",
    }
    second_row = {
        "jailbreak_query": "A prompt with Unicode 🐉",
        "policy": "P2",
        "category": "Deception",
    }
    return [first_row, second_row]


class TestJailBreakV28KDataset:
    """Unit tests for the JailBreakV_28K dataset loader, including edge cases."""

    def test_dataset_name(self):
        """The loader reports the expected dataset name."""
        assert _JailBreakV28KDataset().dataset_name == "jailbreakv_28k"

    @pytest.mark.asyncio
    async def test_fetch_dataset_happy_path(self, mock_jailbreakv_csv_data):
        """A normal fetch returns a SeedDataset with mapped policies intact."""
        loader = _JailBreakV28KDataset()

        with patch.object(loader, "_fetch_from_url", return_value=mock_jailbreakv_csv_data) as fetch_mock:
            result = await loader.fetch_dataset()

            assert isinstance(result, SeedDataset)
            assert len(result.seeds) == 2

            first, second = result.seeds

            # Policy code P1 maps to the "Somatic Safety" category.
            assert "Somatic Safety" in first.harm_categories
            assert first.metadata["policy_code"] == "P1"

            # Unicode content must survive the round trip.
            assert "🐉" in second.value

            # The base loader is invoked without any unexpected file_type argument.
            fetch_mock.assert_called_once_with(source=loader.source, source_type=loader.source_type, cache=True)

    @pytest.mark.asyncio
    async def test_fetch_dataset_skips_malformed_and_risky(self):
        """Rows with missing queries or Jinja2 syntax are filtered out."""
        loader = _JailBreakV28KDataset()
        rows = [
            {"jailbreak_query": "Valid prompt", "policy": "P1", "category": "Safety"},
            {"jailbreak_query": "Risky {{ template }}", "policy": "P1"},
            {"policy": "P2", "category": "Missing Query"},
            {"jailbreak_query": "", "policy": "P3"},
        ]

        with patch.object(loader, "_fetch_from_url", return_value=rows):
            result = await loader.fetch_dataset()

        assert len(result.seeds) == 1
        assert result.seeds[0].value == "Valid prompt"

    @pytest.mark.asyncio
    async def test_fetch_dataset_handles_unknown_policy_codes(self):
        """Unrecognized policy codes degrade to a sentinel category."""
        loader = _JailBreakV28KDataset()
        rows = [{"jailbreak_query": "Test", "policy": "P99", "category": "Test"}]

        with patch.object(loader, "_fetch_from_url", return_value=rows):
            result = await loader.fetch_dataset()

        assert "Unknown Policy" in result.seeds[0].harm_categories
        assert result.seeds[0].metadata["policy_code"] == "P99"

    @pytest.mark.asyncio
    async def test_fetch_dataset_empty_source_raises(self):
        """An empty source yields ValueError rather than an empty dataset."""
        loader = _JailBreakV28KDataset()

        with patch.object(loader, "_fetch_from_url", return_value=[]):
            with pytest.raises(ValueError, match="No valid prompts could be loaded"):
                await loader.fetch_dataset()

    def test_policy_map_is_complete(self):
        """POLICY_MAP covers every code from P1 through P5."""
        policy_map = _JailBreakV28KDataset().POLICY_MAP
        for code in ("P1", "P2", "P3", "P4", "P5"):
            assert code in policy_map