diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py
index a60c1cd6c..11afbd186 100644
--- a/src/strands/vended_plugins/skills/skill.py
+++ b/src/strands/vended_plugins/skills/skill.py
@@ -2,26 +2,31 @@

 This module defines the Skill dataclass and provides classmethods for
 discovering, parsing, and loading skills from the filesystem, raw content,
-or HTTPS URLs. Skills are directories containing a SKILL.md file with YAML
-frontmatter metadata and markdown instructions.
+HTTPS URLs, or Amazon S3. Skills are directories containing a SKILL.md file
+with YAML frontmatter metadata and markdown instructions.
 """

 from __future__ import annotations

 import logging
 import re
+import tempfile
 import urllib.error
 import urllib.request
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any

+import boto3
 import yaml

 logger = logging.getLogger(__name__)

 _SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$")
 _MAX_SKILL_NAME_LENGTH = 64
+_S3_MIRROR_MAX_WORKERS = 4
+_S3_MIRROR_CACHE: dict[tuple[str, str | None], Path] = {}


 def _find_skill_md(skill_dir: Path) -> Path:
@@ -204,6 +209,172 @@ def _build_skill_from_frontmatter(
     )


+def _mirror_skills_from_s3(
+    bucket: str,
+    prefix: str | None = None,
+    *,
+    s3_client: Any = None,
+    local_dir: str | Path | None = None,
+) -> Path | None:
+    """Mirror skill directories from S3 to a local filesystem path.
+
+    Discovers all SKILL.md files under the given prefix, then downloads each
+    skill directory (SKILL.md + subdirectories like scripts/, references/,
+    assets/) in parallel using a thread pool.
+
+    Results are cached per (bucket, prefix) pair for the lifetime of the
+    process. Subsequent calls with the same arguments return the previously
+    mirrored directory without re-downloading.
+
+    Args:
+        bucket: S3 bucket name.
+        prefix: Optional key prefix to scope the scan (e.g. "agents/orchestrator/").
+            If None, scans the entire bucket.
+        s3_client: Optional pre-configured boto3 S3 client. If None, a default
+            client is created via ``boto3.client("s3")``.
+        local_dir: Optional local directory to mirror skills into. If None, a
+            temporary directory is created.
+
+    Returns:
+        Path to the local directory containing mirrored skill subdirectories,
+        or None if no skills were found.
+ """ + cache_key = (bucket, prefix) + if cache_key in _S3_MIRROR_CACHE: + logger.debug("bucket=<%s>, prefix=<%s> | s3 mirror cache hit", bucket, prefix or "") + return _S3_MIRROR_CACHE[cache_key] + + if s3_client is None: + s3_client = boto3.client("s3") + + # Normalize prefix + normalized_prefix = "" + if prefix: + normalized_prefix = prefix.rstrip("/") + "/" + + # Discover all objects under the prefix + objects = _s3_list_all_objects(s3_client, bucket, normalized_prefix) + + if not objects: + logger.warning("bucket=<%s>, prefix=<%s> | no objects found", bucket, normalized_prefix) + return None + + # Find skill directories (those containing SKILL.md) + skill_dirs = _s3_find_skill_directories(objects, normalized_prefix) + + if not skill_dirs: + logger.warning("bucket=<%s>, prefix=<%s> | no SKILL.md files found", bucket, normalized_prefix) + return None + + logger.info( + "bucket=<%s>, prefix=<%s>, count=<%d> | found skills: %s", + bucket, + normalized_prefix, + len(skill_dirs), + ", ".join(skill_dirs), + ) + + dest = _s3_resolve_local_dir(local_dir) + download_tasks = _s3_build_download_tasks(objects, skill_dirs, normalized_prefix) + _s3_download_parallel(s3_client, bucket, download_tasks, dest) + + _S3_MIRROR_CACHE[cache_key] = dest + return dest + + +def _s3_resolve_local_dir(local_dir: str | Path | None) -> Path: + """Resolve or create the local directory for S3 mirroring.""" + if local_dir is None: + path = Path(tempfile.mkdtemp(prefix="strands-s3-skills-")) + else: + path = Path(local_dir) + path.mkdir(parents=True, exist_ok=True) + return path + + +def _s3_list_all_objects(s3_client: Any, bucket: str, prefix: str) -> list[str]: + """List all object keys under a prefix, handling pagination.""" + keys: list[str] = [] + paginator = s3_client.get_paginator("list_objects_v2") + pages = paginator.paginate(Bucket=bucket, Prefix=prefix) + + for page in pages: + for obj in page.get("Contents", []): + keys.append(obj["Key"]) + + return keys + + +def _s3_find_skill_directories(object_keys: list[str], prefix: str) -> list[str]: + """Find skill directory names (relative to prefix) that contain SKILL.md. + + Returns a sorted list of skill directory names (e.g. ["code-review", "pdf-processing"]). + """ + skill_dirs: list[str] = [] + + for key in object_keys: + relative = key[len(prefix) :] + parts = relative.split("/") + + # We expect: /SKILL.md (or skill.md) + if len(parts) == 2 and parts[1].lower() == "skill.md": + skill_dirs.append(parts[0]) + + return sorted(set(skill_dirs)) + + +def _s3_build_download_tasks( + object_keys: list[str], + skill_dirs: list[str], + prefix: str, +) -> list[tuple[str, str]]: + """Build a list of (s3_key, relative_local_path) tuples to download. + + Only includes objects that belong to a discovered skill directory. 
+ """ + skill_dir_set = set(skill_dirs) + tasks: list[tuple[str, str]] = [] + + for key in object_keys: + relative = key[len(prefix) :] + parts = relative.split("/") + + if parts[0] in skill_dir_set and len(parts) >= 2: + if not key.endswith("/"): + tasks.append((key, relative)) + + return tasks + + +def _s3_download_parallel( + s3_client: Any, + bucket: str, + tasks: list[tuple[str, str]], + dest: Path, +) -> None: + """Download files from S3 in parallel using a thread pool.""" + if not tasks: + return + + def _download_one(task: tuple[str, str]) -> str: + s3_key, relative_path = task + local_path = dest / relative_path + local_path.parent.mkdir(parents=True, exist_ok=True) + s3_client.download_file(bucket, s3_key, str(local_path)) + return relative_path + + with ThreadPoolExecutor(max_workers=_S3_MIRROR_MAX_WORKERS) as executor: + futures = {executor.submit(_download_one, t): t for t in tasks} + for future in as_completed(futures): + try: + path = future.result() + logger.debug("s3_path=<%s> | downloaded", path) + except Exception: + s3_key, _ = futures[future] + logger.warning("bucket=<%s>, key=<%s> | failed to download", bucket, s3_key) + raise + + @dataclass class Skill: r"""Represents an agent skill with metadata and instructions. @@ -422,3 +593,62 @@ def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list logger.debug("path=<%s>, count=<%d> | loaded skills from directory", skills_dir, len(skills)) return skills + + @classmethod + def from_s3( + cls, + bucket: str, + prefix: str | None = None, + *, + s3_client: Any = None, + local_dir: str | Path | None = None, + strict: bool = False, + ) -> list[Skill]: + """Load skills from an Amazon S3 bucket. + + Scans ``bucket`` (under ``prefix`` if given) for directories containing + a SKILL.md file, mirrors each skill directory (including subdirectories + like scripts/, references/, assets/) to a local path, then delegates to + :meth:`from_directory` for parsing. + + Results are cached per (bucket, prefix) pair for the lifetime of the + process — subsequent calls with the same arguments return the cached + list without re-downloading. + + Example:: + + from strands.vended_plugins.skills import Skill + + # Load all skills under a prefix + skills = Skill.from_s3("my-bucket", prefix="agents/director/") + + # Load from bucket root with a custom S3 client + import boto3 + s3 = boto3.client("s3", region_name="eu-west-1") + skills = Skill.from_s3("my-bucket", s3_client=s3) + + Args: + bucket: S3 bucket name. + prefix: Optional key prefix to scope the scan (e.g. "agents/director/"). + If None, scans the entire bucket. + s3_client: Optional pre-configured boto3 S3 client. If None, a default + client is created via ``boto3.client("s3")``. + local_dir: Optional local directory to mirror skills into. If None, a + temporary directory is created. + strict: If True, raise on skill validation issues. If False (default), + warn and load anyway. Passed through to :meth:`from_directory`. + + Returns: + List of Skill instances loaded from S3. 
+ """ + local_path = _mirror_skills_from_s3( + bucket=bucket, + prefix=prefix, + s3_client=s3_client, + local_dir=local_dir, + ) + + if local_path is None: + return [] + + return cls.from_directory(local_path, strict=strict) diff --git a/tests/strands/vended_plugins/skills/test_skill_from_s3.py b/tests/strands/vended_plugins/skills/test_skill_from_s3.py new file mode 100644 index 000000000..09b47747a --- /dev/null +++ b/tests/strands/vended_plugins/skills/test_skill_from_s3.py @@ -0,0 +1,354 @@ +"""Tests for Skill.from_s3 using moto to mock S3.""" + +import boto3 +import pytest +from moto import mock_aws + +from strands.vended_plugins.skills.skill import ( + _S3_MIRROR_CACHE, + Skill, + _mirror_skills_from_s3, + _s3_build_download_tasks, + _s3_find_skill_directories, + _s3_list_all_objects, +) + +BUCKET = "test-skills-bucket" +PREFIX = "my-agent/" + +SKILL_MD_CONTENT = """\ +--- +name: {name} +description: {description} +--- +# {name} + +These are the instructions for {name}. +""" + + +@pytest.fixture(autouse=True) +def _clear_cache(): + """Clear the S3 mirror cache before each test.""" + _S3_MIRROR_CACHE.clear() + yield + _S3_MIRROR_CACHE.clear() + + +def _put_skill(s3_client, skill_name: str, *, prefix: str = PREFIX, with_resources: bool = False): + """Upload a skill directory to the mocked S3 bucket.""" + content = SKILL_MD_CONTENT.format( + name=skill_name, + description=f"Description for {skill_name}", + ) + s3_client.put_object( + Bucket=BUCKET, + Key=f"{prefix}{skill_name}/SKILL.md", + Body=content.encode(), + ) + + if with_resources: + s3_client.put_object( + Bucket=BUCKET, + Key=f"{prefix}{skill_name}/scripts/run.py", + Body=b"#!/usr/bin/env python3\nprint('hello')\n", + ) + s3_client.put_object( + Bucket=BUCKET, + Key=f"{prefix}{skill_name}/references/guide.md", + Body=b"# Guide\nSome reference content.\n", + ) + s3_client.put_object( + Bucket=BUCKET, + Key=f"{prefix}{skill_name}/assets/config.json", + Body=b'{"key": "value"}\n', + ) + + +class TestSkillFromS3: + """Tests for Skill.from_s3 classmethod.""" + + @mock_aws + def test_loads_single_skill(self, tmp_path): + """Single skill with no resources loads correctly.""" + s3_client = boto3.client("s3", region_name="us-east-1") + s3_client.create_bucket(Bucket=BUCKET) + _put_skill(s3_client, "code-review") + + skills = Skill.from_s3( + BUCKET, + prefix=PREFIX, + s3_client=s3_client, + local_dir=tmp_path / "skills", + ) + + assert len(skills) == 1 + assert skills[0].name == "code-review" + assert skills[0].description == "Description for code-review" + assert "instructions for code-review" in skills[0].instructions + + @mock_aws + def test_loads_multiple_skills(self, tmp_path): + """Multiple skills are discovered and loaded.""" + s3_client = boto3.client("s3", region_name="us-east-1") + s3_client.create_bucket(Bucket=BUCKET) + _put_skill(s3_client, "pdf-processing") + _put_skill(s3_client, "code-review") + _put_skill(s3_client, "summarize") + + skills = Skill.from_s3( + BUCKET, + prefix=PREFIX, + s3_client=s3_client, + local_dir=tmp_path / "skills", + ) + + assert len(skills) == 3 + names = {s.name for s in skills} + assert names == {"pdf-processing", "code-review", "summarize"} + + @mock_aws + def test_mirrors_resource_files(self, tmp_path): + """Resource directories (scripts/, references/, assets/) are mirrored.""" + s3_client = boto3.client("s3", region_name="us-east-1") + s3_client.create_bucket(Bucket=BUCKET) + _put_skill(s3_client, "data-analysis", with_resources=True) + + skills = Skill.from_s3( + BUCKET, + prefix=PREFIX, + 
+            s3_client=s3_client,
+            local_dir=tmp_path / "skills",
+        )
+
+        assert len(skills) == 1
+        skill_path = skills[0].path
+        assert skill_path is not None
+        assert (skill_path / "SKILL.md").exists()
+        assert (skill_path / "scripts" / "run.py").exists()
+        assert (skill_path / "references" / "guide.md").exists()
+        assert (skill_path / "assets" / "config.json").exists()
+
+    @mock_aws
+    def test_empty_prefix_returns_empty(self, tmp_path):
+        """Empty bucket prefix returns an empty list without error."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+
+        skills = Skill.from_s3(
+            BUCKET,
+            prefix="nonexistent/",
+            s3_client=s3_client,
+            local_dir=tmp_path / "skills",
+        )
+
+        assert skills == []
+
+    @mock_aws
+    def test_no_prefix_scans_entire_bucket(self, tmp_path):
+        """When prefix is None, scans the entire bucket."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+
+        content = SKILL_MD_CONTENT.format(name="root-skill", description="A root-level skill")
+        s3_client.put_object(
+            Bucket=BUCKET,
+            Key="root-skill/SKILL.md",
+            Body=content.encode(),
+        )
+
+        skills = Skill.from_s3(
+            BUCKET,
+            s3_client=s3_client,
+            local_dir=tmp_path / "skills",
+        )
+
+        assert len(skills) == 1
+        assert skills[0].name == "root-skill"
+
+    @mock_aws
+    def test_caching(self, tmp_path):
+        """Second call with same (bucket, prefix) returns cached result without re-downloading."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+
+        content = SKILL_MD_CONTENT.format(name="cached-skill", description="Cached")
+        s3_client.put_object(
+            Bucket=BUCKET,
+            Key="cache-test/cached-skill/SKILL.md",
+            Body=content.encode(),
+        )
+
+        local = tmp_path / "skills"
+        skills1 = Skill.from_s3(
+            BUCKET,
+            prefix="cache-test/",
+            s3_client=s3_client,
+            local_dir=local,
+        )
+
+        # Delete from S3 — second call should still return cached result
+        s3_client.delete_object(Bucket=BUCKET, Key="cache-test/cached-skill/SKILL.md")
+        skills2 = Skill.from_s3(
+            BUCKET,
+            prefix="cache-test/",
+            s3_client=s3_client,
+            local_dir=local,
+        )
+
+        assert len(skills1) == len(skills2)
+        assert skills1[0].name == skills2[0].name
+
+    @mock_aws
+    def test_strict_mode_propagates(self, tmp_path):
+        """Strict mode is passed through to from_directory, skipping invalid skills."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+
+        # Create a skill with a name that doesn't match its directory
+        s3_client.put_object(
+            Bucket=BUCKET,
+            Key=f"{PREFIX}wrong-dir/SKILL.md",
+            Body=b"---\nname: right-name\ndescription: test\n---\nBody.",
+        )
+
+        # In strict mode, from_file raises but from_directory catches and skips
+        skills = Skill.from_s3(
+            BUCKET,
+            prefix=PREFIX,
+            s3_client=s3_client,
+            local_dir=tmp_path / "skills",
+            strict=True,
+        )
+
+        # The mismatched skill is skipped, resulting in an empty list
+        assert skills == []
+
+    @mock_aws
+    def test_prefix_trailing_slash_normalized(self, tmp_path):
+        """Prefix with or without trailing slash produces the same result."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+        _put_skill(s3_client, "my-skill", prefix="agents/")
+
+        skills_with_slash = Skill.from_s3(
+            BUCKET,
+            prefix="agents/",
+            s3_client=s3_client,
+            local_dir=tmp_path / "s1",
+        )
+
+        _S3_MIRROR_CACHE.clear()
+
+        skills_without_slash = Skill.from_s3(
+            BUCKET,
+            prefix="agents",
+            s3_client=s3_client,
+            local_dir=tmp_path / "s2",
+        )
+
+        assert len(skills_with_slash) == len(skills_without_slash) == 1
+        assert skills_with_slash[0].name == skills_without_slash[0].name
+
+    @mock_aws
+    def test_no_skill_md_returns_empty(self, tmp_path):
+        """Bucket with files but no SKILL.md returns empty list."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+        s3_client.put_object(
+            Bucket=BUCKET,
+            Key=f"{PREFIX}some-dir/readme.txt",
+            Body=b"not a skill",
+        )
+
+        skills = Skill.from_s3(
+            BUCKET,
+            prefix=PREFIX,
+            s3_client=s3_client,
+            local_dir=tmp_path / "skills",
+        )
+
+        assert skills == []
+
+
+class TestMirrorSkillsFromS3:
+    """Tests for the _mirror_skills_from_s3 helper."""
+
+    @mock_aws
+    def test_creates_temp_dir_when_local_dir_is_none(self):
+        """When local_dir is None, a temp directory is created."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+        _put_skill(s3_client, "temp-skill")
+
+        result = _mirror_skills_from_s3(BUCKET, prefix=PREFIX, s3_client=s3_client)
+
+        assert result.exists()
+        assert result.is_dir()
+        assert "strands-s3-skills-" in str(result)
+
+    @mock_aws
+    def test_creates_local_dir_if_not_exists(self, tmp_path):
+        """When local_dir doesn't exist, it is created."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+        _put_skill(s3_client, "new-dir-skill")
+
+        target = tmp_path / "nested" / "dir"
+        result = _mirror_skills_from_s3(BUCKET, prefix=PREFIX, s3_client=s3_client, local_dir=target)
+
+        assert result == target
+        assert result.exists()
+
+
+class TestS3HelperFunctions:
+    """Tests for internal S3 helper functions."""
+
+    def test_find_skill_directories(self):
+        """Test _s3_find_skill_directories discovers correct directories."""
+        keys = [
+            "prefix/skill-a/SKILL.md",
+            "prefix/skill-a/scripts/run.py",
+            "prefix/skill-b/SKILL.md",
+            "prefix/other/readme.txt",
+        ]
+        result = _s3_find_skill_directories(keys, "prefix/")
+        assert result == ["skill-a", "skill-b"]
+
+    def test_find_skill_directories_empty(self):
+        """Test _s3_find_skill_directories with no SKILL.md files."""
+        keys = ["prefix/dir/readme.txt", "prefix/dir/other.py"]
+        result = _s3_find_skill_directories(keys, "prefix/")
+        assert result == []
+
+    def test_build_download_tasks(self):
+        """Test _s3_build_download_tasks filters to skill directories only."""
+        keys = [
+            "prefix/skill-a/SKILL.md",
+            "prefix/skill-a/scripts/run.py",
+            "prefix/other/readme.txt",
+            "prefix/skill-a/",  # directory marker, should be skipped
+        ]
+        tasks = _s3_build_download_tasks(keys, ["skill-a"], "prefix/")
+
+        assert len(tasks) == 2
+        assert ("prefix/skill-a/SKILL.md", "skill-a/SKILL.md") in tasks
+        assert ("prefix/skill-a/scripts/run.py", "skill-a/scripts/run.py") in tasks
+
+    @mock_aws
+    def test_list_all_objects_pagination(self):
+        """Test _s3_list_all_objects handles paginated results."""
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=BUCKET)
+
+        # Upload enough objects to potentially trigger pagination
+        for i in range(5):
+            s3_client.put_object(
+                Bucket=BUCKET,
+                Key=f"prefix/file-{i}.txt",
+                Body=b"content",
+            )
+
+        keys = _s3_list_all_objects(s3_client, BUCKET, "prefix/")
+        assert len(keys) == 5
+        assert all(k.startswith("prefix/") for k in keys)
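A minimal usage sketch of the loader added above, assuming default AWS credentials are available to boto3; the bucket name, prefix, region, and local mirror directory are illustrative placeholders, not values from this change::

    import boto3

    from strands.vended_plugins.skills import Skill

    # Reuse an explicitly configured client and scope the scan to one agent's prefix.
    s3 = boto3.client("s3", region_name="us-east-1")
    skills = Skill.from_s3(
        "my-skills-bucket",          # illustrative bucket name
        prefix="agents/director/",   # illustrative prefix
        s3_client=s3,
        local_dir="/tmp/strands-skills",
        strict=False,
    )

    for skill in skills:
        print(skill.name, skill.description)

    # Results are cached per (bucket, prefix) for the process lifetime, so a
    # repeat call with the same arguments reuses the mirrored directory.
    skills_again = Skill.from_s3("my-skills-bucket", prefix="agents/director/", s3_client=s3)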