diff --git a/.github/workflows/docbinder-oss.yml b/.github/workflows/docbinder-oss.yml index 3072209..ea94412 100644 --- a/.github/workflows/docbinder-oss.yml +++ b/.github/workflows/docbinder-oss.yml @@ -5,10 +5,16 @@ on: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" pull_request: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" jobs: test: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index d434cbf..8ec8616 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,7 @@ ENV/ # Credentials gcp_credentials.json *_token.json + +# Test files +search_results.csv +search_results.json \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e885ddb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +repos: + - repo: https://github.com/astral-sh/uv-pre-commit + rev: 0.7.16 + hooks: + - id: uv-export + - id: uv-lock + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.12.1 + hooks: + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --select, I, --fix, --select=E501 ] + # Run the formatter. + - id: ruff-format + types_or: [ python, pyi ] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4226b1e..f5b442f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u --- **Note:** -Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. \ No newline at end of file +Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. + +## Code Style and Linting + +This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing. + +- Run the following before committing code: + +```zsh +uv run black . +uv run ruff check . +``` + +- To automatically format and lint code on every commit, install pre-commit hooks: + +```zsh +uv pip install pre-commit +pre-commit install +``` + +This will ensure Black and Ruff are run on staged files before each commit. + +Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors. \ No newline at end of file diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4226b1e..f5b442f 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u --- **Note:** -Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. \ No newline at end of file +Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. + +## Code Style and Linting + +This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing. + +- Run the following before committing code: + +```zsh +uv run black . +uv run ruff check . 
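+# Ruff can also apply safe autofixes and formatting directly
+# (optional extra step; sketch, assuming Ruff >= 0.1.2 for `ruff format`):
+uv run ruff check . --fix
+uv run ruff format .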
+```
+
+- To automatically format and lint code on every commit, install pre-commit hooks:
+
+```zsh
+uv pip install pre-commit
+pre-commit install
+```
+
+This will ensure Black and Ruff are run on staged files before each commit.
+
+Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors.
\ No newline at end of file
diff --git a/docs/tool/providers/custom_provider.md b/docs/tool/providers/custom_provider.md
new file mode 100644
index 0000000..8a2cca7
--- /dev/null
+++ b/docs/tool/providers/custom_provider.md
@@ -0,0 +1,119 @@
+# How to Add a New Provider
+
+This guide explains how to integrate a new storage provider (e.g., Dropbox, OneDrive) into DocBinder-OSS. The process involves creating configuration and client classes, registering the provider, and ensuring compatibility with the system’s models and interfaces.
+
+---
+
+## 1. Create a Service Configuration Class
+
+Each provider must define a configuration class that inherits from [`ServiceConfig`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/providers/base_class.py):
+
+```python
+# filepath: src/docbinder_oss/providers/my_provider/my_provider_service_config.py
+from docbinder_oss.providers.base_class import ServiceConfig
+
+class MyProviderServiceConfig(ServiceConfig):
+    type: str = "my_provider"
+    name: str
+    # Add any other provider-specific fields here
+    api_key: str
+```
+
+- `type` must be unique and match the provider’s identifier.
+- `name` is a user-defined label for this provider instance.
+
+---
+
+## 2. Implement the Provider Client
+
+Create a client class that inherits from [`BaseProvider`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/providers/base_class.py) and implements all abstract methods:
+
+```python
+# filepath: src/docbinder_oss/providers/my_provider/my_provider_client.py
+from typing import List, Optional
+
+from docbinder_oss.core.schemas import Bucket, File, Permission
+from docbinder_oss.providers.base_class import BaseProvider
+
+from .my_provider_service_config import MyProviderServiceConfig
+
+class MyProviderClient(BaseProvider):
+    def __init__(self, config: MyProviderServiceConfig):
+        self.config = config
+        # Initialize SDK/client here
+
+    def test_connection(self) -> bool:
+        # Implement connection test
+        pass
+
+    def list_buckets(self) -> List[Bucket]:
+        # Implement bucket/drive listing
+        pass
+
+    def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]:
+        # Implement file listing for a single folder
+        pass
+
+    def list_all_files(self) -> List[File]:
+        # Implement listing of all files and folders
+        pass
+
+    def get_file_metadata(self, item_id: str) -> File:
+        # Implement metadata retrieval
+        pass
+
+    def get_permissions(self, item_id: str) -> List[Permission]:
+        # Implement permissions retrieval
+        pass
+```
+
+- Use the shared models [`File`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), [`Permission`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), etc., for return types.
+
+---
+
+## 3. Register the Provider
+
+Add an `__init__.py` in your provider’s folder with a `register()` function:
+
+```python
+# filepath: src/docbinder_oss/providers/my_provider/__init__.py
+from .my_provider_client import MyProviderClient
+from .my_provider_service_config import MyProviderServiceConfig
+
+def register():
+    return {
+        "display_name": "my_provider",
+        "config_class": MyProviderServiceConfig,
+        "client_class": MyProviderClient,
+    }
+```
+
+---
+
+## 4. Ensure Discovery
+
+The system will automatically discover your provider if it’s in the `src/docbinder_oss/providers/` directory and contains a `register()` function in `__init__.py`.
+
+---
+
+## 5. Update the Config File
+
+Add your provider’s configuration to `~/.config/docbinder/config.yaml`:
+
+```yaml
+providers:
+  - type: my_provider
+    name: my_instance
+    # Add other required fields
+    api_key:
+```
+
+---
+
+## 6. Test Your Provider
+
+- Run the application and ensure your provider appears and works as expected.
+- The config loader will validate your config using your `ServiceConfig` subclass.
+
+---
+
+## Reference
+
+- [src/docbinder_oss/providers/base_class.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/providers/base_class.py)
+- [src/docbinder_oss/core/schemas.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py)
- [src/docbinder_oss/providers/google_drive/](https://github.com/SnappyLab/DocBinder-OSS/tree/main/src/docbinder_oss/providers/google_drive/) (example implementation)
+- [src/docbinder_oss/providers/__init__.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/providers/__init__.py)
+
+---
+
+**Tip:** Use the Google Drive provider as a template for your implementation. Make sure to follow the abstract method signatures and use the shared models for compatibility.
\ No newline at end of file
diff --git a/docs/tool/providers/google_drive.md b/docs/tool/providers/google_drive.md
new file mode 100644
index 0000000..62d488c
--- /dev/null
+++ b/docs/tool/providers/google_drive.md
@@ -0,0 +1,68 @@
+# Google Drive Configuration Setup
+
+This guide will help you configure Google Drive as a provider for DocBinder.
+
+## Prerequisites
+
+- A Google account
+- Access to [Google Cloud Console](https://console.cloud.google.com/)
+- DocBinder installed
+
+## Step 1: Create a Google Cloud Project
+
+1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
+2. Click on **Select a project** and then **New Project**.
+3. Enter a project name and click **Create**.
+
+## Step 2: Enable Google Drive API
+
+1. In your project dashboard, navigate to **APIs & Services > Library**.
+2. Search for **Google Drive API**.
+3. Click **Enable**.
+
+## Step 3: Create OAuth 2.0 Credentials
+
+1. Go to **APIs & Services > Credentials**.
+2. Click **+ CREATE CREDENTIALS** and select **OAuth client ID**.
+3. Configure the consent screen if prompted.
+4. Choose **Desktop app** or **Web application** as the application type.
+5. Enter a name and click **Create**.
+6. Download the `credentials.json` file.
+
+## Step 4: Configure DocBinder
+
+1. Place your downloaded credentials file somewhere accessible (e.g., `~/gcp_credentials.json`).
+2. The application will generate a token file automatically after the first authentication and store it at `~/.config/docbinder/gcp/<provider name>_token.json`.
+
+## Step 5: Edit the Config File
+
+Create the config file, and add a provider entry for Google Drive:
+
+```yaml
+providers:
+  - type: google_drive
+    name: my_gdrive
+    gcp_credentials_json: ./gcp_credentials.json
+```
+
+* `type`: Must be `google_drive`.
+* `name`: A unique name for this provider; it is also used to name the generated token file.
+* `gcp_credentials_json`: Absolute or relative path to your Google Cloud credentials file.
+
+## Step 6: Authenticate and Test
+
+1. Run DocBinder with the Google Drive provider enabled.
+2. On first run, follow the authentication prompt to grant access.
+3.
Verify that DocBinder can access your Google Drive files. + +## Troubleshooting + +- Ensure your credentials file is in the correct location. +- Check that the Google Drive API is enabled for your project. +- Review the [Google API Console](https://console.developers.google.com/) for error messages. + +## References + +- [Google Drive API Documentation](https://developers.google.com/drive) +- [DocBinder OSS - GitHub](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index e864210..78c26bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,9 @@ nav: - Commands: - Main CLI: commands/main.md - Provider: commands/provider.md + - Providers: + - Google Drive: tool/providers/google_drive.md + - Custom Provider: tool/providers/custom_provider.md - Contributing: CONTRIBUTING.md - Code of Conduct: CODE_OF_CONDUCT.md - Security: SECURITY.md diff --git a/provider_setup_example.yml b/provider_setup_example.yml new file mode 100644 index 0000000..ff3c851 --- /dev/null +++ b/provider_setup_example.yml @@ -0,0 +1,4 @@ +providers: + - type: google_drive + name: my_google_drive + gcp_credentials_json: gcp_credentials.json diff --git a/pyproject.toml b/pyproject.toml index 09ae953..ed3fec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,8 +32,10 @@ include = ["src/docbinder_oss/**"] [dependency-groups] dev = [ + "black>=25.1.0", "mkdocs>=1.6.1", "mkdocs-material>=9.6.14", + "pre-commit>=4.2.0", "pytest>=8.4.0", "tox>=4.26.0", "tox-uv>=1.26.0", @@ -47,8 +49,7 @@ testpaths = [ ] [tool.ruff] -# Set the maximum line length to 100. -line-length = 100 +line-length = 125 [tool.ruff.lint] # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that diff --git a/tests/services/google_drive/__init__.py b/src/docbinder_oss/cli/__init__.py similarity index 100% rename from tests/services/google_drive/__init__.py rename to src/docbinder_oss/cli/__init__.py diff --git a/src/docbinder_oss/cli/provider/__init__.py b/src/docbinder_oss/cli/provider/__init__.py new file mode 100644 index 0000000..6066b1a --- /dev/null +++ b/src/docbinder_oss/cli/provider/__init__.py @@ -0,0 +1,12 @@ +import typer +from .get import app as get_app +from .list import app as list_app +from .test import app as test_app + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +app = typer.Typer(help="Commands to manage providers. List them or get details for a specific one.") +app.add_typer(get_app) +app.add_typer(list_app) +app.add_typer(test_app) diff --git a/src/docbinder_oss/cli/provider/get.py b/src/docbinder_oss/cli/provider/get.py new file mode 100644 index 0000000..e86b7be --- /dev/null +++ b/src/docbinder_oss/cli/provider/get.py @@ -0,0 +1,30 @@ +import typer + +app = typer.Typer() + + +@app.command("get") +def get_provider( + connection_type: str = typer.Option(None, "--type", "-t", help="The type of the provider to get."), + name: str = typer.Option(None, "--name", "-n", help="The name of the provider to get."), +): + """Get connection information for a provider by name or by type. 
+ If both options are provided, it will search for providers matching either criterion.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + + provider_found = False + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider in config.providers: + if provider.name == name: + typer.echo(f"Provider '{name}' found with config: {provider}") + provider_found = True + if provider.type == connection_type: + typer.echo(f"Provider '{provider.name}' of type '{connection_type}'" f" found with config: {provider}") + provider_found = True + if not provider_found: + typer.echo(f"No providers found with name '{name}' or type '{connection_type}'.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/cli/provider/list.py b/src/docbinder_oss/cli/provider/list.py new file mode 100644 index 0000000..c3bd5f9 --- /dev/null +++ b/src/docbinder_oss/cli/provider/list.py @@ -0,0 +1,17 @@ +import typer + +app = typer.Typer() + + +@app.command() +def list(): + """List all configured providers.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + for provider in config.providers: + typer.echo(f"Provider: {provider.name}, type: {provider.type}") diff --git a/src/docbinder_oss/cli/provider/test.py b/src/docbinder_oss/cli/provider/test.py new file mode 100644 index 0000000..2ba7091 --- /dev/null +++ b/src/docbinder_oss/cli/provider/test.py @@ -0,0 +1,46 @@ +import typer +from typing import Annotated + +app = typer.Typer() + + +@app.command("test") +def test( + name: Annotated[str, typer.Argument(help="The name of the provider to test the connection.")], +): + """Test the connection to a specific provider.""" + from docbinder_oss.helpers.config import load_config + from docbinder_oss.providers import create_provider_instance + + if not name: + typer.echo("Provider name is required.") + raise typer.Exit(code=1) + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + found_provider_config = None + for provider_config in config.providers: + if provider_config.name == name: + found_provider_config = provider_config + break # Exit the loop once the provider is found + + if found_provider_config: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(found_provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + + # If we reach here, the provider was not found + typer.echo(f"Provider '{name}' not found in configuration.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py new file mode 100644 index 0000000..d4b63e3 --- /dev/null +++ b/src/docbinder_oss/cli/search.py @@ -0,0 +1,143 @@ +from datetime import datetime +import re +import typer +from typing import Dict, List, Optional + +from docbinder_oss.core.schemas import File +from docbinder_oss.helpers.config import load_config +from docbinder_oss.providers import create_provider_instance +from docbinder_oss.helpers.config import Config +from docbinder_oss.providers.base_class import 
BaseProvider +from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter + +app = typer.Typer() + + +@app.command() +def search( + name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), + max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_file: Optional[str] = typer.Option( + None, "--export-file", help="Export file name (e.g. results.csv or results.json)" + ), +): + """Search for files or folders matching filters across all + providers and export results as CSV or JSON. If --export-file is not provided, results are printed to the console.""" + + config: Config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + current_files = {} + for provider_config in config.providers: + if provider and provider_config.name != provider: + continue + client: Optional[BaseProvider] = create_provider_instance(provider_config) + if not client: + typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") + raise typer.Exit(code=1) + current_files[provider_config.name] = client.list_all_files() + + current_files = __filter_files( + current_files, + name=name, + owner=owner, + updated_after=updated_after, + updated_before=updated_before, + created_after=created_after, + created_before=created_before, + min_size=min_size, + max_size=max_size, + ) + + MultiFormatWriter.write(current_files, export_file) + return + + +def __filter_files( + files: Dict[str, List[File]], + name=None, + owner=None, + updated_after=None, + updated_before=None, + created_after=None, + created_before=None, + min_size=None, + max_size=None, +) -> Dict[str, List[File]]: + """ + Filters a collection of files based on various criteria such as name, owner, + modification/creation dates, and file size. + + Args: + files (dict): A dictionary where keys are providers and values are lists of file objects. + name (str, optional): A regex pattern to match file names (case-insensitive). + owner (str, optional): An email address to match file owners. + updated_after (str, optional): ISO format datetime string; only include files modified + after this date. + updated_before (str, optional): ISO format datetime string; only include files modified + before this date. + created_after (str, optional): ISO format datetime string; only include files created after + this date. + created_before (str, optional): ISO format datetime string; only include files created + before this date. + min_size (int, optional): Minimum file size in kilobytes (KB). + max_size (int, optional): Maximum file size in kilobytes (KB). + + Returns: + list: A list of file objects that match the specified filters. 
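+
+    Example:
+        __filter_files(files, name=r"report", min_size=100) keeps files of at
+        least 100 KB whose names match "report" (case-insensitive).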
+ """ + + def file_matches(file: File): + if name and not re.search(name, file.name, re.IGNORECASE): + return False + if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)): + return False + if updated_after: + file_modified_time = __parse_dt(file.modified_time) + updated_after_dt = __parse_dt(updated_after) + if file_modified_time is None or updated_after_dt is None or file_modified_time < updated_after_dt: + return False + if updated_before: + file_modified_time = __parse_dt(file.modified_time) + updated_before_dt = __parse_dt(updated_before) + if file_modified_time is None or updated_before_dt is None or file_modified_time > updated_before_dt: + return False + if created_after: + file_created_time = __parse_dt(file.created_time) + created_after_dt = __parse_dt(created_after) + if file_created_time is None or created_after_dt is None or file_created_time < created_after_dt: + return False + if created_before: + file_created_time = __parse_dt(file.created_time) + created_before_dt = __parse_dt(created_before) + if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: + return False + if min_size and file.size < min_size: + return False + if max_size and file.size > max_size: + return False + return True + + filtered = {} + for provider, file_list in files.items(): + filtered[provider] = [file for file in file_list if file_matches(file)] + return filtered + + +def __parse_dt(val): + if isinstance(val, datetime): + return val + try: + return datetime.fromisoformat(val) + except Exception as e: + typer.echo(f"Failed to parse datetime from value: {val} with error: {e}", err=True) + raise ValueError(f"Invalid datetime format: {val}") from e diff --git a/src/docbinder_oss/cli/setup.py b/src/docbinder_oss/cli/setup.py new file mode 100644 index 0000000..b74cbc9 --- /dev/null +++ b/src/docbinder_oss/cli/setup.py @@ -0,0 +1,50 @@ +import typer +from typing import List, Optional +import yaml +from docbinder_oss.helpers.config import save_config, validate_config + +app = typer.Typer(help="DocBinder configuration setup commands.") + + +@app.command() +def setup( + file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), + provider: Optional[List[str]] = typer.Option( + None, + "--provider", + help="Provider config as provider:key1=val1,key2=val2", + callback=lambda v: v or [], + ), +): + """Setup DocBinder configuration via YAML file or provider key-value pairs.""" + config_data = {} + if file: + with open(file, "r") as f: + config_data = yaml.safe_load(f) or {} + elif provider: + providers = {} + for entry in provider: + if ":" not in entry: + typer.echo(f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format.") + raise typer.Exit(code=1) + prov_name, prov_kvs = entry.split(":", 1) + kv_dict = {} + for pair in prov_kvs.split(","): + if "=" not in pair: + typer.echo(f"Provider config '{pair}' must be in key=value format.") + raise typer.Exit(code=1) + k, v = pair.split("=", 1) + kv_dict[k] = v + providers[prov_name] = kv_dict + config_data["providers"] = providers + validated = validate_config(config_data) + if not validated.providers: + typer.echo("No providers configured. 
Please add at least one provider.") + raise typer.Exit(code=1) + # Save the validated config + try: + save_config(validated) + except Exception as e: + typer.echo(f"Error saving config: {e}") + raise typer.Exit(code=1) + typer.echo("Configuration saved successfully.") diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 2718f1b..5fd8268 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -13,9 +13,7 @@ class Bucket(BaseModel): id: str name: str kind: Optional[str] = Field(description="Type of the bucket, e.g., 'drive#file'") - created_time: Optional[datetime] = Field( - description="Timestamp when the bucket was created." - ) + created_time: Optional[datetime] = Field(description="Timestamp when the bucket was created.") viewable: Optional[bool] restrictions: Optional[Dict[str, Any]] @@ -43,37 +41,41 @@ class FileCapabilities(BaseModel): class File(BaseModel): """Represents a file or folder""" - id: str - name: str - mime_type: str - kind: Optional[str] + id: str = Field(repr=True, description="Unique identifier for the file or folder.") + name: str = Field(repr=True, description="Name of the file or folder. May not be unique.") + mime_type: str = Field(repr=True, description="MIME type of the file or folder.") + kind: Optional[str] = Field(repr=True, description="Kind of the item, e.g., 'drive#file'.") - is_folder: bool = Field( - False, description="True if the item is a folder, False otherwise." - ) + is_folder: bool = Field(False, description="True if the item is a folder, False otherwise.") web_view_link: Optional[HttpUrl] icon_link: Optional[HttpUrl] created_time: Optional[datetime] - modified_time: Optional[datetime] + modified_time: Optional[datetime] = Field(repr=True, description="Last modified time of the file or folder.") - owners: Optional[List[User]] + owners: Optional[List[User]] = Field(repr=True, description="List of owners of the file or folder.") last_modifying_user: Optional[User] - size: Optional[str] = Field( - description="Size in bytes, as a string. Only populated for files." - ) - parents: Optional[str] = Field(description="Parent folder ID, if applicable.") - - capabilities: Optional[FileCapabilities] = None + size: Optional[str] = Field(description="Size in bytes, as a string. 
Only populated for files.") + parents: Optional[List[str]] = Field(description="Parent folder IDs, if applicable.") shared: Optional[bool] starred: Optional[bool] trashed: Optional[bool] - # If you want a more robust way to set is_folder after initialization: def __init__(self, **data: Any): + # Coerce parents to a list of strings or None + if "parents" in data: + if data["parents"] is None: + data["parents"] = None + elif isinstance(data["parents"], str): + data["parents"] = [data["parents"]] + elif isinstance(data["parents"], list): + # Ensure all elements are strings + data["parents"] = [str(p) for p in data["parents"] if p is not None] + else: + data["parents"] = [str(data["parents"])] super().__init__(**data) if self.mime_type == "application/vnd.google-apps.folder": self.is_folder = True diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 77e17cf..088d95d 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -6,7 +6,7 @@ import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.services import ServiceUnion, get_provider_registry +from docbinder_oss.providers import ServiceUnion, get_provider_registry logger = logging.getLogger(__name__) @@ -16,14 +16,12 @@ class Config(BaseModel): """Main configuration model that holds a list of all provider configs.""" - providers: List[ServiceUnion] + providers: List[ServiceUnion] # type: ignore def load_config() -> Config: if not os.path.exists(CONFIG_PATH): - typer.echo( - f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first." - ) + typer.echo(f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first.") raise typer.Exit(code=1) with open(CONFIG_PATH, "r") as f: config_data = yaml.safe_load(f) @@ -33,9 +31,7 @@ def load_config() -> Config: if config.get("type") not in provider_registry: typer.echo(f"Unknown provider type: {config['type']}") raise typer.Exit(code=1) - config_to_add.append( - provider_registry[config["type"]]["config_class"](**config) - ) + config_to_add.append(provider_registry[config["type"]]["config_class"](**config)) try: configss = Config(providers=config_to_add) return configss diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py new file mode 100644 index 0000000..b3a20b3 --- /dev/null +++ b/src/docbinder_oss/helpers/path_utils.py @@ -0,0 +1,90 @@ +def build_id_to_item(files): + """ + Build a mapping from file/folder id to the file/folder object. + """ + return {getattr(f, "id", None): f for f in files if hasattr(f, "id")} + + +def get_full_path(file, id_to_item, root_id="root", root_name="My Drive"): + """ + Recursively build the full path for a file or folder using its parents. + Returns a string like '/My Drive/Folder/Subfolder/File.pdf'. + """ + path_parts = [file.name] + current = file + while True: + parents = getattr(current, "parents", None) + if not parents or not isinstance(parents, list) or not parents[0]: + break + parent_id = parents[0] + if parent_id == root_id: + path_parts.append(root_name) + break + parent = id_to_item.get(parent_id) + if not parent: + break + path_parts.append(parent.name) + current = parent + return "/" + "/".join(reversed(path_parts)) + + +def build_all_full_paths(files, root_id="root", root_name="My Drive", root_id_to_name=None): + """ + Efficiently compute the full path for every file/folder in one pass using an iterative approach + and memoization. 
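+    For example, with the default root, a file "B" in folder "A" directly under
+    the root resolves to "/My Drive/A/B".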
+    Supports multiple drives by using a root_id_to_name mapping.
+    Returns a dict: {file_id: full_path}
+    """
+    id_to_item = build_id_to_item(files)
+    id_to_path = {}
+    if root_id_to_name is None:
+        root_id_to_name = {root_id: root_name}
+    for item in files:
+        if not hasattr(item, "id") or not hasattr(item, "name"):
+            continue
+        if item.id in id_to_path:
+            continue
+        # Iterative path construction
+        current = item
+        temp_stack = []
+        while True:
+            if current.id in id_to_path:
+                break
+            parents = getattr(current, "parents", None)
+            if not parents or not isinstance(parents, list) or not parents[0]:
+                temp_stack.append((current.id, "/" + current.name))
+                break
+            parent_id = parents[0]
+            if parent_id in root_id_to_name:
+                temp_stack.append((current.id, "/" + root_id_to_name[parent_id] + "/" + current.name))
+                break
+            parent = id_to_item.get(parent_id)
+            if not parent:
+                temp_stack.append((current.id, "/" + current.name))
+                break
+            temp_stack.append((current.id, None))  # Mark as not yet resolved
+            current = parent
+        # Now unwind the stack and build the paths
+        while temp_stack:
+            file_id, path = temp_stack.pop()
+            if path is not None:
+                id_to_path[file_id] = path
+            else:
+                parent_id = id_to_item[file_id].parents[0]
+                parent_path = id_to_path.get(parent_id, "")
+                id_to_path[file_id] = parent_path.rstrip("/") + "/" + id_to_item[file_id].name
+        # Ensure root_name is present at the start (for legacy single-drive fallback)
+        found_root = False
+        for root_name_val in root_id_to_name.values():
+            if id_to_path[item.id].lstrip("/").startswith(root_name_val + "/"):  # e.g. 'My Drive/'
+                found_root = True
+                break
+        if not found_root:
+            # Use the first root_name as fallback; add a separator only when the
+            # existing path does not already start with one.
+            fallback_root = next(iter(root_id_to_name.values()))
+            if id_to_path[item.id].startswith("/"):
+                id_to_path[item.id] = "/" + fallback_root + id_to_path[item.id]
+            else:
+                id_to_path[item.id] = "/" + fallback_root + "/" + id_to_path[item.id]
+    return id_to_path
diff --git a/src/docbinder_oss/helpers/writers/base.py b/src/docbinder_oss/helpers/writers/base.py
new file mode 100644
index 0000000..b0da8af
--- /dev/null
+++ b/src/docbinder_oss/helpers/writers/base.py
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any
+
+
+class Writer(ABC):
+    """Abstract base writer class for exporting data."""
+
+    @abstractmethod
+    def write(self, data: Any, file_path: str | Path | None = None) -> None:
+        pass
diff --git a/src/docbinder_oss/helpers/writers/helper_functions.py b/src/docbinder_oss/helpers/writers/helper_functions.py
new file mode 100644
index 0000000..9277d74
--- /dev/null
+++ b/src/docbinder_oss/helpers/writers/helper_functions.py
@@ -0,0 +1,46 @@
+def flatten_file(item, provider=None):
+    """
+    Convert a file object (Pydantic, DummyFile, or dict) to a flat dict for export.
+    Flattens owners, parents, and last_modifying_user fields, and adds provider if given.
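+    For example, owners [{"email_address": "a@x.com"}, {"email_address": "b@y.com"}]
+    flatten to "a@x.com;b@y.com" and parents ["p1", "p2"] to "p1;p2" (illustrative values).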
+ """ + # Convert to dict + if hasattr(item, "model_dump"): + result = item.model_dump() + elif hasattr(item, "__dict__"): + result = dict(item.__dict__) + else: + result = dict(item) + # Add provider field to output dict + if provider: + result["provider"] = provider + # Flatten owners to email addresses + owners = result.get("owners") + if owners: + emails = [] + for owner in owners: + if isinstance(owner, dict): + emails.append(owner.get("email_address") or owner.get("email") or str(owner)) + elif hasattr(owner, "email_address"): + emails.append(owner.email_address) + else: + emails.append(str(owner)) + result["owners"] = ";".join(filter(None, emails)) + # Flatten parents to semicolon-separated string + parents = result.get("parents") + if isinstance(parents, list): + result["parents"] = ";".join(str(p) for p in parents) + elif parents is None: + result["parents"] = "" + else: + result["parents"] = str(parents) + # Flatten last_modifying_user to email address + lmu = result.get("last_modifying_user") + if lmu: + if isinstance(lmu, dict): + result["last_modifying_user"] = lmu.get("email_address") or lmu.get("email") or str(lmu) + elif hasattr(lmu, "email_address"): + result["last_modifying_user"] = lmu.email_address + else: + result["last_modifying_user"] = str(lmu) + + return result diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py new file mode 100644 index 0000000..ba282fa --- /dev/null +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -0,0 +1,37 @@ +from pathlib import Path +from typing import Dict, List + +from docbinder_oss.core.schemas import File +from docbinder_oss.helpers.writers.base import Writer +from docbinder_oss.helpers.writers.writer_console import ConsoleWriter +from docbinder_oss.helpers.writers.writer_csv import CSVWriter +from docbinder_oss.helpers.writers.writer_json import JSONWriter + + +class MultiFormatWriter: + """ + Factory writer that automatically detects format from file extension or format string. + If file_path is None, prints to console. 
+ """ + + _writers = { + ".csv": CSVWriter, + ".json": JSONWriter, + "csv": CSVWriter, + "json": JSONWriter, + } + + @classmethod + def write(cls, data: Dict[str, List[File]], file_path: str | None = None) -> None: + if not file_path: + ConsoleWriter().write(data) + return + extension = Path(file_path).suffix.lower() + # Use extension or fallback to format string + writer_key = extension if extension in cls._writers else file_path.lower() + if writer_key not in cls._writers: + raise ValueError(f"Unsupported format: {file_path}") + writer_class = cls._writers[writer_key] + writer: Writer = writer_class() + writer.write(data, file_path) + \ No newline at end of file diff --git a/src/docbinder_oss/helpers/writers/writer_console.py b/src/docbinder_oss/helpers/writers/writer_console.py new file mode 100644 index 0000000..ff17bff --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_console.py @@ -0,0 +1,20 @@ +from pathlib import Path +from typing import Any +from rich.table import Table +from rich import print +from docbinder_oss.helpers.writers.base import Writer + + +class ConsoleWriter(Writer): + """Writer for pretty-printing data to the console using rich tables.""" + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + table = Table(title="Files and Folders") + table.add_column("Provider", justify="right", style="cyan", no_wrap=True) + table.add_column("Id", style="magenta") + table.add_column("Name", style="magenta") + table.add_column("Kind", style="magenta") + for provider, items in data.items(): + for item in items: + table.add_row(provider, item.id, item.name, item.kind) + print(table) diff --git a/src/docbinder_oss/helpers/writers/writer_csv.py b/src/docbinder_oss/helpers/writers/writer_csv.py new file mode 100644 index 0000000..0d9c281 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_csv.py @@ -0,0 +1,29 @@ +import csv +import logging +from pathlib import Path +from typing import List, Dict, Union +from pydantic import BaseModel +from docbinder_oss.helpers.writers.base import Writer + +logger = logging.getLogger(__name__) + + +class CSVWriter(Writer): + """Writer for exporting data to CSV files.""" + def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: + fieldnames = next(iter(data.values()))[0].model_fields_set + return ["provider", *fieldnames] + + def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: + if not data: + logger.warning("No data to write to CSV.") + return + + with open(file_path, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) + writer.writeheader() + for provider, items in data.items(): + for item in items: + item_dict = item.model_dump() if isinstance(item, BaseModel) else item + item_dict['provider'] = provider + writer.writerow(item_dict) diff --git a/src/docbinder_oss/helpers/writers/writer_json.py b/src/docbinder_oss/helpers/writers/writer_json.py new file mode 100644 index 0000000..d928814 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_json.py @@ -0,0 +1,17 @@ +import json +from pathlib import Path +from typing import Dict, List, Union +from docbinder_oss.core.schemas import File +from docbinder_oss.helpers.writers.base import Writer + + +class JSONWriter(Writer): + """Writer for exporting data to JSON files.""" + + def write(self, data: Dict[str, List[File]], file_path: Union[str, Path]) -> None: + data = { + provider: [item.model_dump() for item in items] + for provider, items in data.items() + } + with 
open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False, default=str) diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index bf0b778..d28d3a4 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,21 +1,12 @@ -from typing import Annotated, List, Optional - import typer -import yaml - -from docbinder_oss.helpers.config import save_config, validate_config -from docbinder_oss.services import create_provider_instance +from docbinder_oss.cli.provider import app as provider_app +from docbinder_oss.cli.search import app as search_app +from docbinder_oss.cli.setup import app as setup_app app = typer.Typer() - -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. app.add_typer(provider_app, name="provider") +app.add_typer(search_app) +app.add_typer(setup_app) # This is the main entry point for the DocBinder CLI. @@ -25,134 +16,5 @@ def main(): pass -@app.command() -def hello(): - """Print a friendly greeting.""" - typer.echo("Hello, DocBinder OSS!") - - -@app.command() -def setup( - file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), - provider: Optional[List[str]] = typer.Option( - None, - "--provider", - help="Provider config as provider:key1=val1,key2=val2", - callback=lambda v: v or [], - ), -): - """Setup DocBinder configuration via YAML file or provider key-value pairs.""" - config_data = {} - if file: - with open(file, "r") as f: - config_data = yaml.safe_load(f) or {} - elif provider: - providers = {} - for entry in provider: - if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) - raise typer.Exit(code=1) - prov_name, prov_kvs = entry.split(":", 1) - kv_dict = {} - for pair in prov_kvs.split(","): - if "=" not in pair: - typer.echo(f"Provider config '{pair}' must be in key=value format.") - raise typer.Exit(code=1) - k, v = pair.split("=", 1) - kv_dict[k] = v - providers[prov_name] = kv_dict - config_data["providers"] = providers - validated = validate_config(config_data) - if not validated.providers: - typer.echo("No providers configured. Please add at least one provider.") - raise typer.Exit(code=1) - # Save the validated config - try: - save_config(validated) - except Exception as e: - typer.echo(f"Error saving config: {e}") - raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") - - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." 
- ), -): - """Get connection information for a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - - count = 0 - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type " - f"'{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." - ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) - - if __name__ == "__main__": app() diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/providers/__init__.py similarity index 89% rename from src/docbinder_oss/services/__init__.py rename to src/docbinder_oss/providers/__init__.py index 3384d07..0fe786e 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/providers/__init__.py @@ -7,16 +7,14 @@ from pydantic import Field from rich.logging import RichHandler -from docbinder_oss import services -from docbinder_oss.services.base_class import BaseStorageClient, ServiceConfig +from docbinder_oss import providers +from docbinder_oss.providers.base_class import BaseProvider, ServiceConfig if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) -logging.getLogger("googleapiclient").setLevel(logging.WARNING) +logging.getLogger("services").setLevel(logging.WARNING) logger = logging.getLogger(__name__) _provider_registry = None # Module-level cache @@ -39,7 +37,7 @@ def get_provider_registry() -> dict: return _provider_registry -def create_provider_instance(config: ServiceConfig) -> Optional["BaseStorageClient"]: +def create_provider_instance(config: ServiceConfig) -> Optional["BaseProvider"]: """ Factory function to create a provider instance from its config. 
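+    For example, a GoogleDriveServiceConfig yields a GoogleDriveClient;
+    unregistered provider types yield None.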
""" @@ -88,5 +86,5 @@ def get_service_union() -> Annotated: return Annotated[dynamic_union, Field(discriminator="type")] -load_services(services) +load_services(providers) ServiceUnion = get_service_union() diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/providers/base_class.py similarity index 74% rename from src/docbinder_oss/services/base_class.py rename to src/docbinder_oss/providers/base_class.py index 08761b3..4eb7862 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/providers/base_class.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from docbinder_oss.core.schemas import File, Permission +from docbinder_oss.core.schemas import Bucket, File, Permission class ServiceConfig(BaseModel): @@ -13,7 +13,7 @@ class ServiceConfig(BaseModel): name: str -class BaseStorageClient(ABC): +class BaseProvider(ABC): """ Abstract base class for a client that interacts with a cloud storage service. Defines a standard interface for listing items and retrieving metadata. @@ -34,7 +34,17 @@ def test_connection(self) -> bool: pass @abstractmethod - def list_files(self, folder_id: Optional[str] = None) -> List[File]: + def list_buckets(self) -> List[Bucket]: + """ + Lists all available buckets in the storage service. + + Returns: + A list of bucket names. + """ + pass + + @abstractmethod + def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: """ Lists items (files and folders) within a specific folder. @@ -47,6 +57,16 @@ def list_files(self, folder_id: Optional[str] = None) -> List[File]: """ pass + @abstractmethod + def list_all_files(self) -> List[File]: + """ + Lists all files and folders in the storage service. + + Returns: + A list of StorageItem objects representing all files and folders. + """ + pass + @abstractmethod def get_file_metadata(self, item_id: str) -> File: """ diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/providers/google_drive/__init__.py similarity index 56% rename from src/docbinder_oss/services/google_drive/__init__.py rename to src/docbinder_oss/providers/google_drive/__init__.py index 87153e8..71b6fe3 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/providers/google_drive/__init__.py @@ -7,9 +7,7 @@ if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) logging.getLogger("googleapiclient").setLevel(logging.WARNING) @@ -25,3 +23,19 @@ def register() -> dict: "config_class": GoogleDriveServiceConfig, "client_class": GoogleDriveClient, } + + +def get_service_name() -> str: + """ + Returns the name of the service. + This is used for logging and identification purposes. + """ + return "Google Drive" + + +def get_service_display_name() -> str: + """ + Returns the display name of the service. + This is used for user-friendly identification. 
+ """ + return "Google Drive Service" diff --git a/src/docbinder_oss/services/google_drive/google_drive_buckets.py b/src/docbinder_oss/providers/google_drive/google_drive_buckets.py similarity index 94% rename from src/docbinder_oss/services/google_drive/google_drive_buckets.py rename to src/docbinder_oss/providers/google_drive/google_drive_buckets.py index e5746be..1976b89 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_buckets.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_buckets.py @@ -25,7 +25,7 @@ def list_buckets(self) -> List[Bucket]: ] # Default root drive resp = ( - self.service.drives() + self.service.drives() # type: ignore[attr-defined] .list(fields="drives(id,name,kind,createdTime,hidden,restrictions)") .execute() ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py similarity index 58% rename from src/docbinder_oss/services/google_drive/google_drive_client.py rename to src/docbinder_oss/providers/google_drive/google_drive_client.py index d04ba27..6c68a71 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -1,4 +1,5 @@ import logging +import os from typing import List, Optional from google.auth.transport.requests import Request @@ -6,14 +7,14 @@ from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build -from docbinder_oss.core.schemas import File, Permission -from docbinder_oss.services.base_class import BaseStorageClient -from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets -from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles -from docbinder_oss.services.google_drive.google_drive_permissions import ( +from docbinder_oss.core.schemas import Bucket, File, Permission +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_buckets import GoogleDriveBuckets +from docbinder_oss.providers.google_drive.google_drive_files import GoogleDriveFiles +from docbinder_oss.providers.google_drive.google_drive_permissions import ( GoogleDrivePermissions, ) -from docbinder_oss.services.google_drive.google_drive_service_config import ( +from docbinder_oss.providers.google_drive.google_drive_service_config import ( GoogleDriveServiceConfig, ) @@ -21,7 +22,7 @@ logger.setLevel(logging.INFO) -class GoogleDriveClient(BaseStorageClient): +class GoogleDriveClient(BaseProvider): def __init__(self, config: GoogleDriveServiceConfig): super().__init__(config) logger.info("Initializing Google Drive client") @@ -30,7 +31,7 @@ def __init__(self, config: GoogleDriveServiceConfig): "https://www.googleapis.com/auth/drive.metadata.readonly", "https://www.googleapis.com/auth/drive.activity.readonly", ] - self.config = config + self.settings = config self.creds = self._get_credentials() self.service = build("drive", "v3", credentials=self.creds) self.buckets = GoogleDriveBuckets(self.service) @@ -38,10 +39,15 @@ def __init__(self, config: GoogleDriveServiceConfig): self.permissions = GoogleDrivePermissions(self.service) def _get_credentials(self): + logger.info("Getting credentials for Google Drive client") + + TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") + # Ensure the directory exists + os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) + logger.debug(f"Token path: 
{TOKEN_PATH}") + try: - creds = Credentials.from_authorized_user_file( - self.config.gcp_token_json, scopes=self.SCOPES - ) + creds = Credentials.from_authorized_user_file(TOKEN_PATH, scopes=self.SCOPES) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") creds = None @@ -49,12 +55,10 @@ def _get_credentials(self): if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: - flow = InstalledAppFlow.from_client_secrets_file( - self.config.gcp_credentials_json, self.SCOPES - ) + flow = InstalledAppFlow.from_client_secrets_file(self.settings.gcp_credentials_json, self.SCOPES) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(self.config.gcp_token_json, "w") as token: + with open(TOKEN_PATH, "w") as token: token.write(creds.to_json()) return creds @@ -66,14 +70,17 @@ def test_connection(self) -> bool: logger.error(f"Test connection failed: {e}") return False - def list_buckets(self) -> list: + def list_buckets(self) -> list[Bucket]: return self.buckets.list_buckets() - def list_files(self, folder_id: Optional[str] = None) -> List[File]: - return self.files.list_files(folder_id) + def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: + return self.files.list_files_in_folder(folder_id) + + def list_all_files(self) -> List[File]: + return self.files.list_files_in_folder() def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) - def get_permissions(self, item_id: str) -> List[Permission]: - return self.permissions.get_permissions(item_id) + def get_permissions(self, file_id: str) -> List[Permission]: + return self.permissions.get_permissions(file_id) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py similarity index 71% rename from src/docbinder_oss/services/google_drive/google_drive_files.py rename to src/docbinder_oss/providers/google_drive/google_drive_files.py index 4c7e3b6..18fbb58 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -10,7 +10,7 @@ "id,name,mimeType,kind,size,createdTime,modifiedTime," "owners(permissionId,displayName,emailAddress,photoLink)," "lastModifyingUser(permissionId,displayName,emailAddress,photoLink)," - "webViewLink,iconLink,trashed,shared,starred" + "webViewLink,iconLink,trashed,shared,starred,parents" ) @@ -18,34 +18,26 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, folder_id=None): - if folder_id and len(folder_id.split("|", 1)) > 1: - logger.warning("Folder ID should not contain '|' character") - _, folder_id = folder_id.split("|", 1) + def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: + args = { + "fields": f"nextPageToken,files({REQUIRED_FIELDS})", + "pageSize": 1000, + } - if folder_id == "root": - query = "'root' in parents and trashed=false" - resp = ( - self.service.files() - .list( - q=query, - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + if bucket_id: + args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - resp = ( - self.service.files() - .list( - corpora="drive", - q=f"'{folder_id}' in parents and trashed=false", - driveId=folder_id, - includeItemsFromAllDrives=True, - supportsAllDrives=True, - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + args["q"] = 
"trashed=false" + + resp = self.service.files().list(**args).execute() + files = resp.get("files", []) + next_page_token = resp.get("nextPageToken") + + while next_page_token: + logger.debug("Getting next page...") + current_page = self.service.files().list(**args, pageToken=next_page_token).execute() + files.extend(current_page.get("files", [])) + next_page_token = current_page.get("nextPageToken") return [ File( @@ -77,14 +69,14 @@ def list_files(self, folder_id=None): shared=f.get("shared"), starred=f.get("starred"), is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", - parents=folder_id if folder_id else None, + parents=f.get("parents") if isinstance(f.get("parents"), list) else None, ) - for f in resp.get("files") + for f in files ] def get_file_metadata(self, file_id: str): item_metadata = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .get( fileId=file_id, fields=f"{REQUIRED_FIELDS}", @@ -110,12 +102,8 @@ def get_file_metadata(self, file_id: str): for owner in item_metadata.get("owners") ], last_modifying_user=User( - display_name=item_metadata.get("lastModifyingUser", {}).get( - "displayName" - ), - email_address=item_metadata.get("lastModifyingUser", {}).get( - "emailAddress" - ), + display_name=item_metadata.get("lastModifyingUser", {}).get("displayName"), + email_address=item_metadata.get("lastModifyingUser", {}).get("emailAddress"), photo_link=item_metadata.get("lastModifyingUser", {}).get("photoLink"), kind=item_metadata.get("lastModifyingUser", {}).get("kind"), ), @@ -124,7 +112,6 @@ def get_file_metadata(self, file_id: str): trashed=item_metadata.get("trashed"), shared=item_metadata.get("shared"), starred=item_metadata.get("starred"), - is_folder=item_metadata.get("mimeType") - == "application/vnd.google-apps.folder", + is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/providers/google_drive/google_drive_permissions.py similarity index 91% rename from src/docbinder_oss/services/google_drive/google_drive_permissions.py rename to src/docbinder_oss/providers/google_drive/google_drive_permissions.py index 70988e2..8b6fd23 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_permissions.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_permissions.py @@ -18,7 +18,7 @@ def get_user(self): Returns: User object containing the user's details. 
""" - resp = self.service.about().get(fields="user").execute() + resp = self.service.about().get(fields="user").execute() # type: ignore[attr-defined] user_info = resp.get("user", {}) return User( @@ -31,11 +31,7 @@ def get_user(self): ) def get_permissions(self, item_id: str): - resp = ( - self.service.permissions() - .list(fileId=item_id, fields="permissions") - .execute() - ) + resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() # type: ignore[attr-defined] return [ Permission( diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py similarity index 68% rename from src/docbinder_oss/services/google_drive/google_drive_service_config.py rename to src/docbinder_oss/providers/google_drive/google_drive_service_config.py index dd6c957..d98c058 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py @@ -1,9 +1,9 @@ from typing import Literal -from docbinder_oss.services.base_class import ServiceConfig +from docbinder_oss.providers.base_class import ServiceConfig class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] + name: str gcp_credentials_json: str - gcp_token_json: str diff --git a/src/docbinder_oss/services/dropbox/__init__.py b/src/docbinder_oss/services/dropbox/__init__.py deleted file mode 100644 index 80759af..0000000 --- a/src/docbinder_oss/services/dropbox/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .dropbox_client import DropboxClient -from .dropbox_service_config import DropboxServiceConfig - - -def register(): - # Register the Dropbox client - return { - "display_name": "dropbox", - "config_class": DropboxServiceConfig, - "client_class": DropboxClient, - } diff --git a/src/docbinder_oss/services/dropbox/dropbox_client.py b/src/docbinder_oss/services/dropbox/dropbox_client.py deleted file mode 100644 index 3919701..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_client.py +++ /dev/null @@ -1,5 +0,0 @@ -from docbinder_oss.services.base_class import BaseStorageClient - - -class DropboxClient(BaseStorageClient): - pass diff --git a/src/docbinder_oss/services/dropbox/dropbox_service_config.py b/src/docbinder_oss/services/dropbox/dropbox_service_config.py deleted file mode 100644 index 515c471..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_service_config.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Literal - -from docbinder_oss.services.base_class import ServiceConfig - - -class DropboxServiceConfig(ServiceConfig): - type: Literal["dropbox"] = "dropbox" # type: ignore[override] - api_key: str diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py new file mode 100644 index 0000000..eb37e4d --- /dev/null +++ b/tests/commands/test_search_command.py @@ -0,0 +1,329 @@ +import csv +import json +from typing import Dict +import pytest +from pathlib import Path +from typer.testing import CliRunner +from docbinder_oss.core.schemas import User +from docbinder_oss.main import app +from conftest import DummyModel + + +runner = CliRunner() + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), + 
DummyModel(id="dummy_file2", name="dummy File 2", kind="file"),
+    ])], indirect=True)
+def test_search_export_csv(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test happy path for search command with CSV export."""
+    result = runner.invoke(app, ["search", "--export-file", "search_results.csv"])
+    assert result.exit_code == 0
+    assert Path("search_results.csv").exists()
+    with open("search_results.csv") as f:
+        reader = csv.DictReader(f)
+        rows = list(reader)
+        assert len(rows) == 4
+        assert set(r["provider"] for r in rows) == {"dummy1", "dummy2"}
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"),
+    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"),
+    ])], indirect=True)
+def test_search_export_json(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test happy path for search command with JSON export."""
+    result = runner.invoke(app, ["search", "--export-file", "search_results.json"])
+    assert result.exit_code == 0
+    assert Path("search_results.json").exists()
+    with open("search_results.json") as f:
+        data: Dict = json.load(f)
+        assert len(data.keys()) == 2
+        assert len(data["dummy1"]) == 2
+        assert len(data["dummy2"]) == 2
+        assert all(key in data for key in ("dummy1", "dummy2"))
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"),
+    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"),
+    ])], indirect=True)
+def test_search_name_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """
+    Test search command with name filter that returns no results.
+    """
+    result = runner.invoke(app, ["search", "--name", "Alpha", "--export-file", "search_results.json"])
+    assert result.exit_code == 0
+    with open("search_results.json") as f:
+        data = json.load(f)
+        assert len(data["dummy1"]) == 0
+        assert len(data["dummy2"]) == 0
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"),
+    DummyModel(id="dummy_file2", name="File 2", kind="file"),
+    ])], indirect=True)
+def test_search_name_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """
+    Test search command with name filter that returns some results.
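+    Both providers return the same mocked file list, so each provider key is
+    expected to contain a single match, "dummy File 1".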
+ """ + result = runner.invoke(app, ["search", "--name", "dummy", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 2 + assert data["dummy1"][0]["name"] == "dummy File 1" + assert data["dummy2"][0]["name"] == "dummy File 1" + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@a.com", + photo_link="https://test.com", + kind="" + ) + ] + ), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns no results.""" + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@b.com", + photo_link="https://test.com", + kind="" + ) + ] + ), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns some results.""" + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data["dummy1"][0]["owners"][0]["email_address"] == "beta@b.com" + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", modified_time="2023-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", modified_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_updated_after_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with updated_after filter that returns no results.""" + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", modified_time="2024-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", 
modified_time="2024-01-31T00:00:00"),
+    ])], indirect=True)
+def test_search_updated_after_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test search command with updated_after filter that returns some results."""
+    result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"])
+    assert result.exit_code == 0
+    with open("search_results.json") as f:
+        data = json.load(f)
+        assert len(data["dummy1"]) == 1
+        assert data["dummy1"][0]["name"] == "dummy File 1"
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-04-02T00:00:00"),
+    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-04-30T00:00:00"),
+    ])], indirect=True)
+def test_search_created_before_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test search command with created_before filter that returns no results."""
+    result = runner.invoke(
+        app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"]
+    )
+    assert result.exit_code == 0
+    with open("search_results.json") as f:
+        data = json.load(f)
+        assert len(data["dummy1"]) == 0
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-02-02T00:00:00"),
+    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-01-31T00:00:00"),
+    ])], indirect=True)
+def test_search_created_before_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test search command with created_before filter that returns some results."""
+    result = runner.invoke(
+        app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"]
+    )
+    assert result.exit_code == 0
+    with open("search_results.json") as f:
+        data = json.load(f)
+        assert len(data["dummy1"]) == 1
+        assert data["dummy1"][0]["name"] == "dummy File 2"
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=1),
+    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2),
+    ])], indirect=True)
+def test_search_min_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock):
+    """Test search command with min_size filter that returns no results."""
+    result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"])
+    assert result.exit_code == 0
+    with open("search_results.json") as f:
+        data = json.load(f)
+        assert len(data["dummy1"]) == 0
+
+@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True)
+@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True)
+@pytest.mark.parametrize('list_all_files_mock', [([
+    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5),
+    DummyModel(id="dummy_file2",
name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_min_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + runner = CliRunner() + result = runner.invoke(app, ["search", "--min-size", 3, "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 1" + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=3), + ])], indirect=True) +def test_search_max_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with max_size filter that returns no results.""" + result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 1 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_max_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with max_size filter that returns some results.""" + result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 2" + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_provider_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with provider filter that returns no results.""" + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_provider_filter(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with provider filter that returns some results.""" + result = runner.invoke(app, ["search", 
"--provider", "dummy2", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert "dummy2" in data + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel( + id="dummy_file1", + name="Beta File 1", + kind="file", + size=5, + owners=[ + User( + display_name="test", + email_address="beta@b.com", + photo_link="https://test.com", + kind="" + ) + ] + ), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_combined_filters(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with combined filters.""" + result = runner.invoke( + app, + [ + "search", + "--name", + "Beta", + "--owner", + "beta@b.com", + "--min-size", + "3", + "--provider", + "dummy2", + "--export-file", + "search_results.json", + ], + ) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert "dummy2" in data + assert data["dummy2"][0]["name"] == "Beta File 1" + assert data["dummy2"][0]["owners"][0]["email_address"] == "beta@b.com" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..062bc2a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,141 @@ +from typing import List +from unittest.mock import MagicMock, patch + +from pydantic import BaseModel, ConfigDict +import pytest + +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_client import ( + GoogleDriveClient, +) +from docbinder_oss.providers.google_drive.google_drive_service_config import ( + GoogleDriveServiceConfig, +) + + +class DummyModel(BaseModel): + id: str + name: str + kind: str + + model_config = ConfigDict(extra="allow") + + +class DummyProvider(BaseProvider): + def __init__(self, name, type=None): + self.name = name + self.type = type if type else f"{name}_type" + + def list_all_files(self): + raise NotImplementedError("Please use the pytest parametrize settings to add your test data.") + def test_connection(self): + raise NotImplementedError("This provider does not implement connection testing") + def list_buckets(self): + raise NotImplementedError("This provider does not implement buckets") + def get_permissions(self): + raise NotImplementedError("This provider does not implement permissions") + def list_files_in_folder(self): + raise NotImplementedError("This provider does not implement folder listing") + def get_file_metadata(self, item_id): + raise NotImplementedError("This provider does not implement file metadata retrieval") + +class DummyConfig: + providers: List[DummyProvider] = [] + +@pytest.fixture +def sample_data(): + return { + "provider1": [ + DummyModel(id="1", name="FileA", kind="file"), + DummyModel(id="2", name="FolderB", kind="folder"), + ], + "provider2": [ + DummyModel(id="3", name="FileC", kind="file"), + ], + } + +@pytest.fixture +def mock_gdrive_provider(): + """ + This is the core of our testing strategy. We use 'patch' to replace + the `build` function from the googleapiclient library. + + Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will + receive our mock object instead of making a real network call. 
+    """
+    with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build:
+        # Create a mock for the provider object that `build` would return
+        mock_provider = MagicMock()
+        # Configure the `build` function to return our mock provider
+        mock_build.return_value = mock_provider
+        yield mock_provider
+
+
+@pytest.fixture
+def gdrive_client(mock_gdrive_provider):
+    """
+    Creates an instance of our GoogleDriveClient.
+    It will be initialized with a fake config and will use
+    the mock_gdrive_provider fixture internally.
+    """
+    # Patch _get_credentials to avoid real auth
+    with patch(
+        "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials",
+        return_value=MagicMock(),
+    ):
+        config = GoogleDriveServiceConfig(
+            name="test_gdrive",
+            gcp_credentials_json="fake_creds.json",
+        )
+        return GoogleDriveClient(config=config)
+
+@pytest.fixture(scope='session')
+def load_config_mock(request, create_config_mock):
+    """
+    This fixture mocks the `load_config` function to return
+    a dummy configuration with a specified number of providers.
+    """
+    name, number_of_providers = request.param
+    with patch(
+        "docbinder_oss.cli.search.load_config",
+        return_value=create_config_mock(name, number_of_providers)
+    ) as _fixture:
+        yield _fixture
+
+@pytest.fixture(scope='session')
+def create_provider_instance_mock(request, create_provider_mock):
+    """
+    This fixture mocks the `create_provider_instance` function to return
+    a dummy provider instance based on the provider name.
+    """
+    with patch(
+        "docbinder_oss.cli.search.create_provider_instance",
+        return_value=create_provider_mock(request.param)
+    ) as _fixture:
+        yield _fixture
+
+@pytest.fixture(scope="session")
+def list_all_files_mock(request):
+    """
+    This fixture patches `DummyProvider.list_all_files` so that it returns
+    the parametrized list of dummy files.
+    """
+    data = request.param
+    with patch("conftest.DummyProvider.list_all_files", return_value=data) as _fixture:
+        yield _fixture
+
+@pytest.fixture(scope='session')
+def create_provider_mock():
+    def create_dummy_provider(name):
+        return DummyProvider(name=name, type=f"{name}_type")
+    yield create_dummy_provider
+
+@pytest.fixture(scope='session')
+def create_config_mock(create_provider_mock):
+    """This fixture creates a dummy configuration with a specified number of providers."""
+    def create_dummy_config(name, number_of_providers=2):
+        dummy_config = DummyConfig()
+        dummy_config.providers = [create_provider_mock(f"{name}{i+1}") for i in range(number_of_providers)]
+        return dummy_config
+    yield create_dummy_config
\ No newline at end of file
diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py
new file mode 100644
index 0000000..abe0920
--- /dev/null
+++ b/tests/helpers/test_writer.py
@@ -0,0 +1,97 @@
+import json
+import csv
+import pytest
+from pydantic import BaseModel
+
+from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter
+from docbinder_oss.helpers.writers.writer_csv import CSVWriter
+from docbinder_oss.helpers.writers.writer_json import JSONWriter
+
+
+class DummyModel(BaseModel):
+    id: str
+    name: str
+    kind: str
+
+
+@pytest.fixture
+def sample_data():
+    return {
+        "provider1": [
+            DummyModel(id="1", name="FileA", kind="file"),
+            DummyModel(id="2", name="FolderB", kind="folder"),
+        ],
+        "provider2": [
+            DummyModel(id="3", name="FileC", kind="file"),
+        ],
+    }
+
+
+def test_csv_writer(tmp_path, sample_data):
+    file_path = tmp_path / "output.csv"
+    writer = CSVWriter()
+    writer.write(sample_data, file_path)
+    assert file_path.exists()
+    with open(file_path,
newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + # Allow extra fields, but required fields must be present + for row in rows: + for field in ("provider", "id", "name", "kind"): + assert field in row + assert rows[0]["provider"] == "provider1" + + +def test_json_writer(tmp_path, sample_data): + file_path = tmp_path / "output.json" + writer = JSONWriter() + writer.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding="utf-8") as f: + data = json.load(f) + assert isinstance(data, dict) + assert len(data) == 2 + assert "provider1" in data + assert "provider2" in data + assert data["provider1"][0]["id"] == "1" + assert data["provider2"][0]["id"] == "3" + + +def test_multiformat_writer_csv(tmp_path, sample_data): + file_path = tmp_path / "test.csv" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + + +def test_multiformat_writer_json(tmp_path, sample_data): + file_path = tmp_path / "test.json" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding="utf-8") as f: + data = json.load(f) + assert isinstance(data, dict) + assert "provider1" in data + assert "provider2" in data + + +def test_multiformat_writer_unsupported(tmp_path, sample_data): + file_path = tmp_path / "test.unsupported" + # Convert file_path to str for .lower() in MultiFormatWriter + with pytest.raises(ValueError): + MultiFormatWriter.write(sample_data, str(file_path)) + + +def test_csv_writer_empty_data(tmp_path, caplog): + import logging + + file_path = tmp_path / "empty.csv" + writer = CSVWriter() + logger = logging.getLogger() + with caplog.at_level("WARNING", logger=logger.name): + writer.write({}, file_path) + assert "No data to write to CSV." 
in caplog.text diff --git a/tests/providers/google_drive/__init__.py b/tests/providers/google_drive/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/services/google_drive/test_google_drive_buckets.py b/tests/providers/google_drive/test_google_drive_buckets.py similarity index 88% rename from tests/services/google_drive/test_google_drive_buckets.py rename to tests/providers/google_drive/test_google_drive_buckets.py index 44e3bd5..bff2dee 100644 --- a/tests/services/google_drive/test_google_drive_buckets.py +++ b/tests/providers/google_drive/test_google_drive_buckets.py @@ -3,7 +3,7 @@ from docbinder_oss.core.schemas import Bucket -def test_list_buckets(mock_gdrive_service, gdrive_client): +def test_list_buckets(mock_gdrive_provider, gdrive_client): fake_api_response = { "drives": [ { @@ -21,9 +21,7 @@ def test_list_buckets(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.drives.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = fake_api_response buckets = gdrive_client.list_buckets() diff --git a/tests/providers/google_drive/test_google_drive_files.py b/tests/providers/google_drive/test_google_drive_files.py new file mode 100644 index 0000000..b8a5866 --- /dev/null +++ b/tests/providers/google_drive/test_google_drive_files.py @@ -0,0 +1,133 @@ +from datetime import datetime +import os +import pytest + + +class DummyFile: + def __init__(self, id, name, parents=None, is_folder=False): + self.id = id + self.name = name + # Always use a list for parents, or None + if parents is None: + self.parents = None + elif isinstance(parents, list): + self.parents = parents + else: + self.parents = [parents] + self.is_folder = is_folder + self.size = 1000 + # Use correct mime_type for folders and files + self.mime_type = "application/vnd.google-apps.folder" if is_folder else "application/pdf" + self.created_time = "2024-01-01T00:00:00" + self.modified_time = "2024-01-02T00:00:00" + self.owners = [type("User", (), {"email_address": "owner@example.com"})()] + self.last_modifying_user = type("User", (), {"email_address": "mod@example.com"})() + self.web_view_link = "http://example.com/view" + self.web_content_link = "http://example.com/content" + self.shared = True + self.trashed = False + + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + class DummyProviderConfig: + name = "googledrive" + + class DummyConfig: + providers = [DummyProviderConfig()] + + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + + # Simulate a folder structure: root -> folder1 -> file1, file2; root -> file3 + def list_all_files(self): + return [ + DummyFile(id="root", name="root", is_folder=True), + DummyFile(id="folder1", name="folder1", parents="root", is_folder=True), + DummyFile(id="file1", name="file1.pdf", parents="folder1"), + DummyFile(id="file2", name="file2.pdf", parents="folder1"), + DummyFile(id="file3", name="file3.pdf", parents="root"), + ] + + class DummyClient: + def list_all_files(self): + return list_all_files(self) + + monkeypatch.setattr("docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient()) + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) + + +def test_list_files(mock_gdrive_provider, gdrive_client): + fake_api_response = { + "files": [ + { + "id": "1234", + "name": "testDrive", + "mimeType": "application/vnd.google-apps.drive", + "kind": 
"drive#drive", + "isFolder": False, + "webViewLink": "https://drive.google.com/drive/folders/1234", + "iconLink": "https://drive.google.com/drive/folders/1234/icon", + "createdTime": datetime(2023, 10, 1, 12, 0, 0), + "modifiedTime": datetime(2023, 10, 1, 12, 0, 0), + "owners": [ + { + "displayName": "Test User", + "emailAddress": "test@test.com", + "photoLink": "https://example.com/photo.jpg", + "kind": "drive#user", + } + ], + "lastModifyingUser": { + "displayName": "Test User", + "emailAddress": "test@test.com", + "photoLink": "https://example.com/photo.jpg", + "kind": "drive#user", + }, + "size": "1024", + "parents": "root", + "shared": True, + "starred": False, + "trashed": False, + }, + ] + } + + mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = fake_api_response + + files = gdrive_client.list_files_in_folder() + + print(files) + + assert isinstance(files, list) + assert len(files) == 1 + # Compare fields individually to match the actual File model structure + file = files[0] + assert file.id == "1234" + assert file.name == "testDrive" + assert file.mime_type == "application/vnd.google-apps.drive" + assert file.kind == "drive#drive" + assert file.is_folder is False + assert str(file.web_view_link) == "https://drive.google.com/drive/folders/1234" + assert str(file.icon_link) == "https://drive.google.com/drive/folders/1234/icon" + assert file.created_time == datetime(2023, 10, 1, 12, 0, 0) + assert file.modified_time == datetime(2023, 10, 1, 12, 0, 0) + assert len(file.owners) == 1 + owner = file.owners[0] + assert getattr(owner, "display_name", None) == "Test User" + assert getattr(owner, "email_address", None) == "test@test.com" + assert getattr(owner, "kind", None) == "drive#user" + assert str(getattr(owner, "photo_link", "")) == "https://example.com/photo.jpg" + last_mod = file.last_modifying_user + assert getattr(last_mod, "display_name", None) == "Test User" + assert getattr(last_mod, "email_address", None) == "test@test.com" + assert getattr(last_mod, "kind", None) == "drive#user" + assert str(getattr(last_mod, "photo_link", "")) == "https://example.com/photo.jpg" + assert file.size == "1024" + # Accept None or any list value for parents + assert file.parents is None or isinstance(file.parents, list) + assert file.shared is True + assert file.starred is False + assert file.trashed is False diff --git a/tests/services/google_drive/test_google_drive_permissions.py b/tests/providers/google_drive/test_google_drive_permissions.py similarity index 86% rename from tests/services/google_drive/test_google_drive_permissions.py rename to tests/providers/google_drive/test_google_drive_permissions.py index ddc0b8c..63d8865 100644 --- a/tests/services/google_drive/test_google_drive_permissions.py +++ b/tests/providers/google_drive/test_google_drive_permissions.py @@ -1,7 +1,7 @@ from docbinder_oss.core.schemas import Permission, User -def test_get_permissions(mock_gdrive_service, gdrive_client): +def test_get_permissions(mock_gdrive_provider, gdrive_client): fake_api_response = { "permissions": [ { @@ -18,9 +18,7 @@ def test_get_permissions(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.permissions.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = fake_api_response permissions = gdrive_client.get_permissions("1234") diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py deleted file mode 
100644 index ff50b73..0000000 --- a/tests/services/google_drive/conftest.py +++ /dev/null @@ -1,49 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from docbinder_oss.services.google_drive.google_drive_client import ( - GoogleDriveClient, -) -from docbinder_oss.services.google_drive.google_drive_service_config import ( - GoogleDriveServiceConfig, -) - - -@pytest.fixture -def mock_gdrive_service(): - """ - This is the core of our testing strategy. We use 'patch' to replace - the `build` function from the googleapiclient library. - - Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will - receive our mock object instead of making a real network call. - """ - with patch( - "docbinder_oss.services.google_drive.google_drive_client.build" - ) as mock_build: - # Create a mock for the service object that `build` would return - mock_service = MagicMock() - # Configure the `build` function to return our mock service - mock_build.return_value = mock_service - yield mock_service - - -@pytest.fixture -def gdrive_client(mock_gdrive_service): - """ - Creates an instance of our GoogleDriveClient. - It will be initialized with a fake config and will use - the mock_gdrive_service fixture internally. - """ - # Patch _get_credentials to avoid real auth - with patch( - "docbinder_oss.services.google_drive.google_drive_client.GoogleDriveClient._get_credentials", - return_value=MagicMock(), - ): - config = GoogleDriveServiceConfig( - name="test_gdrive", - gcp_credentials_json="fake_creds.json", - gcp_token_json="fake_token.json", - ) - return GoogleDriveClient(config=config) diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py deleted file mode 100644 index 6443cfb..0000000 --- a/tests/services/google_drive/test_google_drive_files.py +++ /dev/null @@ -1,83 +0,0 @@ -from datetime import datetime - -from docbinder_oss.core.schemas import File - - -def test_list_files(mock_gdrive_service, gdrive_client): - fake_api_response = { - "files": [ - { - "id": "1234", - "name": "testDrive", - "mimeType": "application/vnd.google-apps.drive", - "kind": "drive#drive", - "isFolder": False, - "webViewLink": "https://drive.google.com/drive/folders/1234", - "iconLink": "https://drive.google.com/drive/folders/1234/icon", - "createdTime": datetime(2023, 10, 1, 12, 0, 0), - "modifiedTime": datetime(2023, 10, 1, 12, 0, 0), - "owners": [ - { - "displayName": "Test User", - "emailAddress": "test@test.com", - "photoLink": "https://example.com/photo.jpg", - "kind": "drive#user", - } - ], - "lastModifyingUser": { - "displayName": "Test User", - "emailAddress": "test@test.com", - "photoLink": "https://example.com/photo.jpg", - "kind": "drive#user", - }, - "size": "1024", - "parents": "root", - "shared": True, - "starred": False, - "trashed": False, - }, - ] - } - - mock_gdrive_service.files.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) - - files = gdrive_client.list_files() - - print(files) - - assert isinstance(files, list) - assert len(files) == 1 - assert files == [ - File( - id="1234", - name="testDrive", - mime_type="application/vnd.google-apps.drive", - kind="drive#drive", - is_folder=False, - web_view_link="https://drive.google.com/drive/folders/1234", - icon_link="https://drive.google.com/drive/folders/1234/icon", - created_time=datetime(2023, 10, 1, 12, 0, 0), - modified_time=datetime(2023, 10, 1, 12, 0, 0), - owners=[ - { - "display_name": "Test User", - "email_address": 
"test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - } - ], - last_modifying_user={ - "display_name": "Test User", - "email_address": "test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - }, - size="1024", - parents=None, - shared=True, - starred=False, - trashed=False, - ) - ] diff --git a/uv.lock b/uv.lock index 8630097..61dfd65 100644 --- a/uv.lock +++ b/uv.lock @@ -37,6 +37,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/37/fb6973edeb700f6e3d6ff222400602ab1830446c25c7b4676d8de93e65b8/backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc", size = 380336, upload-time = "2025-02-25T16:53:29.858Z" }, ] +[[package]] +name = "black" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" }, + { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, + { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, + { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, + { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = 
"2025-01-29T04:18:33.823Z" }, + { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, + { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, + { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, + { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, + { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, +] + [[package]] name = "cachetools" version = "5.5.2" @@ -55,6 +83,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" }, ] +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -167,8 +204,10 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "black" }, { name = "mkdocs" }, { name = "mkdocs-material" }, + { name = "pre-commit" }, { name = "pytest" }, { name = "tox" }, { name = "tox-uv" }, @@ -188,8 +227,10 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "black", specifier = ">=25.1.0" }, { name = "mkdocs", specifier = ">=1.6.1" }, { name = "mkdocs-material", specifier = ">=9.6.14" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = 
">=8.4.0" }, { name = "tox", specifier = ">=4.26.0" }, { name = "tox-uv", specifier = ">=1.26.0" }, @@ -325,6 +366,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" }, ] +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, +] + [[package]] name = "idna" version = "3.10" @@ -511,6 +561,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -565,6 +633,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = 
{ url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + [[package]] name = "proto-plus" version = "1.26.1"