From 9a5ee1726c9c553b6d419f4d331199c0b42d5da3 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Sun, 15 Jun 2025 15:35:03 +0200 Subject: [PATCH 01/39] Added the docbinder search command that allows to search through all google drive documents --- src/docbinder_oss/main.py | 184 ++++++++++-------- src/docbinder_oss/provider.py | 93 +++++++++ src/docbinder_oss/services/base_class.py | 10 + .../services/google_drive/__init__.py | 15 ++ .../google_drive/google_drive_client.py | 10 +- 5 files changed, 233 insertions(+), 79 deletions(-) create mode 100644 src/docbinder_oss/provider.py diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 949fc89..720e3d4 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,22 +1,12 @@ -from typing import Annotated, List, Optional +from typing import List, Optional import typer import yaml from docbinder_oss.helpers.config import save_config, validate_config -from docbinder_oss.services import create_provider_instance app = typer.Typer() -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") - # This is the main entry point for the DocBinder CLI. @app.callback() @@ -76,82 +66,120 @@ def setup( raise typer.Exit(code=1) typer.echo("Configuration saved successfully.") - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." 
- ), +@app.command() +def search( + name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), + max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), ): - """Get connection information for a specific provider.""" + """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + import re + import csv + import json + from datetime import datetime from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance config = load_config() - - count = 0 if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." 
- ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) + results = [] for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) - + if provider and provider_config.name != provider: + continue + client = create_provider_instance(provider_config) + if client is None or not hasattr(client, "list_all_files"): + continue + try: + files = client.list_all_files() + for item in files: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except Exception: + continue + # Collect all possible params for export + results.append({ + "provider": provider_config.name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) + except Exception as 
e: + typer.echo(f"Error searching provider '{provider_config.name}': {e}") + # Write results to CSV or JSON + if results: + fieldnames = [ + "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + ] + if export_format.lower() == "json": + with open("search_results.json", "w") as jsonfile: + json.dump(results, jsonfile, indent=2, default=str) + typer.echo(f"{len(results)} results written to search_results.json") + else: + with open("search_results.csv", "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in results: + writer.writerow(row) + typer.echo(f"{len(results)} results written to search_results.csv") + else: + typer.echo("No results found.") + return results if __name__ == "__main__": app() diff --git a/src/docbinder_oss/provider.py b/src/docbinder_oss/provider.py new file mode 100644 index 0000000..4f97fa1 --- /dev/null +++ b/src/docbinder_oss/provider.py @@ -0,0 +1,93 @@ +from typing import Annotated +import typer +from .main import app +from docbinder_oss.services import create_provider_instance + + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +provider_app = typer.Typer( + help="Commands to manage providers. List them or get details for a specific one." +) +# We add this group to our main application. +app.add_typer(provider_app, name="provider") + +@provider_app.command() +def list(): + """List all configured providers.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + for provider in config.providers: + typer.echo(f"Provider: {provider.name}, Type: {provider.type}") + + +@provider_app.command("get") +def get_provider( + connection_type: str = typer.Option( + None, "--type", "-t", help="The type of the provider to get." + ), + name: str = typer.Option( + None, "--name", "-n", help="The name of the provider to get." + ), +): + """Get connection information for a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + + count = 0 + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider in config.providers: + if provider.name == name: + typer.echo(f"Provider '{name}' found with config: {provider}") + count += 1 + if provider.type == connection_type: + typer.echo( + f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + ) + count += 1 + if count == 0: + typer.echo( + f"No providers found with name '{name}' or type '{connection_type}'." 
+ ) + raise typer.Exit(code=1) + + +@provider_app.command("test") +def test( + name: Annotated[ + str, typer.Argument(help="The name of the provider to test the connection.") + ], +): + """Test the connection to a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider_config in config.providers: + if provider_config.name == name: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found + typer.echo(f"Provider '{name}' not found in configuration.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index 08761b3..dd51cec 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -47,6 +47,16 @@ def list_files(self, folder_id: Optional[str] = None) -> List[File]: """ pass + @abstractmethod + def list_all_files(self) -> List[File]: + """ + Lists all files and folders in the storage service. + + Returns: + A list of StorageItem objects representing all files and folders. + """ + pass + @abstractmethod def get_file_metadata(self, item_id: str) -> File: """ diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 87153e8..6f3bc44 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -25,3 +25,18 @@ def register() -> dict: "config_class": GoogleDriveServiceConfig, "client_class": GoogleDriveClient, } + +def get_service_name() -> str: + """ + Returns the name of the service. + This is used for logging and identification purposes. + """ + return "Google Drive" + +def get_service_display_name() -> str: + """ + Returns the display name of the service. + This is used for user-friendly identification. 
+ """ + return "Google Drive Service" + return \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index a08c28f..77aab51 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -42,7 +42,7 @@ def _get_credentials(self): TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) - + try: creds = Credentials.from_authorized_user_file( TOKEN_PATH, scopes=self.SCOPES @@ -76,6 +76,14 @@ def list_buckets(self) -> list: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) + + def list_all_files(self) -> List[File]: + buckets = self.list_buckets() + all_files = [] + for bucket in buckets: + files = self.files.list_files(bucket.id) + all_files.extend(files) + return all_files def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) From 3722d39837a7d909f94d7764fe292ae83d6aec9b Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Sun, 15 Jun 2025 15:52:39 +0200 Subject: [PATCH 02/39] Changed all commands to dedicated folders and better structure. Also added tests for the new search functionality. --- src/docbinder_oss/commands/__init__.py | 0 .../commands/provider/__init__.py | 11 ++ src/docbinder_oss/commands/provider/get.py | 35 ++++ src/docbinder_oss/commands/provider/list.py | 15 ++ src/docbinder_oss/commands/provider/test.py | 35 ++++ src/docbinder_oss/commands/search.py | 118 ++++++++++++ src/docbinder_oss/commands/setup.py | 50 ++++++ src/docbinder_oss/main.py | 169 +----------------- src/docbinder_oss/provider.py | 93 ---------- tests/commands/test_search_command.py | 154 ++++++++++++++++ tests/services/test_search_export.py | 149 +++++++++++++++ 11 files changed, 571 insertions(+), 258 deletions(-) create mode 100644 src/docbinder_oss/commands/__init__.py create mode 100644 src/docbinder_oss/commands/provider/__init__.py create mode 100644 src/docbinder_oss/commands/provider/get.py create mode 100644 src/docbinder_oss/commands/provider/list.py create mode 100644 src/docbinder_oss/commands/provider/test.py create mode 100644 src/docbinder_oss/commands/search.py create mode 100644 src/docbinder_oss/commands/setup.py delete mode 100644 src/docbinder_oss/provider.py create mode 100644 tests/commands/test_search_command.py create mode 100644 tests/services/test_search_export.py diff --git a/src/docbinder_oss/commands/__init__.py b/src/docbinder_oss/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py new file mode 100644 index 0000000..4fa1055 --- /dev/null +++ b/src/docbinder_oss/commands/provider/__init__.py @@ -0,0 +1,11 @@ +import typer +from docbinder_oss.main import app + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +provider_app = typer.Typer( + help="Commands to manage providers. List them or get details for a specific one." +) +# We add this group to our main application. 
+app.add_typer(provider_app, name="provider") \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py new file mode 100644 index 0000000..a20a7fa --- /dev/null +++ b/src/docbinder_oss/commands/provider/get.py @@ -0,0 +1,35 @@ +from docbinder_oss.commands.provider import provider_app +import typer + +@provider_app.command("get") +def get_provider( + connection_type: str = typer.Option( + None, "--type", "-t", help="The type of the provider to get." + ), + name: str = typer.Option( + None, "--name", "-n", help="The name of the provider to get." + ), +): + """Get connection information for a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + + count = 0 + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider in config.providers: + if provider.name == name: + typer.echo(f"Provider '{name}' found with config: {provider}") + count += 1 + if provider.type == connection_type: + typer.echo( + f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + ) + count += 1 + if count == 0: + typer.echo( + f"No providers found with name '{name}' or type '{connection_type}'." + ) + raise typer.Exit(code=1) \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py new file mode 100644 index 0000000..a6fc0b7 --- /dev/null +++ b/src/docbinder_oss/commands/provider/list.py @@ -0,0 +1,15 @@ +from docbinder_oss.commands.provider import provider_app +import typer + +@provider_app.command() +def list(): + """List all configured providers.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + for provider in config.providers: + typer.echo(f"Provider: {provider.name}, Type: {provider.type}") \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/commands/provider/test.py new file mode 100644 index 0000000..354b8fa --- /dev/null +++ b/src/docbinder_oss/commands/provider/test.py @@ -0,0 +1,35 @@ +from docbinder_oss.commands.provider import provider_app +import typer +from typing import Annotated +from docbinder_oss.services import create_provider_instance + +@provider_app.command("test") +def test( + name: Annotated[ + str, typer.Argument(help="The name of the provider to test the connection.") + ], +): + """Test the connection to a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider_config in config.providers: + if provider_config.name == name: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found + typer.echo(f"Provider '{name}' not found in configuration.") + raise typer.Exit(code=1) \ No newline at end of file diff --git 
a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/commands/search.py new file mode 100644 index 0000000..9e61827 --- /dev/null +++ b/src/docbinder_oss/commands/search.py @@ -0,0 +1,118 @@ +import typer +from typing import Optional +from docbinder_oss.main import app + +@app.command() +def search( + name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), + max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), +): + """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + import re + import csv + import json + from datetime import datetime + from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + results = [] + for provider_config in config.providers: + if provider and provider_config.name != provider: + continue + client = create_provider_instance(provider_config) + if client is None or not hasattr(client, "list_all_files"): + continue + try: + files = client.list_all_files() + for item in files: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except 
Exception: + continue + # Collect all possible params for export + results.append({ + "provider": provider_config.name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) + except Exception as e: + typer.echo(f"Error searching provider '{provider_config.name}': {e}") + # Write results to CSV or JSON + if results: + fieldnames = [ + "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + ] + if export_format.lower() == "json": + with open("search_results.json", "w") as jsonfile: + json.dump(results, jsonfile, indent=2, default=str) + typer.echo(f"{len(results)} results written to search_results.json") + else: + with open("search_results.csv", "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in results: + writer.writerow(row) + typer.echo(f"{len(results)} results written to search_results.csv") + else: + typer.echo("No results found.") + return results \ No newline at end of file diff --git a/src/docbinder_oss/commands/setup.py b/src/docbinder_oss/commands/setup.py new file mode 100644 index 0000000..dbe9839 --- /dev/null +++ b/src/docbinder_oss/commands/setup.py @@ -0,0 +1,50 @@ +import typer +from typing import List, Optional +import yaml +from docbinder_oss.helpers.config import save_config, validate_config +from docbinder_oss.main import app + +@app.command() +def setup( + file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), + provider: Optional[List[str]] = typer.Option( + None, + "--provider", + help="Provider config as provider:key1=val1,key2=val2", + callback=lambda v: v or [], + ), +): + """Setup DocBinder configuration via YAML file or provider key-value pairs.""" + config_data = {} + if file: + with open(file, "r") as f: + config_data = yaml.safe_load(f) or {} + elif provider: + providers = {} + for entry in provider: + if ":" not in entry: + typer.echo( + f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." + ) + raise typer.Exit(code=1) + prov_name, prov_kvs = entry.split(":", 1) + kv_dict = {} + for pair in prov_kvs.split(","): + if "=" not in pair: + typer.echo(f"Provider config '{pair}' must be in key=value format.") + raise typer.Exit(code=1) + k, v = pair.split("=", 1) + kv_dict[k] = v + providers[prov_name] = kv_dict + config_data["providers"] = providers + validated = validate_config(config_data) + if not validated.providers: + typer.echo("No providers configured. 
Please add at least one provider.") + raise typer.Exit(code=1) + # Save the validated config + try: + save_config(validated) + except Exception as e: + typer.echo(f"Error saving config: {e}") + raise typer.Exit(code=1) + typer.echo("Configuration saved successfully.") \ No newline at end of file diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 720e3d4..0aff0b5 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,12 +1,12 @@ -from typing import List, Optional - import typer -import yaml - from docbinder_oss.helpers.config import save_config, validate_config app = typer.Typer() +from docbinder_oss.commands import search +from docbinder_oss.commands import setup +from docbinder_oss.commands.provider import list, get, test + # This is the main entry point for the DocBinder CLI. @app.callback() @@ -20,166 +20,5 @@ def hello(): """Print a friendly greeting.""" typer.echo("Hello, DocBinder OSS!") - -@app.command() -def setup( - file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), - provider: Optional[List[str]] = typer.Option( - None, - "--provider", - help="Provider config as provider:key1=val1,key2=val2", - callback=lambda v: v or [], - ), -): - """Setup DocBinder configuration via YAML file or provider key-value pairs.""" - config_data = {} - if file: - with open(file, "r") as f: - config_data = yaml.safe_load(f) or {} - elif provider: - providers = {} - for entry in provider: - if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) - raise typer.Exit(code=1) - prov_name, prov_kvs = entry.split(":", 1) - kv_dict = {} - for pair in prov_kvs.split(","): - if "=" not in pair: - typer.echo(f"Provider config '{pair}' must be in key=value format.") - raise typer.Exit(code=1) - k, v = pair.split("=", 1) - kv_dict[k] = v - providers[prov_name] = kv_dict - config_data["providers"] = providers - validated = validate_config(config_data) - if not validated.providers: - typer.echo("No providers configured. 
Please add at least one provider.") - raise typer.Exit(code=1) - # Save the validated config - try: - save_config(validated) - except Exception as e: - typer.echo(f"Error saving config: {e}") - raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") - -@app.command() -def search( - name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), - updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), - updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), - created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), - created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), - min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), - max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), - export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), -): - """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import re - import csv - import json - from datetime import datetime - from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import create_provider_instance - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - results = [] - for provider_config in config.providers: - if provider and provider_config.name != provider: - continue - client = create_provider_instance(provider_config) - if client is None or not hasattr(client, "list_all_files"): - continue - try: - files = client.list_all_files() - for item in files: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except 
Exception: - continue - # Collect all possible params for export - results.append({ - "provider": provider_config.name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) - except Exception as e: - typer.echo(f"Error searching provider '{provider_config.name}': {e}") - # Write results to CSV or JSON - if results: - fieldnames = [ - "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" - ] - if export_format.lower() == "json": - with open("search_results.json", "w") as jsonfile: - json.dump(results, jsonfile, indent=2, default=str) - typer.echo(f"{len(results)} results written to search_results.json") - else: - with open("search_results.csv", "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for row in results: - writer.writerow(row) - typer.echo(f"{len(results)} results written to search_results.csv") - else: - typer.echo("No results found.") - return results - if __name__ == "__main__": app() diff --git a/src/docbinder_oss/provider.py b/src/docbinder_oss/provider.py deleted file mode 100644 index 4f97fa1..0000000 --- a/src/docbinder_oss/provider.py +++ /dev/null @@ -1,93 +0,0 @@ -from typing import Annotated -import typer -from .main import app -from docbinder_oss.services import create_provider_instance - - -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." 
- ), -): - """Get connection information for a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - - count = 0 - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." - ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - if client is None: - typer.echo(f"Provider '{name}' is not supported or not implemented.") - raise typer.Exit(code=1) - # Attempt to test the connection - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py new file mode 100644 index 0000000..9f1bd67 --- /dev/null +++ b/tests/commands/test_search_command.py @@ -0,0 +1,154 @@ +import os +import csv +import json +import pytest +from typer.testing import CliRunner +from docbinder_oss.main import app + +class DummyFile: + def __init__(self, **kwargs): + self.id = kwargs.get("id", "fileid1") + self.name = kwargs.get("name", "Test File") + self.size = kwargs.get("size", 12345) + self.mime_type = kwargs.get("mime_type", "application/pdf") + self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") + self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) + self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") + self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") + self.shared = kwargs.get("shared", True) + self.trashed = kwargs.get("trashed", False) + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + # Patch config loader to return two dummy provider configs + class DummyProviderConfig: + def __init__(self, name): + self.name = name + class DummyConfig: + providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider + def create_provider_instance(cfg): + if cfg.name == "dummy1": + return type("DummyClient", (), 
{"list_all_files": lambda self: [ + DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") + ]})() + else: + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") + ]})() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) + +def test_search_export_csv(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "csv"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.csv") + with open("search_results.csv") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + names = set(r["name"] for r in rows) + assert names == {"Alpha Report", "Beta Notes"} + # Check owners field is a string + for r in rows: + if r["name"] == "Alpha Report": + assert r["owners"] == "alpha@a.com" + if r["name"] == "Beta Notes": + assert r["owners"] == "beta@b.com" + +def test_search_export_json(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + with open("search_results.json") as f: + data = json.load(f) + assert isinstance(data, list) + assert len(data) == 2 + names = set(d["name"] for d in data) + assert names == {"Alpha Report", "Beta Notes"} + +def test_search_name_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_owner_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_updated_after_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_created_before_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_min_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_max_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with 
open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_provider_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["provider"] == "dummy2" + assert data[0]["name"] == "Beta Notes" + +def test_search_combined_filters(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + assert data[0]["provider"] == "dummy2" + assert data[0]["owners"] == "beta@b.com" diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py new file mode 100644 index 0000000..b998449 --- /dev/null +++ b/tests/services/test_search_export.py @@ -0,0 +1,149 @@ +import os +import csv +import json +import tempfile +import shutil +import pytest +from typer.testing import CliRunner +from docbinder_oss.main import app + +class DummyFile: + def __init__(self, **kwargs): + self.id = kwargs.get("id", "fileid1") + self.name = kwargs.get("name", "Test File") + self.size = kwargs.get("size", 12345) + self.mime_type = kwargs.get("mime_type", "application/pdf") + self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") + self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) + self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") + self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") + self.shared = kwargs.get("shared", True) + self.trashed = kwargs.get("trashed", False) + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + # Patch config loader to return two dummy provider configs + class DummyProviderConfig: + def __init__(self, name): + self.name = name + class DummyConfig: + providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider + def create_provider_instance(cfg): + if cfg.name == "dummy1": + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") + ]})() + else: + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") + ]})() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) + +def test_search_export_csv(): + runner = CliRunner() + result 
= runner.invoke(app, ["search", "--export-format", "csv"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.csv") + with open("search_results.csv") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + assert set(r["name"] for r in rows) == {"Alpha Report", "Beta Notes"} + +def test_search_export_json(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + with open("search_results.json") as f: + data = json.load(f) + assert isinstance(data, list) + assert len(data) == 2 + names = set(d["name"] for d in data) + assert names == {"Alpha Report", "Beta Notes"} + +def test_search_name_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_owner_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_updated_after_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_created_before_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_min_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_max_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_provider_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["provider"] == "dummy2" + assert data[0]["name"] == "Beta Notes" + +def test_search_combined_filters(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + assert data[0]["provider"] == "dummy2" + assert data[0]["owners"] == "beta@b.com" From a9fc52c261091bb4129af94f06925ef005c99417 Mon Sep 17 00:00:00 2001 From: Christophe Beke 
Date: Thu, 19 Jun 2025 13:27:16 +0200 Subject: [PATCH 03/39] Added traversal of child directories to get all the files --- search_results.csv | 4 ++ .../google_drive/google_drive_client.py | 20 +++++- .../google_drive/google_drive_files.py | 36 ++++++---- .../google_drive/test_google_drive_files.py | 68 +++++++++++++++++++ 4 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 search_results.csv diff --git a/search_results.csv b/search_results.csv new file mode 100644 index 0000000..f62fd5b --- /dev/null +++ b/search_results.csv @@ -0,0 +1,4 @@ +provider,id,name,size,mime_type,created_time,modified_time,owners,last_modifying_user,web_view_link,web_content_link,shared,trashed +my_google_drive,1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,New Folder,,application/vnd.google-apps.folder,2025-06-15 13:56:34.594000+00:00,2025-06-15 13:56:34.594000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://drive.google.com/drive/folders/1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,,False,False +my_google_drive,1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8,Test Sheet,1024,application/vnd.google-apps.spreadsheet,2025-06-15 13:56:41.248000+00:00,2025-06-15 13:56:45.986000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/spreadsheets/d/1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8/edit?usp=drivesdk,,False,False +my_google_drive,1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA,Test Doc,1024,application/vnd.google-apps.document,2025-06-15 11:37:05.926000+00:00,2025-06-15 11:37:10.663000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/document/d/1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA/edit?usp=drivesdk,,False,False diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index 77aab51..3e6fb17 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -78,11 +78,26 @@ def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) def list_all_files(self) -> List[File]: + """ + Recursively list all files and folders in all buckets (drives). + Handles My Drive and Shared Drives correctly. 
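+
+        Note: this walks the tree with one files.list request per folder,
+        so large or deeply nested drives translate into many API calls.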
+ """ + def _recursive_list(folder_id, is_drive_root=False): + items = self.files.list_files(folder_id, is_drive_root=is_drive_root) + all_items = [] + for item in items: + all_items.append(item) + # Use mime_type to check if this is a folder + if getattr(item, "mime_type", None) == "application/vnd.google-apps.folder": + all_items.extend(_recursive_list(item.id)) + return all_items + buckets = self.list_buckets() all_files = [] for bucket in buckets: - files = self.files.list_files(bucket.id) - all_files.extend(files) + # If bucket.id == "root", it's My Drive; otherwise, it's a shared drive + is_drive_root = bucket.id != "root" + all_files.extend(_recursive_list(bucket.id, is_drive_root=is_drive_root)) return all_files def get_file_metadata(self, item_id: str) -> File: @@ -90,3 +105,4 @@ def get_file_metadata(self, item_id: str) -> File: def get_permissions(self, item_id: str) -> List[Permission]: return self.permissions.get_permissions(item_id) + \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index a41b9f1..d1b687d 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -13,35 +13,43 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, folder_id=None): - if folder_id and len(folder_id.split("|", 1)) > 1: - logger.warning("Folder ID should not contain '|' character") - _, folder_id = folder_id.split("|", 1) - - if folder_id == "root": - query = "'root' in parents and trashed=false" + def list_files(self, folder_id=None, is_drive_root=False) -> list[File]: + # If listing the root of a shared drive + if is_drive_root: + resp = ( + self.service.files() # type: ignore[attr-defined] + .list( + corpora="drive", + driveId=folder_id, + includeItemsFromAllDrives=True, + supportsAllDrives=True, + q="'root' in parents and trashed=false", + fields=f"files({REQUIRED_FIELDS})", + ) + .execute() + ) + elif folder_id == "root" or folder_id is None: + # Listing the root of My Drive resp = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .list( - q=query, + q="'root' in parents and trashed=false", fields=f"files({REQUIRED_FIELDS})", ) .execute() ) else: + # Listing a regular folder resp = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .list( - corpora="drive", q=f"'{folder_id}' in parents and trashed=false", - driveId=folder_id, includeItemsFromAllDrives=True, supportsAllDrives=True, fields=f"files({REQUIRED_FIELDS})", ) .execute() ) - return [ File( id=f.get("id"), @@ -79,7 +87,7 @@ def list_files(self, folder_id=None): def get_file_metadata(self, file_id: str): item_metadata = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .get( fileId=file_id, fields=f"{REQUIRED_FIELDS}", diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 6443cfb..1f5c379 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -1,6 +1,58 @@ from datetime import datetime +import os +import pytest +from typer.testing import CliRunner from docbinder_oss.core.schemas import File +from docbinder_oss.main import app + + +class DummyFile: + def __init__(self, id, name, parents=None, is_folder=False): + self.id = id + self.name = name + 
self.parents = parents or [] + self.is_folder = is_folder + self.size = 1000 + self.mime_type = "application/pdf" + self.created_time = "2024-01-01T00:00:00" + self.modified_time = "2024-01-02T00:00:00" + self.owners = [type("User", (), {"email_address": "owner@example.com"})()] + self.last_modifying_user = type("User", (), {"email_address": "mod@example.com"})() + self.web_view_link = "http://example.com/view" + self.web_content_link = "http://example.com/content" + self.shared = True + self.trashed = False + + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + class DummyProviderConfig: + name = "googledrive" + + class DummyConfig: + providers = [DummyProviderConfig()] + + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Simulate a folder structure: root -> folder1 -> file1, file2; root -> file3 + def list_all_files(self): + return [ + DummyFile(id="root", name="root", is_folder=True), + DummyFile(id="folder1", name="folder1", parents=["root"], is_folder=True), + DummyFile(id="file1", name="file1.pdf", parents=["folder1"]), + DummyFile(id="file2", name="file2.pdf", parents=["folder1"]), + DummyFile(id="file3", name="file3.pdf", parents=["root"]), + ] + + class DummyClient: + def list_all_files(self): + return list_all_files(self) + + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient()) + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) def test_list_files(mock_gdrive_service, gdrive_client): @@ -81,3 +133,19 @@ def test_list_files(mock_gdrive_service, gdrive_client): trashed=False, ) ] + + +def test_search_finds_all_files_recursively(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + import json + + with open("search_results.json") as f: + data = json.load(f) + # All files and folders should be included in the results + file_names = set(d["name"] for d in data) + expected = {"file1.pdf", "file2.pdf", "file3.pdf", "folder1", "root"} + assert file_names == expected + assert len(file_names) == 5 From 178ea9abb1105d747925a573b044346ce321b752 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Thu, 19 Jun 2025 13:29:02 +0200 Subject: [PATCH 04/39] Delete a file that wasn't supposed to be there --- search_results.csv | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 search_results.csv diff --git a/search_results.csv b/search_results.csv deleted file mode 100644 index f62fd5b..0000000 --- a/search_results.csv +++ /dev/null @@ -1,4 +0,0 @@ -provider,id,name,size,mime_type,created_time,modified_time,owners,last_modifying_user,web_view_link,web_content_link,shared,trashed -my_google_drive,1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,New Folder,,application/vnd.google-apps.folder,2025-06-15 13:56:34.594000+00:00,2025-06-15 13:56:34.594000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://drive.google.com/drive/folders/1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,,False,False -my_google_drive,1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8,Test Sheet,1024,application/vnd.google-apps.spreadsheet,2025-06-15 13:56:41.248000+00:00,2025-06-15 13:56:45.986000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/spreadsheets/d/1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8/edit?usp=drivesdk,,False,False -my_google_drive,1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA,Test Doc,1024,application/vnd.google-apps.document,2025-06-15 
11:37:05.926000+00:00,2025-06-15 11:37:10.663000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/document/d/1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA/edit?usp=drivesdk,,False,False

From 7260899cfecc7b4c0717513ff8f81bbff7a7dff0 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:31:15 +0200
Subject: [PATCH 05/39] Update tests

---
 tests/services/google_drive/test_google_drive_files.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py
index 1f5c379..393849c 100644
--- a/tests/services/google_drive/test_google_drive_files.py
+++ b/tests/services/google_drive/test_google_drive_files.py
@@ -14,7 +14,8 @@ def __init__(self, id, name, parents=None, is_folder=False):
         self.parents = parents or []
         self.is_folder = is_folder
         self.size = 1000
-        self.mime_type = "application/pdf"
+        # Use correct mime_type for folders and files
+        self.mime_type = "application/vnd.google-apps.folder" if is_folder else "application/pdf"
         self.created_time = "2024-01-01T00:00:00"
         self.modified_time = "2024-01-02T00:00:00"
         self.owners = [type("User", (), {"email_address": "owner@example.com"})()]

From d107a8fe6b7c5921ca1560c8f2db3fa96603f7a8 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:32:39 +0200
Subject: [PATCH 06/39] Fixed merge conflict in 'src/docbinder_oss/services/google_drive/google_drive_client.py'

---
 src/docbinder_oss/services/google_drive/google_drive_client.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py
index a772fd9..e8e9611 100644
--- a/src/docbinder_oss/services/google_drive/google_drive_client.py
+++ b/src/docbinder_oss/services/google_drive/google_drive_client.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import List, Optional
 
 from google.auth.transport.requests import Request

From 9e5a51b39fdfbb1b4c502e5ebda7ae46e61ee43a Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:44:21 +0200
Subject: [PATCH 07/39] Add the path of the items as an attribute.
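
The exported rows now include a "path" column. The search command first
indexes every listed item by id, then reconstructs each item's path by
walking its parent ids up to the root. Below is a minimal sketch of that
walk, mirroring the build_path helper this patch adds to
src/docbinder_oss/commands/search.py; it is simplified to take the id
index as a parameter and to assume `parents` is a list (as in the test
fixtures), whereas the committed helper closes over all_items_by_id and
also accepts a scalar parent:

    def build_path(item, items_by_id):
        # Walk parent ids towards the root, guarding against cycles
        # and against parents that were never listed.
        parts = [item.name]
        current, seen = item, set()
        while getattr(current, "parents", None):
            parent_id = current.parents[0]
            if parent_id in seen or parent_id not in items_by_id:
                break
            seen.add(parent_id)
            current = items_by_id[parent_id]
            parts.append(current.name)
        return "/".join(reversed(parts))

    # e.g. a file stored under root/folder1 exports as "root/folder1/file1.pdf"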
--- .gitignore | 3 + src/docbinder_oss/commands/search.py | 158 +++++++++++------- .../google_drive/google_drive_client.py | 6 +- .../google_drive_service_config.py | 3 +- 4 files changed, 106 insertions(+), 64 deletions(-) diff --git a/.gitignore b/.gitignore index d434cbf..a8e0008 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ ENV/ # Credentials gcp_credentials.json *_token.json + +# Test files +search_results.csv \ No newline at end of file diff --git a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/commands/search.py index 9e61827..7253455 100644 --- a/src/docbinder_oss/commands/search.py +++ b/src/docbinder_oss/commands/search.py @@ -28,79 +28,119 @@ def search( typer.echo("No providers configured.") raise typer.Exit(code=1) - results = [] + # Build a mapping of id -> file for path reconstruction + all_items_by_id = {} + all_results = [] + drive_id_to_name = {} + # If provider is Google Drive, build a mapping of drive id to drive name for provider_config in config.providers: if provider and provider_config.name != provider: continue client = create_provider_instance(provider_config) if client is None or not hasattr(client, "list_all_files"): continue + # Try to get drive mapping if possible + drive_id_to_name_local = {} + if hasattr(client, "buckets") and hasattr(client.buckets, "list_buckets"): # type: ignore[attr-defined] + try: + for bucket in client.buckets.list_buckets(): # type: ignore[attr-defined] + drive_id_to_name_local[bucket.id] = bucket.name + except Exception: + pass + drive_id_to_name.update(drive_id_to_name_local) try: files = client.list_all_files() for item in files: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except Exception: - continue - # Collect all possible params for export - results.append({ - "provider": provider_config.name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", 
None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) + all_items_by_id[item.id] = item + # Attach drive_id for later lookup + all_results.append((provider_config.name, item, getattr(item, "parents", ["root"])[0] if hasattr(item, "parents") and getattr(item, "parents", None) else "root", drive_id_to_name_local)) except Exception as e: typer.echo(f"Error searching provider '{provider_config.name}': {e}") + + def build_path(item): + # Reconstruct the path by walking up parents + path_parts = [item.name] + current = item + seen = set() + while getattr(current, "parents", None): + parent_ids = current.parents if isinstance(current.parents, list) else [current.parents] + parent_id = parent_ids[0] if parent_ids else None + if not parent_id or parent_id in seen or parent_id not in all_items_by_id: + break + seen.add(parent_id) + parent = all_items_by_id[parent_id] + path_parts.append(parent.name) + current = parent + return "/".join(reversed(path_parts)) + + results = [] + for provider_name, item, parent_id, drive_map in all_results: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except Exception: + continue + # Find drive name + drive_name = drive_map.get(parent_id) or drive_id_to_name.get(parent_id) or drive_id_to_name.get("root") or "Unknown" + # Collect all possible params for export, including path, is_folder, and drive_name + results.append({ + "provider": provider_name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "path": build_path(item), + "is_folder": getattr(item, "mime_type", None) == "application/vnd.google-apps.folder", + "drive_name": drive_name, + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, 
"modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) # Write results to CSV or JSON if results: fieldnames = [ - "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + "provider", "id", "name", "path", "is_folder", "drive_name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" ] if export_format.lower() == "json": with open("search_results.json", "w") as jsonfile: diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index e8e9611..ed8ed0d 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -45,7 +45,7 @@ def _get_credentials(self): try: creds = Credentials.from_authorized_user_file( - self.config.gcp_token_json, scopes=self.SCOPES + TOKEN_PATH, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -55,11 +55,11 @@ def _get_credentials(self): creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( - self.config.gcp_credentials_json, self.SCOPES + TOKEN_PATH, self.SCOPES ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(self.config.gcp_token_json, "w") as token: + with open(TOKEN_PATH, "w") as token: token.write(creds.to_json()) return creds diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index dd6c957..236f4d3 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,5 +5,4 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str - gcp_token_json: str + gcp_credentials_json: str \ No newline at end of file From c753807af7964b3bdbc934cac56d2056a9e71b06 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:03 +0200 Subject: [PATCH 08/39] Update src/docbinder_oss/services/google_drive/google_drive_client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/docbinder_oss/services/google_drive/google_drive_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index ed8ed0d..d827ea3 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -55,7 +55,7 @@ def _get_credentials(self): creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( - 
TOKEN_PATH, self.SCOPES + self.config.gcp_credentials_json, self.SCOPES ) creds = flow.run_local_server(port=0) # Save the credentials for the next run From 9dbf490bd0849482f2926f4bfe0212dc3d683d20 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:13 +0200 Subject: [PATCH 09/39] Update src/docbinder_oss/services/google_drive/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/docbinder_oss/services/google_drive/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 6f3bc44..9059dab 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -38,5 +38,4 @@ def get_service_display_name() -> str: Returns the display name of the service. This is used for user-friendly identification. """ - return "Google Drive Service" - return \ No newline at end of file + return "Google Drive Service" \ No newline at end of file From 98a6bc8115066fec77b4071970e3826f7a265c49 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:36 +0200 Subject: [PATCH 10/39] Update tests/services/test_search_export.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/services/test_search_export.py | 43 ++-------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py index b998449..c16c44b 100644 --- a/tests/services/test_search_export.py +++ b/tests/services/test_search_export.py @@ -50,47 +50,8 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) -def test_search_export_csv(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "csv"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.csv") - with open("search_results.csv") as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 2 - assert set(r["name"] for r in rows) == {"Alpha Report", "Beta Notes"} - -def test_search_export_json(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.json") - with open("search_results.json") as f: - data = json.load(f) - assert isinstance(data, list) - assert len(data) == 2 - names = set(d["name"] for d in data) - assert names == {"Alpha Report", "Beta Notes"} - -def test_search_name_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - -def test_search_owner_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - +# The test logic for search export and filters has been consolidated into `tests/commands/test_search_command.py`. +# This file no longer contains duplicate tests. 
def test_search_updated_after_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) From 77476805be0906a244ccf65cd51678a09ea949ce Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Thu, 19 Jun 2025 14:04:12 +0200 Subject: [PATCH 11/39] Removed unused parameter --- example_file.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/example_file.yaml b/example_file.yaml index e55f752..9a94d45 100644 --- a/example_file.yaml +++ b/example_file.yaml @@ -2,7 +2,6 @@ providers: - type: google_drive name: my_google_drive gcp_credentials_json: gcp_credentials.json - gcp_token_json: gcp_token.json # - type: dropbox # name: my_dropbox # api_key: dropbox-api-key From 59b00ffeeee70c8dc82e7955275263844057e222 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:11:18 +0200 Subject: [PATCH 12/39] improved help message and internal logic --- src/docbinder_oss/commands/provider/get.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py index a20a7fa..1bd8f65 100644 --- a/src/docbinder_oss/commands/provider/get.py +++ b/src/docbinder_oss/commands/provider/get.py @@ -10,25 +10,26 @@ def get_provider( None, "--name", "-n", help="The name of the provider to get." ), ): - """Get connection information for a specific provider.""" + """Get connection information for a provider by name or by type. + If both options are provided, it will search for providers matching either criterion.""" from docbinder_oss.helpers.config import load_config config = load_config() - count = 0 + provider_found = False if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) for provider in config.providers: if provider.name == name: typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 + provider_found = True if provider.type == connection_type: typer.echo( f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" ) - count += 1 - if count == 0: + provider_found = True + if not provider_found: typer.echo( f"No providers found with name '{name}' or type '{connection_type}'." 
) From 3da72b568bd3b55c729d7276c0a31c141896b65a Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:13:55 +0200 Subject: [PATCH 13/39] typo in list --- src/docbinder_oss/commands/provider/list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py index a6fc0b7..4d4d19e 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/commands/provider/list.py @@ -12,4 +12,4 @@ def list(): raise typer.Exit(code=1) for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") \ No newline at end of file + typer.echo(f"Provider: {provider.name}, type: {provider.type}") \ No newline at end of file From 6e68c5788d2ad37084c8702ce7f7c91ae4cf5618 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:19:00 +0200 Subject: [PATCH 14/39] improved the internal logic --- src/docbinder_oss/commands/provider/test.py | 44 +++++++++++++-------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/commands/provider/test.py index 354b8fa..7a18db2 100644 --- a/src/docbinder_oss/commands/provider/test.py +++ b/src/docbinder_oss/commands/provider/test.py @@ -1,35 +1,45 @@ from docbinder_oss.commands.provider import provider_app import typer from typing import Annotated -from docbinder_oss.services import create_provider_instance + @provider_app.command("test") def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], + name: Annotated[str, typer.Argument(help="The name of the provider to test the connection.")], ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance + + if not name: + typer.echo("Provider name is required.") + raise typer.Exit(code=1) config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) + + found_provider_config = None for provider_config in config.providers: if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - if client is None: - typer.echo(f"Provider '{name}' is not supported or not implemented.") - raise typer.Exit(code=1) - # Attempt to test the connection - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return + found_provider_config = provider_config + break # Exit the loop once the provider is found + + if found_provider_config: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) \ No newline at end of file + raise typer.Exit(code=1) From b4e209696b8c42cd2a8ca02efa1adcc7b5096f65 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 
2025 15:07:45 +0200
Subject: [PATCH 15/39] change to the provider commands

---
 src/docbinder_oss/commands/provider/__init__.py |  2 +-
 src/docbinder_oss/commands/provider/get.py      | 11 ++++-------
 src/docbinder_oss/commands/provider/list.py     |  3 ++-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py
index 4fa1055..e6057c3 100644
--- a/src/docbinder_oss/commands/provider/__init__.py
+++ b/src/docbinder_oss/commands/provider/__init__.py
@@ -8,4 +8,4 @@
     help="Commands to manage providers. List them or get details for a specific one."
 )
 # We add this group to our main application.
-app.add_typer(provider_app, name="provider")
\ No newline at end of file
+app.add_typer(provider_app, name="provider")

diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py
index 1bd8f65..0e9f1fe 100644
--- a/src/docbinder_oss/commands/provider/get.py
+++ b/src/docbinder_oss/commands/provider/get.py
@@ -1,14 +1,13 @@
 from docbinder_oss.commands.provider import provider_app
 import typer
 
+
 @provider_app.command("get")
 def get_provider(
     connection_type: str = typer.Option(
         None, "--type", "-t", help="The type of the provider to get."
     ),
-    name: str = typer.Option(
-        None, "--name", "-n", help="The name of the provider to get."
-    ),
+    name: str = typer.Option(None, "--name", "-n", help="The name of the provider to get."),
 ):
     """Get connection information for a provider by name or by type.
     If both options are provided, it will search for providers matching either criterion."""
@@ -30,7 +29,5 @@ def get_provider(
         )
         provider_found = True
     if not provider_found:
-        typer.echo(
-            f"No providers found with name '{name}' or type '{connection_type}'."
- ) - raise typer.Exit(code=1) \ No newline at end of file + typer.echo(f"No providers found with name '{name}' or type '{connection_type}'.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py index 4d4d19e..ce0a664 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/commands/provider/list.py @@ -1,6 +1,7 @@ from docbinder_oss.commands.provider import provider_app import typer + @provider_app.command() def list(): """List all configured providers.""" @@ -12,4 +13,4 @@ def list(): raise typer.Exit(code=1) for provider in config.providers: - typer.echo(f"Provider: {provider.name}, type: {provider.type}") \ No newline at end of file + typer.echo(f"Provider: {provider.name}, type: {provider.type}") From eebc5b5a8dfaefe42ea4c86337272159c2619616 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 2025 21:06:46 +0200 Subject: [PATCH 16/39] improved the cli --- .../{commands => cli}/__init__.py | 0 src/docbinder_oss/cli/provider/__init__.py | 12 ++ .../{commands => cli}/provider/get.py | 8 +- .../{commands => cli}/provider/list.py | 5 +- .../{commands => cli}/provider/test.py | 13 +- src/docbinder_oss/{commands => cli}/search.py | 141 +++++++++++++----- src/docbinder_oss/{commands => cli}/setup.py | 6 +- .../commands/provider/__init__.py | 11 -- src/docbinder_oss/core/schemas.py | 12 +- src/docbinder_oss/helpers/config.py | 8 +- src/docbinder_oss/main.py | 140 +---------------- src/docbinder_oss/services/__init__.py | 4 +- src/docbinder_oss/services/base_class.py | 2 +- .../services/google_drive/__init__.py | 8 +- .../google_drive/google_drive_client.py | 44 ++---- .../google_drive/google_drive_files.py | 81 +++++----- .../google_drive/google_drive_permissions.py | 6 +- .../google_drive_service_config.py | 3 +- tests/commands/test_search_command.py | 89 +++++++++-- tests/services/google_drive/conftest.py | 4 +- .../google_drive/test_google_drive_files.py | 5 +- tests/services/test_search_export.py | 91 ++++++++--- 22 files changed, 364 insertions(+), 329 deletions(-) rename src/docbinder_oss/{commands => cli}/__init__.py (100%) create mode 100644 src/docbinder_oss/cli/provider/__init__.py rename src/docbinder_oss/{commands => cli}/provider/get.py (90%) rename src/docbinder_oss/{commands => cli}/provider/list.py (82%) rename src/docbinder_oss/{commands => cli}/provider/test.py (90%) rename src/docbinder_oss/{commands => cli}/search.py (54%) rename src/docbinder_oss/{commands => cli}/setup.py (93%) delete mode 100644 src/docbinder_oss/commands/provider/__init__.py diff --git a/src/docbinder_oss/commands/__init__.py b/src/docbinder_oss/cli/__init__.py similarity index 100% rename from src/docbinder_oss/commands/__init__.py rename to src/docbinder_oss/cli/__init__.py diff --git a/src/docbinder_oss/cli/provider/__init__.py b/src/docbinder_oss/cli/provider/__init__.py new file mode 100644 index 0000000..6066b1a --- /dev/null +++ b/src/docbinder_oss/cli/provider/__init__.py @@ -0,0 +1,12 @@ +import typer +from .get import app as get_app +from .list import app as list_app +from .test import app as test_app + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +app = typer.Typer(help="Commands to manage providers. 
List them or get details for a specific one.") +app.add_typer(get_app) +app.add_typer(list_app) +app.add_typer(test_app) diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/cli/provider/get.py similarity index 90% rename from src/docbinder_oss/commands/provider/get.py rename to src/docbinder_oss/cli/provider/get.py index 0e9f1fe..7793870 100644 --- a/src/docbinder_oss/commands/provider/get.py +++ b/src/docbinder_oss/cli/provider/get.py @@ -1,8 +1,9 @@ -from docbinder_oss.commands.provider import provider_app import typer +app = typer.Typer() -@provider_app.command("get") + +@app.command("get") def get_provider( connection_type: str = typer.Option( None, "--type", "-t", help="The type of the provider to get." @@ -25,7 +26,8 @@ def get_provider( provider_found = True if provider.type == connection_type: typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + f"Provider '{provider.name}' of type '{connection_type}'" + f" found with config: {provider}" ) provider_found = True if not provider_found: diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/cli/provider/list.py similarity index 82% rename from src/docbinder_oss/commands/provider/list.py rename to src/docbinder_oss/cli/provider/list.py index ce0a664..c3bd5f9 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/cli/provider/list.py @@ -1,8 +1,9 @@ -from docbinder_oss.commands.provider import provider_app import typer +app = typer.Typer() -@provider_app.command() + +@app.command() def list(): """List all configured providers.""" from docbinder_oss.helpers.config import load_config diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/cli/provider/test.py similarity index 90% rename from src/docbinder_oss/commands/provider/test.py rename to src/docbinder_oss/cli/provider/test.py index 7a18db2..d01262d 100644 --- a/src/docbinder_oss/commands/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -1,16 +1,17 @@ -from docbinder_oss.commands.provider import provider_app import typer from typing import Annotated +app = typer.Typer() -@provider_app.command("test") + +@app.command("test") def test( name: Annotated[str, typer.Argument(help="The name of the provider to test the connection.")], ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config from docbinder_oss.services import create_provider_instance - + if not name: typer.echo("Provider name is required.") raise typer.Exit(code=1) @@ -24,8 +25,8 @@ def test( for provider_config in config.providers: if provider_config.name == name: found_provider_config = provider_config - break # Exit the loop once the provider is found - + break # Exit the loop once the provider is found + if found_provider_config: typer.echo(f"Testing connection for provider '{name}'...") try: @@ -39,7 +40,7 @@ def test( except Exception as e: typer.echo(f"Failed to connect to provider '{name}': {e}") return - + # If we reach here, the provider was not found typer.echo(f"Provider '{name}' not found in configuration.") raise typer.Exit(code=1) diff --git a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/cli/search.py similarity index 54% rename from src/docbinder_oss/commands/search.py rename to src/docbinder_oss/cli/search.py index 7253455..f19d0c2 100644 --- a/src/docbinder_oss/commands/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,21 +1,38 @@ import typer from typing import Optional -from 
docbinder_oss.main import app + +app = typer.Typer() + @app.command() def search( name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), - updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), - updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), - created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), - created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + owner: Optional[str] = typer.Option( + None, "--owner", help="Owner/contributor/reader email address to filter" + ), + updated_after: Optional[str] = typer.Option( + None, "--updated-after", help="Last update after (ISO timestamp)" + ), + updated_before: Optional[str] = typer.Option( + None, "--updated-before", help="Last update before (ISO timestamp)" + ), + created_after: Optional[str] = typer.Option( + None, "--created-after", help="Created after (ISO timestamp)" + ), + created_before: Optional[str] = typer.Option( + None, "--created-before", help="Created before (ISO timestamp)" + ), min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), - export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), + provider: Optional[str] = typer.Option( + None, "--provider", "-p", help="Provider name to search in" + ), + export_format: str = typer.Option( + "csv", "--export-format", help="Export format: csv or json", show_default=True + ), ): - """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + """Search for files or folders matching filters across all + providers and export results as CSV or JSON.""" import re import csv import json @@ -53,7 +70,16 @@ def search( for item in files: all_items_by_id[item.id] = item # Attach drive_id for later lookup - all_results.append((provider_config.name, item, getattr(item, "parents", ["root"])[0] if hasattr(item, "parents") and getattr(item, "parents", None) else "root", drive_id_to_name_local)) + all_results.append( + ( + provider_config.name, + item, + getattr(item, "parents", ["root"])[0] + if hasattr(item, "parents") and getattr(item, "parents", None) + else "root", + drive_id_to_name_local, + ) + ) except Exception as e: typer.echo(f"Error searching provider '{provider_config.name}': {e}") @@ -83,7 +109,9 @@ def build_path(item): if owner: emails = set() owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + emails.update( + [u.email_address for u in owners_list if u and getattr(u, "email_address", None)] + ) last_mod_user = getattr(item, "last_modifying_user", None) if last_mod_user and getattr(last_mod_user, "email_address", None): emails.add(last_mod_user.email_address) @@ -91,17 +119,25 @@ def build_path(item): continue # Last update filter if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + if not 
item.modified_time or datetime.fromisoformat( + str(item.modified_time) + ) < datetime.fromisoformat(updated_after): continue if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + if not item.modified_time or datetime.fromisoformat( + str(item.modified_time) + ) > datetime.fromisoformat(updated_before): continue # Created at filter if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + if not item.created_time or datetime.fromisoformat( + str(item.created_time) + ) < datetime.fromisoformat(created_after): continue if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + if not item.created_time or datetime.fromisoformat( + str(item.created_time) + ) > datetime.fromisoformat(created_before): continue # Size filter (in KB) if min_size is not None: @@ -117,30 +153,63 @@ def build_path(item): except Exception: continue # Find drive name - drive_name = drive_map.get(parent_id) or drive_id_to_name.get(parent_id) or drive_id_to_name.get("root") or "Unknown" + drive_name = ( + drive_map.get(parent_id) + or drive_id_to_name.get(parent_id) + or drive_id_to_name.get("root") + or "Unknown" + ) # Collect all possible params for export, including path, is_folder, and drive_name - results.append({ - "provider": provider_name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "path": build_path(item), - "is_folder": getattr(item, "mime_type", None) == "application/vnd.google-apps.folder", - "drive_name": drive_name, - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) + results.append( + { + "provider": provider_name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "path": build_path(item), + "is_folder": getattr(item, "mime_type", None) + == "application/vnd.google-apps.folder", + "drive_name": drive_name, + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join( + [ + u.email_address + for u in (getattr(item, "owners", None) or []) + if u and getattr(u, "email_address", None) + ] + ) + if getattr(item, "owners", None) + else None, + "last_modifying_user": getattr( + getattr(item, "last_modifying_user", None), "email_address", None + ), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + } + ) # Write results to CSV or JSON if results: fieldnames = [ - "provider", "id", "name", "path", "is_folder", "drive_name", "size", "mime_type", "created_time", 
"modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + "provider", + "id", + "name", + "path", + "is_folder", + "drive_name", + "size", + "mime_type", + "created_time", + "modified_time", + "owners", + "last_modifying_user", + "web_view_link", + "web_content_link", + "shared", + "trashed", ] if export_format.lower() == "json": with open("search_results.json", "w") as jsonfile: @@ -155,4 +224,4 @@ def build_path(item): typer.echo(f"{len(results)} results written to search_results.csv") else: typer.echo("No results found.") - return results \ No newline at end of file + return results diff --git a/src/docbinder_oss/commands/setup.py b/src/docbinder_oss/cli/setup.py similarity index 93% rename from src/docbinder_oss/commands/setup.py rename to src/docbinder_oss/cli/setup.py index dbe9839..b9ff56d 100644 --- a/src/docbinder_oss/commands/setup.py +++ b/src/docbinder_oss/cli/setup.py @@ -2,7 +2,9 @@ from typing import List, Optional import yaml from docbinder_oss.helpers.config import save_config, validate_config -from docbinder_oss.main import app + +app = typer.Typer(help="DocBinder configuration setup commands.") + @app.command() def setup( @@ -47,4 +49,4 @@ def setup( except Exception as e: typer.echo(f"Error saving config: {e}") raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") \ No newline at end of file + typer.echo("Configuration saved successfully.") diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py deleted file mode 100644 index e6057c3..0000000 --- a/src/docbinder_oss/commands/provider/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import typer -from docbinder_oss.main import app - -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 2718f1b..b9e2f9a 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -13,9 +13,7 @@ class Bucket(BaseModel): id: str name: str kind: Optional[str] = Field(description="Type of the bucket, e.g., 'drive#file'") - created_time: Optional[datetime] = Field( - description="Timestamp when the bucket was created." - ) + created_time: Optional[datetime] = Field(description="Timestamp when the bucket was created.") viewable: Optional[bool] restrictions: Optional[Dict[str, Any]] @@ -48,9 +46,7 @@ class File(BaseModel): mime_type: str kind: Optional[str] - is_folder: bool = Field( - False, description="True if the item is a folder, False otherwise." - ) + is_folder: bool = Field(False, description="True if the item is a folder, False otherwise.") web_view_link: Optional[HttpUrl] icon_link: Optional[HttpUrl] @@ -61,9 +57,7 @@ class File(BaseModel): owners: Optional[List[User]] last_modifying_user: Optional[User] - size: Optional[str] = Field( - description="Size in bytes, as a string. Only populated for files." - ) + size: Optional[str] = Field(description="Size in bytes, as a string. 
Only populated for files.") parents: Optional[str] = Field(description="Parent folder ID, if applicable.") capabilities: Optional[FileCapabilities] = None diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 77e17cf..d098793 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -21,9 +21,7 @@ class Config(BaseModel): def load_config() -> Config: if not os.path.exists(CONFIG_PATH): - typer.echo( - f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first." - ) + typer.echo(f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first.") raise typer.Exit(code=1) with open(CONFIG_PATH, "r") as f: config_data = yaml.safe_load(f) @@ -33,9 +31,7 @@ def load_config() -> Config: if config.get("type") not in provider_registry: typer.echo(f"Unknown provider type: {config['type']}") raise typer.Exit(code=1) - config_to_add.append( - provider_registry[config["type"]]["config_class"](**config) - ) + config_to_add.append(provider_registry[config["type"]]["config_class"](**config)) try: configss = Config(providers=config_to_add) return configss diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 116a0c6..d28d3a4 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,11 +1,12 @@ import typer -from docbinder_oss.helpers.config import save_config, validate_config +from docbinder_oss.cli.provider import app as provider_app +from docbinder_oss.cli.search import app as search_app +from docbinder_oss.cli.setup import app as setup_app app = typer.Typer() - -from docbinder_oss.commands import search -from docbinder_oss.commands import setup -from docbinder_oss.commands.provider import list, get, test +app.add_typer(provider_app, name="provider") +app.add_typer(search_app) +app.add_typer(setup_app) # This is the main entry point for the DocBinder CLI. @@ -15,134 +16,5 @@ def main(): pass -@app.command() -def hello(): - """Print a friendly greeting.""" - typer.echo("Hello, DocBinder OSS!") - - -@app.command() -def setup( - file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), - provider: Optional[List[str]] = typer.Option( - None, - "--provider", - help="Provider config as provider:key1=val1,key2=val2", - callback=lambda v: v or [], - ), -): - """Setup DocBinder configuration via YAML file or provider key-value pairs.""" - config_data = {} - if file: - with open(file, "r") as f: - config_data = yaml.safe_load(f) or {} - elif provider: - providers = {} - for entry in provider: - if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) - raise typer.Exit(code=1) - prov_name, prov_kvs = entry.split(":", 1) - kv_dict = {} - for pair in prov_kvs.split(","): - if "=" not in pair: - typer.echo(f"Provider config '{pair}' must be in key=value format.") - raise typer.Exit(code=1) - k, v = pair.split("=", 1) - kv_dict[k] = v - providers[prov_name] = kv_dict - config_data["providers"] = providers - validated = validate_config(config_data) - if not validated.providers: - typer.echo("No providers configured. 
Please add at least one provider.") - raise typer.Exit(code=1) - # Save the validated config - try: - save_config(validated) - except Exception as e: - typer.echo(f"Error saving config: {e}") - raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") - - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." - ), -): - """Get connection information for a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - - count = 0 - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type " - f"'{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." - ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) - - if __name__ == "__main__": app() diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/services/__init__.py index 3384d07..78d738c 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/services/__init__.py @@ -12,9 +12,7 @@ if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) logging.getLogger("googleapiclient").setLevel(logging.WARNING) logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index dd51cec..dbeb328 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -56,7 +56,7 @@ def list_all_files(self) -> List[File]: A list of StorageItem objects representing all files and folders. 
""" pass - + @abstractmethod def get_file_metadata(self, item_id: str) -> File: """ diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 9059dab..71b6fe3 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -7,9 +7,7 @@ if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) logging.getLogger("googleapiclient").setLevel(logging.WARNING) @@ -26,6 +24,7 @@ def register() -> dict: "client_class": GoogleDriveClient, } + def get_service_name() -> str: """ Returns the name of the service. @@ -33,9 +32,10 @@ def get_service_name() -> str: """ return "Google Drive" + def get_service_display_name() -> str: """ Returns the display name of the service. This is used for user-friendly identification. """ - return "Google Drive Service" \ No newline at end of file + return "Google Drive Service" diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index d827ea3..757f7ea 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -1,5 +1,4 @@ import logging -import os from typing import List, Optional from google.auth.transport.requests import Request @@ -7,7 +6,7 @@ from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build -from docbinder_oss.core.schemas import File, Permission +from docbinder_oss.core.schemas import Bucket, File, Permission from docbinder_oss.services.base_class import BaseStorageClient from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles @@ -39,13 +38,11 @@ def __init__(self, config: GoogleDriveServiceConfig): self.permissions = GoogleDrivePermissions(self.service) def _get_credentials(self): - TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") - # Ensure the directory exists - os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) + logger.info("Getting credentials for Google Drive client") try: creds = Credentials.from_authorized_user_file( - TOKEN_PATH, scopes=self.SCOPES + self.config.gcp_token_json, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -59,7 +56,7 @@ def _get_credentials(self): ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(TOKEN_PATH, "w") as token: + with open(self.config.gcp_token_json, "w") as token: token.write(creds.to_json()) return creds @@ -71,38 +68,25 @@ def test_connection(self) -> bool: logger.error(f"Test connection failed: {e}") return False - def list_buckets(self) -> list: + def list_buckets(self) -> list[Bucket]: return self.buckets.list_buckets() def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - - def list_all_files(self) -> List[File]: - """ - Recursively list all files and folders in all buckets (drives). - Handles My Drive and Shared Drives correctly. 
- """ - def _recursive_list(folder_id, is_drive_root=False): - items = self.files.list_files(folder_id, is_drive_root=is_drive_root) - all_items = [] - for item in items: - all_items.append(item) - # Use mime_type to check if this is a folder - if getattr(item, "mime_type", None) == "application/vnd.google-apps.folder": - all_items.extend(_recursive_list(item.id)) - return all_items - buckets = self.list_buckets() - all_files = [] + def list_files_recursively(self, bucket: Bucket = None) -> List[File]: + """List all files and folders recursively in the specified bucket or root.""" + return self.files.list_files_recursively(bucket) + + def list_all_files(self) -> List[File]: + files = [] + buckets = self.buckets.list_buckets() for bucket in buckets: - # If bucket.id == "root", it's My Drive; otherwise, it's a shared drive - is_drive_root = bucket.id != "root" - all_files.extend(_recursive_list(bucket.id, is_drive_root=is_drive_root)) - return all_files + files.extend(self.files.list_files_recursively(bucket)) + return files def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) def get_permissions(self, item_id: str) -> List[Permission]: return self.permissions.get_permissions(item_id) - \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fa20258..225e1aa 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -2,7 +2,7 @@ from googleapiclient.discovery import Resource -from docbinder_oss.core.schemas import File, User +from docbinder_oss.core.schemas import Bucket, File, User logger = logging.getLogger(__name__) @@ -18,43 +18,29 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, folder_id=None, is_drive_root=False) -> list[File]: - # If listing the root of a shared drive + def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list[File]: + args = { + "includeItemsFromAllDrives": True, + "supportsAllDrives": True, + "fields": f"files({REQUIRED_FIELDS})", + } + folder_id = bucket.id if bucket else None if is_drive_root: - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - corpora="drive", - driveId=folder_id, - includeItemsFromAllDrives=True, - supportsAllDrives=True, - q="'root' in parents and trashed=false", - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() + if not folder_id: + raise ValueError("folder_id must be provided when is_drive_root is True") + args.update( + { + "corpora": "drive", + "driveId": folder_id, + "q": "'root' in parents and trashed=false", + } ) elif folder_id == "root" or folder_id is None: - # Listing the root of My Drive - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - q="'root' in parents and trashed=false", - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + args["q"] = "'root' in parents and trashed=false" else: - # Listing a regular folder - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - q=f"'{folder_id}' in parents and trashed=false", - includeItemsFromAllDrives=True, - supportsAllDrives=True, - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + args["q"] = f"'{folder_id}' in parents and trashed=false" + + resp = self.service.files().list(**args).execute() return [ File( id=f.get("id"), @@ -90,6 +76,22 @@ def list_files(self, folder_id=None, 
is_drive_root=False) -> list[File]: for f in resp.get("files") ] + def list_files_recursively(self, bucket: Bucket) -> list[File]: + """List all files in the Google Drive bucket.""" + is_drive_root = bucket.id != "root" + + def _recursive_list(folder_id: str): + items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) + all_items = [] + for item in items: + all_items.append(item) + # Use mime_type to check if this is a folder + if item.mime_type == "application/vnd.google-apps.folder": + all_items.extend(_recursive_list(item.id)) + return all_items + + return _recursive_list(bucket.id) + def get_file_metadata(self, file_id: str): item_metadata = ( self.service.files() # type: ignore[attr-defined] @@ -118,12 +120,8 @@ def get_file_metadata(self, file_id: str): for owner in item_metadata.get("owners") ], last_modifying_user=User( - display_name=item_metadata.get("lastModifyingUser", {}).get( - "displayName" - ), - email_address=item_metadata.get("lastModifyingUser", {}).get( - "emailAddress" - ), + display_name=item_metadata.get("lastModifyingUser", {}).get("displayName"), + email_address=item_metadata.get("lastModifyingUser", {}).get("emailAddress"), photo_link=item_metadata.get("lastModifyingUser", {}).get("photoLink"), kind=item_metadata.get("lastModifyingUser", {}).get("kind"), ), @@ -132,7 +130,6 @@ def get_file_metadata(self, file_id: str): trashed=item_metadata.get("trashed"), shared=item_metadata.get("shared"), starred=item_metadata.get("starred"), - is_folder=item_metadata.get("mimeType") - == "application/vnd.google-apps.folder", + is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/services/google_drive/google_drive_permissions.py index 70988e2..ab0b830 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_permissions.py +++ b/src/docbinder_oss/services/google_drive/google_drive_permissions.py @@ -31,11 +31,7 @@ def get_user(self): ) def get_permissions(self, item_id: str): - resp = ( - self.service.permissions() - .list(fileId=item_id, fields="permissions") - .execute() - ) + resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() return [ Permission( diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index 236f4d3..dd6c957 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,4 +5,5 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str \ No newline at end of file + gcp_credentials_json: str + gcp_token_json: str diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 9f1bd67..c0f69ab 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -5,6 +5,7 @@ from typer.testing import CliRunner from docbinder_oss.main import app + class DummyFile: def __init__(self, **kwargs): self.id = kwargs.get("id", "fileid1") @@ -13,34 +14,67 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = 
kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.owners = kwargs.get( + "owners", [type("User", (), {"email_address": "owner@example.com"})()] + ) + self.last_modifying_user = kwargs.get( + "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() + ) self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): # Patch config loader to return two dummy provider configs class DummyProviderConfig: def __init__(self, name): self.name = name + class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider def create_provider_instance(cfg): if cfg.name == "dummy1": - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f1", + name="Alpha Report", + size=2048, + owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", + modified_time="2024-01-02T10:00:00", + ) + ] + }, + )() else: - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f2", + name="Beta Notes", + size=4096, + owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", + modified_time="2024-02-02T10:00:00", + ) + ] + }, + )() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -48,6 +82,7 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) + def test_search_export_csv(): runner = CliRunner() result = runner.invoke(app, ["search", "--export-format", "csv"]) @@ -66,6 +101,7 @@ def test_search_export_csv(): if r["name"] == "Beta Notes": assert r["owners"] == "beta@b.com" + def test_search_export_json(): runner = CliRunner() result = runner.invoke(app, ["search", "--export-format", "json"]) @@ -78,6 +114,7 @@ def test_search_export_json(): names = set(d["name"] for d in data) assert names == {"Alpha Report", "Beta Notes"} + def test_search_name_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) @@ -87,6 +124,7 @@ def test_search_name_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_owner_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--owner", "beta@b.com", 
"--export-format", "json"]) @@ -96,24 +134,31 @@ def test_search_owner_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_min_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) @@ -123,6 +168,7 @@ def test_search_min_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_max_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) @@ -132,6 +178,7 @@ def test_search_max_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_provider_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) @@ -142,9 +189,25 @@ def test_search_provider_filter(): assert data[0]["provider"] == "dummy2" assert data[0]["name"] == "Beta Notes" + def test_search_combined_filters(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke( + app, + [ + "search", + "--name", + "Beta", + "--owner", + "beta@b.com", + "--min-size", + "3", + "--provider", + "dummy2", + "--export-format", + "json", + ], + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index ff50b73..c60300f 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -19,9 +19,7 @@ def mock_gdrive_service(): Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will receive our mock object instead of making a real network call. 
""" - with patch( - "docbinder_oss.services.google_drive.google_drive_client.build" - ) as mock_build: + with patch("docbinder_oss.services.google_drive.google_drive_client.build") as mock_build: # Create a mock for the service object that `build` would return mock_service = MagicMock() # Configure the `build` function to return our mock service diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 393849c..7b0b019 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -35,6 +35,7 @@ class DummyConfig: providers = [DummyProviderConfig()] monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Simulate a folder structure: root -> folder1 -> file1, file2; root -> file3 def list_all_files(self): return [ @@ -49,7 +50,9 @@ class DummyClient: def list_all_files(self): return list_all_files(self) - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient()) + monkeypatch.setattr( + "docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient() + ) orig_cwd = os.getcwd() os.chdir(tmp_path) yield diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py index c16c44b..78fd856 100644 --- a/tests/services/test_search_export.py +++ b/tests/services/test_search_export.py @@ -1,12 +1,10 @@ import os -import csv import json -import tempfile -import shutil import pytest from typer.testing import CliRunner from docbinder_oss.main import app + class DummyFile: def __init__(self, **kwargs): self.id = kwargs.get("id", "fileid1") @@ -15,34 +13,67 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.owners = kwargs.get( + "owners", [type("User", (), {"email_address": "owner@example.com"})()] + ) + self.last_modifying_user = kwargs.get( + "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() + ) self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): # Patch config loader to return two dummy provider configs class DummyProviderConfig: def __init__(self, name): self.name = name + class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider def create_provider_instance(cfg): if cfg.name == "dummy1": - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( 
+ id="f1", + name="Alpha Report", + size=2048, + owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", + modified_time="2024-01-02T10:00:00", + ) + ] + }, + )() else: - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f2", + name="Beta Notes", + size=4096, + owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", + modified_time="2024-02-02T10:00:00", + ) + ] + }, + )() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -50,26 +81,34 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) -# The test logic for search export and filters has been consolidated into `tests/commands/test_search_command.py`. + +# The test logic for search export and filters has been consolidated into +# `tests/commands/test_search_command.py`. # This file no longer contains duplicate tests. def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_min_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) @@ -79,6 +118,7 @@ def test_search_min_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_max_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) @@ -88,6 +128,7 @@ def test_search_max_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_provider_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) @@ -98,9 +139,25 @@ def test_search_provider_filter(): assert data[0]["provider"] == "dummy2" assert data[0]["name"] == "Beta Notes" + def test_search_combined_filters(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke( + app, + [ + "search", + "--name", + "Beta", + "--owner", + "beta@b.com", + "--min-size", + "3", + "--provider", + "dummy2", + "--export-format", + "json", + ], + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) From ea4ebfc1ff70b4f84bbe263a3f0d3b16c0cd84fa Mon Sep 17 
00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 2025 23:31:00 +0200 Subject: [PATCH 17/39] refactoring list all --- src/docbinder_oss/cli/search.py | 247 +++++------------- src/docbinder_oss/services/__init__.py | 6 +- src/docbinder_oss/services/base_class.py | 14 +- .../services/dropbox/__init__.py | 11 - .../services/dropbox/dropbox_client.py | 5 - .../dropbox/dropbox_service_config.py | 8 - .../google_drive/google_drive_client.py | 6 +- .../google_drive/google_drive_files.py | 39 +-- 8 files changed, 106 insertions(+), 230 deletions(-) delete mode 100644 src/docbinder_oss/services/dropbox/__init__.py delete mode 100644 src/docbinder_oss/services/dropbox/dropbox_client.py delete mode 100644 src/docbinder_oss/services/dropbox/dropbox_service_config.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index f19d0c2..87b3988 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,6 +1,11 @@ +from datetime import datetime +import re import typer from typing import Optional +from docbinder_oss.helpers.config import Config +from docbinder_oss.services.base_class import BaseProvider + app = typer.Typer() @@ -28,200 +33,82 @@ def search( None, "--provider", "-p", help="Provider name to search in" ), export_format: str = typer.Option( - "csv", "--export-format", help="Export format: csv or json", show_default=True + None, "--export-format", help="Export format: csv or json", show_default=True ), ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import re import csv import json - from datetime import datetime from docbinder_oss.helpers.config import load_config from docbinder_oss.services import create_provider_instance - - config = load_config() + + # 1 Load documents with filter "provider" + # 2 Filter the documents based on the provided filters + # 3 Export results to CSV or JSON + + config: Config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - - # Build a mapping of id -> file for path reconstruction - all_items_by_id = {} - all_results = [] - drive_id_to_name = {} - # If provider is Google Drive, build a mapping of drive id to drive name + + current_files = {} for provider_config in config.providers: if provider and provider_config.name != provider: continue - client = create_provider_instance(provider_config) - if client is None or not hasattr(client, "list_all_files"): + client: BaseProvider = create_provider_instance(provider_config) + if not client: + typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") + raise typer.Exit(code=1) + current_files[provider_config.name] = client.list_all_files() + + current_files = filter_files( + current_files, + name=name, + owner=owner, + updated_after=updated_after, + updated_before=updated_before, + created_after=created_after, + created_before=created_before, + min_size=min_size, + max_size=max_size, + ) + + if not export_format: + typer.echo(current_files) + return + +def filter_files( + files, + name=None, + owner=None, + updated_after=None, + updated_before=None, + created_after=None, + created_before=None, + min_size=None, + max_size=None, +): + results = [] + + for file in files: + if name and not re.search(name, file.name, re.IGNORECASE): + continue + if owner and not any(owner in u.email_address for u in file.owners): + continue + if updated_after and file.modified_time < datetime.fromisoformat(updated_after): + continue + if 
updated_before and file.modified_time > datetime.fromisoformat(updated_before): + continue + if created_after and file.created_time < datetime.fromisoformat(created_after): + continue + if created_before and file.created_time > datetime.fromisoformat(created_before): + continue + if min_size and file.size < min_size * 1024: + continue + if max_size and file.size > max_size * 1024: continue - # Try to get drive mapping if possible - drive_id_to_name_local = {} - if hasattr(client, "buckets") and hasattr(client.buckets, "list_buckets"): # type: ignore[attr-defined] - try: - for bucket in client.buckets.list_buckets(): # type: ignore[attr-defined] - drive_id_to_name_local[bucket.id] = bucket.name - except Exception: - pass - drive_id_to_name.update(drive_id_to_name_local) - try: - files = client.list_all_files() - for item in files: - all_items_by_id[item.id] = item - # Attach drive_id for later lookup - all_results.append( - ( - provider_config.name, - item, - getattr(item, "parents", ["root"])[0] - if hasattr(item, "parents") and getattr(item, "parents", None) - else "root", - drive_id_to_name_local, - ) - ) - except Exception as e: - typer.echo(f"Error searching provider '{provider_config.name}': {e}") - def build_path(item): - # Reconstruct the path by walking up parents - path_parts = [item.name] - current = item - seen = set() - while getattr(current, "parents", None): - parent_ids = current.parents if isinstance(current.parents, list) else [current.parents] - parent_id = parent_ids[0] if parent_ids else None - if not parent_id or parent_id in seen or parent_id not in all_items_by_id: - break - seen.add(parent_id) - parent = all_items_by_id[parent_id] - path_parts.append(parent.name) - current = parent - return "/".join(reversed(path_parts)) + results.append(file) - results = [] - for provider_name, item, parent_id, drive_map in all_results: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update( - [u.email_address for u in owners_list if u and getattr(u, "email_address", None)] - ) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat( - str(item.modified_time) - ) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat( - str(item.modified_time) - ) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat( - str(item.created_time) - ) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat( - str(item.created_time) - ) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except Exception: - continue - # Find drive name - drive_name = ( - drive_map.get(parent_id) - or drive_id_to_name.get(parent_id) - or drive_id_to_name.get("root") - or "Unknown" - ) - # Collect all 
possible params for export, including path, is_folder, and drive_name - results.append( - { - "provider": provider_name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "path": build_path(item), - "is_folder": getattr(item, "mime_type", None) - == "application/vnd.google-apps.folder", - "drive_name": drive_name, - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join( - [ - u.email_address - for u in (getattr(item, "owners", None) or []) - if u and getattr(u, "email_address", None) - ] - ) - if getattr(item, "owners", None) - else None, - "last_modifying_user": getattr( - getattr(item, "last_modifying_user", None), "email_address", None - ), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - } - ) - # Write results to CSV or JSON - if results: - fieldnames = [ - "provider", - "id", - "name", - "path", - "is_folder", - "drive_name", - "size", - "mime_type", - "created_time", - "modified_time", - "owners", - "last_modifying_user", - "web_view_link", - "web_content_link", - "shared", - "trashed", - ] - if export_format.lower() == "json": - with open("search_results.json", "w") as jsonfile: - json.dump(results, jsonfile, indent=2, default=str) - typer.echo(f"{len(results)} results written to search_results.json") - else: - with open("search_results.csv", "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for row in results: - writer.writerow(row) - typer.echo(f"{len(results)} results written to search_results.csv") - else: - typer.echo("No results found.") - return results + return results \ No newline at end of file diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/services/__init__.py index 78d738c..0e57925 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/services/__init__.py @@ -8,13 +8,13 @@ from rich.logging import RichHandler from docbinder_oss import services -from docbinder_oss.services.base_class import BaseStorageClient, ServiceConfig +from docbinder_oss.services.base_class import BaseProvider, ServiceConfig if not logging.getLogger().handlers: FORMAT = "%(message)s" logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) -logging.getLogger("googleapiclient").setLevel(logging.WARNING) +logging.getLogger("services").setLevel(logging.WARNING) logger = logging.getLogger(__name__) _provider_registry = None # Module-level cache @@ -37,7 +37,7 @@ def get_provider_registry() -> dict: return _provider_registry -def create_provider_instance(config: ServiceConfig) -> Optional["BaseStorageClient"]: +def create_provider_instance(config: ServiceConfig) -> Optional["BaseProvider"]: """ Factory function to create a provider instance from its config. 
""" diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index dbeb328..7b62f72 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from docbinder_oss.core.schemas import File, Permission +from docbinder_oss.core.schemas import Bucket, File, Permission class ServiceConfig(BaseModel): @@ -13,7 +13,7 @@ class ServiceConfig(BaseModel): name: str -class BaseStorageClient(ABC): +class BaseProvider(ABC): """ Abstract base class for a client that interacts with a cloud storage service. Defines a standard interface for listing items and retrieving metadata. @@ -32,6 +32,16 @@ def test_connection(self) -> bool: True if the connection is successful, False otherwise. """ pass + + @abstractmethod + def list_buckets(self) -> List[Bucket]: + """ + Lists all available buckets in the storage service. + + Returns: + A list of bucket names. + """ + pass @abstractmethod def list_files(self, folder_id: Optional[str] = None) -> List[File]: diff --git a/src/docbinder_oss/services/dropbox/__init__.py b/src/docbinder_oss/services/dropbox/__init__.py deleted file mode 100644 index 80759af..0000000 --- a/src/docbinder_oss/services/dropbox/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .dropbox_client import DropboxClient -from .dropbox_service_config import DropboxServiceConfig - - -def register(): - # Register the Dropbox client - return { - "display_name": "dropbox", - "config_class": DropboxServiceConfig, - "client_class": DropboxClient, - } diff --git a/src/docbinder_oss/services/dropbox/dropbox_client.py b/src/docbinder_oss/services/dropbox/dropbox_client.py deleted file mode 100644 index 3919701..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_client.py +++ /dev/null @@ -1,5 +0,0 @@ -from docbinder_oss.services.base_class import BaseStorageClient - - -class DropboxClient(BaseStorageClient): - pass diff --git a/src/docbinder_oss/services/dropbox/dropbox_service_config.py b/src/docbinder_oss/services/dropbox/dropbox_service_config.py deleted file mode 100644 index 515c471..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_service_config.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Literal - -from docbinder_oss.services.base_class import ServiceConfig - - -class DropboxServiceConfig(ServiceConfig): - type: Literal["dropbox"] = "dropbox" # type: ignore[override] - api_key: str diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index 757f7ea..fe8f93c 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -7,7 +7,7 @@ from googleapiclient.discovery import build from docbinder_oss.core.schemas import Bucket, File, Permission -from docbinder_oss.services.base_class import BaseStorageClient +from docbinder_oss.services.base_class import BaseProvider from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles from docbinder_oss.services.google_drive.google_drive_permissions import ( @@ -21,7 +21,7 @@ logger.setLevel(logging.INFO) -class GoogleDriveClient(BaseStorageClient): +class GoogleDriveClient(BaseProvider): def __init__(self, config: GoogleDriveServiceConfig): super().__init__(config) logger.info("Initializing Google Drive client") @@ -74,7 +74,7 @@ def 
list_buckets(self) -> list[Bucket]: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - def list_files_recursively(self, bucket: Bucket = None) -> List[File]: + def list_files_recursively(self, bucket: str = None) -> List[File]: """List all files and folders recursively in the specified bucket or root.""" return self.files.list_files_recursively(bucket) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index 225e1aa..fac56f7 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -10,7 +10,7 @@ "id,name,mimeType,kind,size,createdTime,modifiedTime," "owners(permissionId,displayName,emailAddress,photoLink)," "lastModifyingUser(permissionId,displayName,emailAddress,photoLink)," - "webViewLink,iconLink,trashed,shared,starred" + "webViewLink,iconLink,trashed,shared,starred,parents" ) @@ -18,29 +18,33 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list[File]: + def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[File]: args = { "includeItemsFromAllDrives": True, "supportsAllDrives": True, - "fields": f"files({REQUIRED_FIELDS})", + "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - folder_id = bucket.id if bucket else None - if is_drive_root: - if not folder_id: - raise ValueError("folder_id must be provided when is_drive_root is True") + logger.debug(f"{type(bucket)}: {bucket}") + bucket_id = bucket.id if hasattr(bucket, "id") else bucket + + if is_drive_root and bucket_id != "root": args.update( { "corpora": "drive", - "driveId": folder_id, + "driveId": bucket_id, "q": "'root' in parents and trashed=false", } ) - elif folder_id == "root" or folder_id is None: - args["q"] = "'root' in parents and trashed=false" else: - args["q"] = f"'{folder_id}' in parents and trashed=false" - + parent_id = bucket_id + if parent_id == "root" or parent_id is None: + args["q"] = "'root' in parents and trashed=false" + else: + args["q"] = f"'{parent_id}' in parents and trashed=false" + resp = self.service.files().list(**args).execute() + print(len(resp["files"])) + exit(1) return [ File( id=f.get("id"), @@ -71,26 +75,25 @@ def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list shared=f.get("shared"), starred=f.get("starred"), is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", - parents=folder_id if folder_id else None, + parents=bucket_id if bucket_id else None, ) for f in resp.get("files") ] - def list_files_recursively(self, bucket: Bucket) -> list[File]: + def list_files_recursively(self, bucket: str) -> list[File]: """List all files in the Google Drive bucket.""" - is_drive_root = bucket.id != "root" + is_drive_root = bucket != "root" def _recursive_list(folder_id: str): items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) all_items = [] for item in items: all_items.append(item) - # Use mime_type to check if this is a folder - if item.mime_type == "application/vnd.google-apps.folder": + if item.is_folder: all_items.extend(_recursive_list(item.id)) return all_items - return _recursive_list(bucket.id) + return _recursive_list(bucket) def get_file_metadata(self, file_id: str): item_metadata = ( From a5d2e6b002904663f07a43fd5ccc5aeb5d0e5074 Mon Sep 17 00:00:00 2001 
00:00:00 2001
From: Christophe Beke Date: Mon, 23 Jun 2025 22:41:24 +0200 Subject: [PATCH 18/39] Update with all new comments. Also made sure the search command now fully adds all required fields for csv and json. --- .gitignore | 3 +- provider_setup_example.yml | 4 + src/docbinder_oss/cli/provider/test.py | 2 +- src/docbinder_oss/cli/search.py | 160 ++++++++++++++--- src/docbinder_oss/core/schemas.py | 17 +- src/docbinder_oss/helpers/config.py | 2 +- src/docbinder_oss/helpers/path_utils.py | 83 +++++++++ src/docbinder_oss/services/base_class.py | 2 +- .../google_drive/google_drive_buckets.py | 2 +- .../google_drive/google_drive_client.py | 28 +-- .../google_drive/google_drive_files.py | 34 ++-- .../google_drive/google_drive_permissions.py | 4 +- .../google_drive_service_config.py | 4 +- tests/commands/test_search_command.py | 43 ++++- tests/services/google_drive/conftest.py | 1 - .../google_drive/test_google_drive_files.py | 96 +++++----- tests/services/test_search_export.py | 167 ------------------ 17 files changed, 362 insertions(+), 290 deletions(-) create mode 100644 provider_setup_example.yml create mode 100644 src/docbinder_oss/helpers/path_utils.py delete mode 100644 tests/services/test_search_export.py diff --git a/.gitignore b/.gitignore index a8e0008..8ec8616 100644 --- a/.gitignore +++ b/.gitignore @@ -79,4 +79,5 @@ gcp_credentials.json *_token.json # Test files -search_results.csv \ No newline at end of file +search_results.csv +search_results.json \ No newline at end of file diff --git a/provider_setup_example.yml b/provider_setup_example.yml new file mode 100644 index 0000000..ff3c851 --- /dev/null +++ b/provider_setup_example.yml @@ -0,0 +1,4 @@ +providers: + - type: google_drive + name: my_google_drive + gcp_credentials_json: gcp_credentials.json diff --git a/src/docbinder_oss/cli/provider/test.py b/src/docbinder_oss/cli/provider/test.py index d01262d..be424d9 100644 --- a/src/docbinder_oss/cli/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -30,7 +30,7 @@ def test( if found_provider_config: typer.echo(f"Testing connection for provider '{name}'...") try: - client = create_provider_instance(provider_config) + client = create_provider_instance(found_provider_config) if client is None: typer.echo(f"Provider '{name}' is not supported or not implemented.") raise typer.Exit(code=1) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 87b3988..8f79b03 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -2,9 +2,14 @@ import re import typer from typing import Optional +import csv +import json +from docbinder_oss.helpers.config import load_config +from docbinder_oss.services import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.services.base_class import BaseProvider +from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -38,10 +43,6 @@ def search( ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import csv - import json - from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import create_provider_instance # 1 Load documents with filter "provider" # 2 Filter the documents based on the provided filters @@ -56,27 +57,66 @@ def search( for provider_config in config.providers: if provider and provider_config.name != provider: continue - client: BaseProvider = create_provider_instance(provider_config) + client: 
Optional[BaseProvider] = create_provider_instance(provider_config) if not client: typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - - current_files = filter_files( - current_files, - name=name, - owner=owner, - updated_after=updated_after, - updated_before=updated_before, - created_after=created_after, - created_before=created_before, - min_size=min_size, - max_size=max_size, - ) - + + # After collecting all files, build id-to-item mapping for all providers + all_files = [] + for files in current_files.values(): + all_files.extend(files) + + # Build root_id_to_name mapping for all drives (My Drive, Shared Drives, etc.) + root_id_to_name = {} + # Heuristic: a file/folder with no parents or with a special marker is a root + for f in all_files: + # If a file has no parents, treat it as a root + if not getattr(f, 'parents', None) or (isinstance(f.parents, list) and not f.parents[0]): + root_id_to_name[f.id] = f.name + # Optionally, if you have a drive_id or drive_name attribute, add here + # elif hasattr(f, 'drive_id') and hasattr(f, 'drive_name'): + # root_id_to_name[f.drive_id] = f.drive_name + + # Fallback for Google Drive: always include 'root': 'My Drive' if not present + if 'root' not in root_id_to_name: + root_id_to_name['root'] = 'My Drive' + + id_to_path = build_all_full_paths(all_files, root_id_to_name=root_id_to_name) + # Add full_path to each file using the memoized paths + for files in current_files.values(): + for file in files: + file.full_path = id_to_path.get(file.id, file.name) + + # Filter files per provider, keep grouping + filtered_files_by_provider = {} + for provider_name, files in current_files.items(): + filtered = filter_files( + files, + name=name, + owner=owner, + updated_after=updated_after, + updated_before=updated_before, + created_after=created_after, + created_before=created_before, + min_size=min_size, + max_size=max_size, + ) + filtered_files_by_provider[provider_name] = filtered + if not export_format: - typer.echo(current_files) + typer.echo(filtered_files_by_provider) return + elif export_format.lower() == "csv": + __write_csv(filtered_files_by_provider, "search_results.csv") + typer.echo("Results written to search_results.csv") + elif export_format.lower() == "json": + __write_json(filtered_files_by_provider, "search_results.json", flat=True) # or flat=False for grouped + typer.echo("Results written to search_results.json") + else: + typer.echo(f"Unsupported export format: {export_format}") + raise typer.Exit(code=1) def filter_files( files, @@ -90,25 +130,93 @@ def filter_files( max_size=None, ): results = [] - for file in files: if name and not re.search(name, file.name, re.IGNORECASE): continue if owner and not any(owner in u.email_address for u in file.owners): continue - if updated_after and file.modified_time < datetime.fromisoformat(updated_after): + if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): continue - if updated_before and file.modified_time > datetime.fromisoformat(updated_before): + if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): continue - if created_after and file.created_time < datetime.fromisoformat(created_after): + if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): continue - if created_before and file.created_time > datetime.fromisoformat(created_before): + if created_before and __parse_dt(file.created_time) > 
__parse_dt(created_before): continue if min_size and file.size < min_size * 1024: continue if max_size and file.size > max_size * 1024: continue - results.append(file) + return results + +def __parse_dt(val): + if isinstance(val, datetime): + return val + try: + return datetime.fromisoformat(val) + except Exception: + return val + +def __write_csv(files_by_provider, filename): + # Collect all possible fieldnames from all files + all_fieldnames = set(["provider"]) + for files in files_by_provider.values(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + all_fieldnames.update(file_dict.keys()) + # Move provider to the front, rest sorted + fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") + with open(filename, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for provider, files in files_by_provider.items(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict["provider"] = provider + # Flatten owners for CSV (only email addresses) + owners = file_dict.get("owners") + if isinstance(owners, list): + emails = [] + for u in owners: + if hasattr(u, "email_address") and u.email_address: + emails.append(u.email_address) + elif isinstance(u, dict) and u.get("email_address"): + emails.append(u["email_address"]) + elif isinstance(u, str): + emails.append(u) + file_dict["owners"] = ";".join(emails) + # Flatten last_modifying_user for CSV (only email address) + last_mod = file_dict.get("last_modifying_user") + if last_mod is not None: + if hasattr(last_mod, "email_address"): + file_dict["last_modifying_user"] = last_mod.email_address + elif isinstance(last_mod, dict) and "email_address" in last_mod: + file_dict["last_modifying_user"] = last_mod["email_address"] + else: + file_dict["last_modifying_user"] = str(last_mod) + # Flatten parents for CSV + parents = file_dict.get("parents") + if isinstance(parents, list): + file_dict["parents"] = ";".join(str(p) for p in parents) + writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) - return results \ No newline at end of file +def __write_json(files_by_provider, filename, flat=False): + with open(filename, "w") as jsonfile: + if flat: + all_files = [] + for provider, files in files_by_provider.items(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict["provider"] = provider + all_files.append(file_dict) + json.dump(all_files, jsonfile, default=str, indent=2) + else: + grouped = { + provider: [ + file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + for file in files + ] + for provider, files in files_by_provider.items() + } + json.dump(grouped, jsonfile, default=str, indent=2) \ No newline at end of file diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index b9e2f9a..70b1604 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -58,7 +58,7 @@ class File(BaseModel): last_modifying_user: Optional[User] size: Optional[str] = Field(description="Size in bytes, as a string. 
Only populated for files.")
-    parents: Optional[str] = Field(description="Parent folder ID, if applicable.")
+    parents: Optional[List[str]] = Field(description="Parent folder IDs, if applicable.")
 
     capabilities: Optional[FileCapabilities] = None
 
@@ -66,8 +66,21 @@ class File(BaseModel):
     starred: Optional[bool]
     trashed: Optional[bool]
 
-    # If you want a more robust way to set is_folder after initialization:
+    # Add full_path as an optional field for export/CLI assignment
+    full_path: Optional[str] = Field(default=None, description="Full path of the file/folder, computed at runtime.")
+
     def __init__(self, **data: Any):
+        # Coerce parents to a list of strings or None
+        if 'parents' in data:
+            if data['parents'] is None:
+                data['parents'] = None
+            elif isinstance(data['parents'], str):
+                data['parents'] = [data['parents']]
+            elif isinstance(data['parents'], list):
+                # Ensure all elements are strings
+                data['parents'] = [str(p) for p in data['parents'] if p is not None]
+            else:
+                data['parents'] = [str(data['parents'])]
         super().__init__(**data)
         if self.mime_type == "application/vnd.google-apps.folder":
             self.is_folder = True
diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py
index d098793..e92acb4 100644
--- a/src/docbinder_oss/helpers/config.py
+++ b/src/docbinder_oss/helpers/config.py
@@ -16,7 +16,7 @@
 class Config(BaseModel):
     """Main configuration model that holds a list of all provider configs."""
 
-    providers: List[ServiceUnion]
+    providers: list
 
 
 def load_config() -> Config:
diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py
new file mode 100644
index 0000000..d0b9cf9
--- /dev/null
+++ b/src/docbinder_oss/helpers/path_utils.py
@@ -0,0 +1,83 @@
+def build_id_to_item(files):
+    """
+    Build a mapping from file/folder id to the file/folder object.
+    """
+    return {getattr(f, 'id', None): f for f in files if hasattr(f, 'id')}
+
+def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'):
+    """
+    Recursively build the full path for a file or folder using its parents.
+    Returns a string like '/My Drive/Folder/Subfolder/File.pdf'.
+    """
+    path_parts = [file.name]
+    current = file
+    while True:
+        parents = getattr(current, 'parents', None)
+        if not parents or not isinstance(parents, list) or not parents[0]:
+            break
+        parent_id = parents[0]
+        if parent_id == root_id:
+            path_parts.append(root_name)
+            break
+        parent = id_to_item.get(parent_id)
+        if not parent:
+            break
+        path_parts.append(parent.name)
+        current = parent
+    return '/' + '/'.join(reversed(path_parts))
+
+def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to_name=None):
+    """
+    Efficiently compute the full path for every file/folder in one pass using an iterative approach and memoization.
+    Supports multiple drives by using a root_id_to_name mapping.
+    Returns a dict: {file_id: full_path}
+    """
+    id_to_item = build_id_to_item(files)
+    id_to_path = {}
+    if root_id_to_name is None:
+        root_id_to_name = {root_id: root_name}
+    for item in files:
+        if not hasattr(item, 'id') or not hasattr(item, 'name'):
+            continue
+        if item.id in id_to_path:
+            continue
+        # Iterative path construction
+        current = item
+        temp_stack = []
+        while True:
+            if current.id in id_to_path:
+                break
+            parents = getattr(current, 'parents', None)
+            if not parents or not isinstance(parents, list) or not parents[0]:
+                temp_stack.append((current.id, '/' + current.name))
+                break
+            parent_id = parents[0]
+            if parent_id in root_id_to_name:
+                temp_stack.append((current.id, '/' + root_id_to_name[parent_id] + '/' + current.name))
+                break
+            parent = id_to_item.get(parent_id)
+            if not parent:
+                temp_stack.append((current.id, '/' + current.name))
+                break
+            temp_stack.append((current.id, None))  # Mark as not yet resolved
+            current = parent
+        # Now unwind the stack and build the paths
+        while temp_stack:
+            file_id, path = temp_stack.pop()
+            if path is not None:
+                id_to_path[file_id] = path
+            else:
+                parent_id = id_to_item[file_id].parents[0]
+                parent_path = id_to_path.get(parent_id, '')
+                id_to_path[file_id] = parent_path.rstrip('/') + '/' + id_to_item[file_id].name
+        # Ensure root_name is present at the start (for legacy single-drive fallback)
+        found_root = False
+        for root_name_val in root_id_to_name.values():
+            if id_to_path[item.id].lstrip('/').startswith(root_name_val + '/'):  # e.g. 'My Drive/'
+                found_root = True
+                break
+        if not found_root:
+            # Use the first root_name as fallback
+            fallback_root = next(iter(root_id_to_name.values()))
+            id_to_path[item.id] = '/' + fallback_root + id_to_path[item.id]
+    return id_to_path
diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py
index 7b62f72..5d8f09e 100644
--- a/src/docbinder_oss/services/base_class.py
+++ b/src/docbinder_oss/services/base_class.py
@@ -44,7 +44,7 @@ def list_buckets(self) -> List[Bucket]:
         pass
 
     @abstractmethod
-    def list_files(self, folder_id: Optional[str] = None) -> List[File]:
+    def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]:
         """
         Lists items (files and folders) within a specific folder.
 
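For intuition, here is a minimal standalone sketch of the parent-chain resolution that helpers/path_utils.py implements above. It resolves paths recursively where the patch uses an explicit stack, and Item and build_full_paths are illustrative stand-ins, not names from this series; only the first parent of each item is consulted, as in the patch:

    from dataclasses import dataclass
    from typing import Dict, List, Optional

    @dataclass
    class Item:
        id: str
        name: str
        parents: Optional[List[str]] = None  # only parents[0] is used, mirroring the patch

    def build_full_paths(items: List[Item], root_id_to_name: Dict[str, str]) -> Dict[str, str]:
        """Resolve '/<drive name>/.../<name>' for every item, memoizing shared prefixes."""
        by_id = {i.id: i for i in items}
        memo: Dict[str, str] = {}

        def path_of(item: Item) -> str:
            if item.id in memo:
                return memo[item.id]
            if not item.parents:
                memo[item.id] = '/' + item.name  # no parent recorded: item is itself a root
            elif item.parents[0] in root_id_to_name:
                # parent is a drive root, e.g. 'root' -> 'My Drive' or a shared-drive id
                memo[item.id] = '/' + root_id_to_name[item.parents[0]] + '/' + item.name
            elif item.parents[0] in by_id:
                # ordinary folder: resolve the parent first, then append this name
                memo[item.id] = path_of(by_id[item.parents[0]]) + '/' + item.name
            else:
                memo[item.id] = '/' + item.name  # unknown parent id: degrade gracefully
            return memo[item.id]

        return {i.id: path_of(i) for i in items}

    items = [
        Item('folder1', 'folder1', ['root']),
        Item('file1', 'file1.pdf', ['folder1']),
        Item('file3', 'file3.pdf', ['root']),
    ]
    print(build_full_paths(items, {'root': 'My Drive'}))
    # {'folder1': '/My Drive/folder1',
    #  'file1': '/My Drive/folder1/file1.pdf',
    #  'file3': '/My Drive/file3.pdf'}

The real helper memoizes the same way but walks iteratively with an explicit stack, presumably to sidestep Python's recursion limit on deeply nested folders; cli/search.py then attaches each resolved path to the file as full_path before the CSV/JSON export.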
diff --git a/src/docbinder_oss/services/google_drive/google_drive_buckets.py b/src/docbinder_oss/services/google_drive/google_drive_buckets.py index e5746be..1976b89 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_buckets.py +++ b/src/docbinder_oss/services/google_drive/google_drive_buckets.py @@ -25,7 +25,7 @@ def list_buckets(self) -> List[Bucket]: ] # Default root drive resp = ( - self.service.drives() + self.service.drives() # type: ignore[attr-defined] .list(fields="drives(id,name,kind,createdTime,hidden,restrictions)") .execute() ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index fe8f93c..02ca1d7 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -1,4 +1,5 @@ import logging +import os from typing import List, Optional from google.auth.transport.requests import Request @@ -40,9 +41,14 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") + TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") + # Ensure the directory exists + os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) + logger.debug(f"Token path: {TOKEN_PATH}") + try: creds = Credentials.from_authorized_user_file( - self.config.gcp_token_json, scopes=self.SCOPES + TOKEN_PATH, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -56,7 +62,7 @@ def _get_credentials(self): ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(self.config.gcp_token_json, "w") as token: + with open(TOKEN_PATH, "w") as token: token.write(creds.to_json()) return creds @@ -71,19 +77,19 @@ def test_connection(self) -> bool: def list_buckets(self) -> list[Bucket]: return self.buckets.list_buckets() - def list_files(self, folder_id: Optional[str] = None) -> List[File]: - return self.files.list_files(folder_id) + def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: + return self.files.list_files_in_folder(folder_id) - def list_files_recursively(self, bucket: str = None) -> List[File]: + def list_files_recursively(self, bucket_id: str | None = None) -> List[File]: """List all files and folders recursively in the specified bucket or root.""" - return self.files.list_files_recursively(bucket) + if bucket_id is None: + bucket_id = "root" + logger.info(f"Listing files recursively in bucket: {bucket_id}") + return self.files.list_files_recursively(bucket_id) def list_all_files(self) -> List[File]: - files = [] - buckets = self.buckets.list_buckets() - for bucket in buckets: - files.extend(self.files.list_files_recursively(bucket)) - return files + buckets = self.buckets.list_buckets() + return self.files.list_all_files(buckets) def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fac56f7..39f477e 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -18,14 +18,17 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, bucket: str = None, 
is_drive_root: bool = False) -> list[File]: + def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool = False) -> list[File]: args = { "includeItemsFromAllDrives": True, "supportsAllDrives": True, "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - logger.debug(f"{type(bucket)}: {bucket}") - bucket_id = bucket.id if hasattr(bucket, "id") else bucket + if bucket_id is None: + logger.debug("Listing files in the root directory.") + bucket_id = "root" + else: + logger.debug(f"{type(bucket_id)}: {bucket_id}") if is_drive_root and bucket_id != "root": args.update( @@ -40,11 +43,10 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi if parent_id == "root" or parent_id is None: args["q"] = "'root' in parents and trashed=false" else: - args["q"] = f"'{parent_id}' in parents and trashed=false" - - resp = self.service.files().list(**args).execute() - print(len(resp["files"])) - exit(1) + args["q"] = f"'{parent_id}' in parents and trashed=false" + + resp = self.service.files().list(**args).execute() # type: ignore[attr-defined] + return [ File( id=f.get("id"), @@ -75,21 +77,23 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi shared=f.get("shared"), starred=f.get("starred"), is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", - parents=bucket_id if bucket_id else None, + parents=f.get("parents") if isinstance(f.get("parents"), list) else None, ) for f in resp.get("files") ] def list_files_recursively(self, bucket: str) -> list[File]: - """List all files in the Google Drive bucket.""" + """List all files in the Google Drive bucket, including all subfolders.""" is_drive_root = bucket != "root" def _recursive_list(folder_id: str): - items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) + logger.debug(f"Listing files in folder: {folder_id}") + items: list[File] = self.list_files_in_folder(folder_id, is_drive_root=is_drive_root) all_items = [] for item in items: all_items.append(item) - if item.is_folder: + # Recursively list files in subfolders + if hasattr(item, "is_folder") and item.is_folder: all_items.extend(_recursive_list(item.id)) return all_items @@ -136,3 +140,9 @@ def get_file_metadata(self, file_id: str): is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. ) + + def list_all_files(self, buckets: list[Bucket]) -> list[File]: + files = [] + for bucket in buckets: + files.extend(self.list_files_recursively(bucket.id)) + return files diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/services/google_drive/google_drive_permissions.py index ab0b830..8b6fd23 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_permissions.py +++ b/src/docbinder_oss/services/google_drive/google_drive_permissions.py @@ -18,7 +18,7 @@ def get_user(self): Returns: User object containing the user's details. 
""" - resp = self.service.about().get(fields="user").execute() + resp = self.service.about().get(fields="user").execute() # type: ignore[attr-defined] user_info = resp.get("user", {}) return User( @@ -31,7 +31,7 @@ def get_user(self): ) def get_permissions(self, item_id: str): - resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() + resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() # type: ignore[attr-defined] return [ Permission( diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index dd6c957..022b9ba 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,5 +5,5 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str - gcp_token_json: str + name: str + gcp_credentials_json: str \ No newline at end of file diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index c0f69ab..2a2a406 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -4,6 +4,8 @@ import pytest from typer.testing import CliRunner from docbinder_oss.main import app +import sys +import importlib class DummyFile: @@ -25,6 +27,23 @@ def __init__(self, **kwargs): self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + def model_dump(self): + # Simulate pydantic's model_dump for test compatibility + return { + "id": self.id, + "name": self.name, + "size": self.size, + "mime_type": self.mime_type, + "created_time": self.created_time, + "modified_time": self.modified_time, + "owners": [u.email_address for u in self.owners], + "last_modifying_user": getattr(self.last_modifying_user, "email_address", None), + "web_view_link": self.web_view_link, + "web_content_link": self.web_content_link, + "shared": self.shared, + "trashed": self.trashed, + } + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): @@ -32,13 +51,15 @@ def patch_provider(monkeypatch, tmp_path): class DummyProviderConfig: def __init__(self, name): self.name = name + self.type = name # Simulate type for registry class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch load_config in the CLI's namespace + monkeypatch.setattr("docbinder_oss.cli.search.load_config", lambda: DummyConfig()) - # Patch create_provider_instance to return a dummy client with different files per provider + # Patch create_provider_instance in the CLI's namespace def create_provider_instance(cfg): if cfg.name == "dummy1": return type( @@ -75,7 +96,8 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output orig_cwd = os.getcwd() os.chdir(tmp_path) @@ -94,12 +116,13 @@ def test_search_export_csv(): assert len(rows) == 2 names = set(r["name"] for r in rows) assert names == {"Alpha Report", "Beta Notes"} - # Check owners field is a string + # Check owners field is a string and contains the expected email for r in rows: 
+ owners = r["owners"] if r["name"] == "Alpha Report": - assert r["owners"] == "alpha@a.com" + assert "alpha@a.com" in owners if r["name"] == "Beta Notes": - assert r["owners"] == "beta@b.com" + assert "beta@b.com" in owners def test_search_export_json(): @@ -113,6 +136,12 @@ def test_search_export_json(): assert len(data) == 2 names = set(d["name"] for d in data) assert names == {"Alpha Report", "Beta Notes"} + # Check owners field is a string or list + for d in data: + if d["name"] == "Alpha Report": + assert "alpha@a.com" in d["owners"] + if d["name"] == "Beta Notes": + assert "beta@b.com" in d["owners"] def test_search_name_filter(): @@ -214,4 +243,4 @@ def test_search_combined_filters(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" assert data[0]["provider"] == "dummy2" - assert data[0]["owners"] == "beta@b.com" + assert "beta@b.com" in data[0]["owners"] diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index c60300f..f95b44b 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -42,6 +42,5 @@ def gdrive_client(mock_gdrive_service): config = GoogleDriveServiceConfig( name="test_gdrive", gcp_credentials_json="fake_creds.json", - gcp_token_json="fake_token.json", ) return GoogleDriveClient(config=config) diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 7b0b019..c5dc850 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -3,7 +3,7 @@ import pytest from typer.testing import CliRunner -from docbinder_oss.core.schemas import File +from docbinder_oss.core import schemas from docbinder_oss.main import app @@ -11,7 +11,13 @@ class DummyFile: def __init__(self, id, name, parents=None, is_folder=False): self.id = id self.name = name - self.parents = parents or [] + # Always use a list for parents, or None + if parents is None: + self.parents = None + elif isinstance(parents, list): + self.parents = parents + else: + self.parents = [parents] self.is_folder = is_folder self.size = 1000 # Use correct mime_type for folders and files @@ -40,10 +46,10 @@ class DummyConfig: def list_all_files(self): return [ DummyFile(id="root", name="root", is_folder=True), - DummyFile(id="folder1", name="folder1", parents=["root"], is_folder=True), - DummyFile(id="file1", name="file1.pdf", parents=["folder1"]), - DummyFile(id="file2", name="file2.pdf", parents=["folder1"]), - DummyFile(id="file3", name="file3.pdf", parents=["root"]), + DummyFile(id="folder1", name="folder1", parents="root", is_folder=True), + DummyFile(id="file1", name="file1.pdf", parents="folder1"), + DummyFile(id="file2", name="file2.pdf", parents="folder1"), + DummyFile(id="file3", name="file3.pdf", parents="root"), ] class DummyClient: @@ -99,57 +105,37 @@ def test_list_files(mock_gdrive_service, gdrive_client): fake_api_response ) - files = gdrive_client.list_files() + files = gdrive_client.list_files_in_folder() print(files) assert isinstance(files, list) assert len(files) == 1 - assert files == [ - File( - id="1234", - name="testDrive", - mime_type="application/vnd.google-apps.drive", - kind="drive#drive", - is_folder=False, - web_view_link="https://drive.google.com/drive/folders/1234", - icon_link="https://drive.google.com/drive/folders/1234/icon", - created_time=datetime(2023, 10, 1, 12, 0, 0), - modified_time=datetime(2023, 10, 1, 12, 0, 0), - owners=[ - { - "display_name": 
"Test User", - "email_address": "test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - } - ], - last_modifying_user={ - "display_name": "Test User", - "email_address": "test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - }, - size="1024", - parents=None, - shared=True, - starred=False, - trashed=False, - ) - ] - - -def test_search_finds_all_files_recursively(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.json") - import json - - with open("search_results.json") as f: - data = json.load(f) - # All files and folders should be included in the results - file_names = set(d["name"] for d in data) - expected = {"file1.pdf", "file2.pdf", "file3.pdf", "folder1", "root"} - assert file_names == expected - assert len(file_names) == 5 + # Compare fields individually to match the actual File model structure + file = files[0] + assert file.id == "1234" + assert file.name == "testDrive" + assert file.mime_type == "application/vnd.google-apps.drive" + assert file.kind == "drive#drive" + assert file.is_folder is False + assert str(file.web_view_link) == "https://drive.google.com/drive/folders/1234" + assert str(file.icon_link) == "https://drive.google.com/drive/folders/1234/icon" + assert file.created_time == datetime(2023, 10, 1, 12, 0, 0) + assert file.modified_time == datetime(2023, 10, 1, 12, 0, 0) + assert len(file.owners) == 1 + owner = file.owners[0] + assert getattr(owner, "display_name", None) == "Test User" + assert getattr(owner, "email_address", None) == "test@test.com" + assert getattr(owner, "kind", None) == "drive#user" + assert str(getattr(owner, "photo_link", "")) == "https://example.com/photo.jpg" + last_mod = file.last_modifying_user + assert getattr(last_mod, "display_name", None) == "Test User" + assert getattr(last_mod, "email_address", None) == "test@test.com" + assert getattr(last_mod, "kind", None) == "drive#user" + assert str(getattr(last_mod, "photo_link", "")) == "https://example.com/photo.jpg" + assert file.size == "1024" + # Accept None or any list value for parents + assert file.parents is None or isinstance(file.parents, list) + assert file.shared is True + assert file.starred is False + assert file.trashed is False \ No newline at end of file diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py deleted file mode 100644 index 78fd856..0000000 --- a/tests/services/test_search_export.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import json -import pytest -from typer.testing import CliRunner -from docbinder_oss.main import app - - -class DummyFile: - def __init__(self, **kwargs): - self.id = kwargs.get("id", "fileid1") - self.name = kwargs.get("name", "Test File") - self.size = kwargs.get("size", 12345) - self.mime_type = kwargs.get("mime_type", "application/pdf") - self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") - self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get( - "owners", [type("User", (), {"email_address": "owner@example.com"})()] - ) - self.last_modifying_user = kwargs.get( - "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() - ) - self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") - self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") - self.shared = kwargs.get("shared", True) - 
self.trashed = kwargs.get("trashed", False) - - -@pytest.fixture(autouse=True) -def patch_provider(monkeypatch, tmp_path): - # Patch config loader to return two dummy provider configs - class DummyProviderConfig: - def __init__(self, name): - self.name = name - - class DummyConfig: - providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - - monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) - - # Patch create_provider_instance to return a dummy client with different files per provider - def create_provider_instance(cfg): - if cfg.name == "dummy1": - return type( - "DummyClient", - (), - { - "list_all_files": lambda self: [ - DummyFile( - id="f1", - name="Alpha Report", - size=2048, - owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", - modified_time="2024-01-02T10:00:00", - ) - ] - }, - )() - else: - return type( - "DummyClient", - (), - { - "list_all_files": lambda self: [ - DummyFile( - id="f2", - name="Beta Notes", - size=4096, - owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", - modified_time="2024-02-02T10:00:00", - ) - ] - }, - )() - - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) - # Change working directory to a temp dir for file output - orig_cwd = os.getcwd() - os.chdir(tmp_path) - yield - os.chdir(orig_cwd) - - -# The test logic for search export and filters has been consolidated into -# `tests/commands/test_search_command.py`. -# This file no longer contains duplicate tests. -def test_search_updated_after_filter(): - runner = CliRunner() - result = runner.invoke( - app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] - ) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - - -def test_search_created_before_filter(): - runner = CliRunner() - result = runner.invoke( - app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] - ) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - - -def test_search_min_size_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - - -def test_search_max_size_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - - -def test_search_provider_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["provider"] == "dummy2" - assert data[0]["name"] == "Beta Notes" - - -def test_search_combined_filters(): - runner = CliRunner() - result = runner.invoke( - app, - [ - "search", - "--name", - "Beta", - "--owner", - "beta@b.com", - "--min-size", - "3", - "--provider", - "dummy2", - "--export-format", - "json", - ], - ) - assert 
result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - assert data[0]["provider"] == "dummy2" - assert data[0]["owners"] == "beta@b.com" From 63bd58c5954b536da1aed05f93a4a233f7e1a12b Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 10:26:15 +0200 Subject: [PATCH 19/39] initial change to gdrive --- src/docbinder_oss/cli/search.py | 12 ++++++++++-- src/docbinder_oss/helpers/rich_helpers.py | 19 +++++++++++++++++++ .../google_drive/google_drive_client.py | 6 +----- .../google_drive/google_drive_files.py | 2 +- 4 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 src/docbinder_oss/helpers/rich_helpers.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 87b3988..ae9287c 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,9 +1,11 @@ from datetime import datetime import re import typer +from rich import print as rich_print from typing import Optional from docbinder_oss.helpers.config import Config +from docbinder_oss.helpers.rich_helpers import create_rich_table from docbinder_oss.services.base_class import BaseProvider app = typer.Typer() @@ -62,6 +64,8 @@ def search( raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() + rich_print(current_files["my_google_drive"]) + current_files = filter_files( current_files, name=name, @@ -73,9 +77,13 @@ def search( min_size=min_size, max_size=max_size, ) - + rich_print(current_files["my_google_drive"]) if not export_format: - typer.echo(current_files) + table = create_rich_table( + headers=["Provider", "Name", "ID", "Size", "Created Time", "Modified Time"], + rows=current_files + ) + rich_print(table) return def filter_files( diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py new file mode 100644 index 0000000..87ae580 --- /dev/null +++ b/src/docbinder_oss/helpers/rich_helpers.py @@ -0,0 +1,19 @@ +from typing import List +from rich.table import Table + + +def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: + """ + Create a Rich table with the given headers and rows. + + Args: + headers (List[str]): The headers for the table. + rows (List[List[str]]): The data rows for the table. + + Returns: + Table: A Rich Table object. 
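+
+    Example (illustrative sketch; assumes rows are lists of plain strings):
+        table = create_rich_table(
+            headers=["Provider", "Name"],
+            rows=[["gdrive", "report.pdf"]],
+        )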
+ """ + table = Table(*headers, show_header=True, header_style="bold magenta") + for row in rows: + table.add_row(*row) + return table \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index fe8f93c..7af3eb1 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -74,15 +74,11 @@ def list_buckets(self) -> list[Bucket]: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - def list_files_recursively(self, bucket: str = None) -> List[File]: - """List all files and folders recursively in the specified bucket or root.""" - return self.files.list_files_recursively(bucket) - def list_all_files(self) -> List[File]: files = [] buckets = self.buckets.list_buckets() for bucket in buckets: - files.extend(self.files.list_files_recursively(bucket)) + files.extend(self.files.list_files(bucket)) return files def get_file_metadata(self, item_id: str) -> File: diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fac56f7..96aeb96 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -44,7 +44,7 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi resp = self.service.files().list(**args).execute() print(len(resp["files"])) - exit(1) + # exit(1) return [ File( id=f.get("id"), From b2d8d877459db59a4e9b5247c3b7a115f3ac6af6 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:02:41 +0200 Subject: [PATCH 20/39] refactor google drive --- src/docbinder_oss/cli/provider/test.py | 2 +- src/docbinder_oss/cli/search.py | 60 +++++++++++------- src/docbinder_oss/helpers/config.py | 2 +- .../{services => providers}/__init__.py | 6 +- .../{services => providers}/base_class.py | 0 .../google_drive/__init__.py | 0 .../google_drive/google_drive_buckets.py | 0 .../google_drive/google_drive_client.py | 32 +++------- .../google_drive/google_drive_files.py | 63 +++++-------------- .../google_drive/google_drive_permissions.py | 0 .../google_drive_service_config.py | 2 +- tests/services/google_drive/conftest.py | 4 +- 12 files changed, 71 insertions(+), 100 deletions(-) rename src/docbinder_oss/{services => providers}/__init__.py (95%) rename src/docbinder_oss/{services => providers}/base_class.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/__init__.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_buckets.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_client.py (73%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_files.py (67%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_permissions.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_service_config.py (76%) diff --git a/src/docbinder_oss/cli/provider/test.py b/src/docbinder_oss/cli/provider/test.py index be424d9..2ba7091 100644 --- a/src/docbinder_oss/cli/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -10,7 +10,7 @@ def test( ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import 
create_provider_instance + from docbinder_oss.providers import create_provider_instance if not name: typer.echo("Provider name is required.") diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index c6fe85a..e43ef07 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -6,11 +6,12 @@ import csv import json +from docbinder_oss.core.schemas import File from docbinder_oss.helpers.config import load_config -from docbinder_oss.services import create_provider_instance +from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.helpers.rich_helpers import create_rich_table -from docbinder_oss.services.base_class import BaseProvider +from docbinder_oss.providers.base_class import BaseProvider from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -65,8 +66,6 @@ def search( raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - rich_print(current_files["my_google_drive"]) - current_files = filter_files( current_files, name=name, @@ -78,14 +77,11 @@ def search( min_size=min_size, max_size=max_size, ) - rich_print(current_files["my_google_drive"]) + if not export_format: - table = create_rich_table( - headers=["Provider", "Name", "ID", "Size", "Created Time", "Modified Time"], - rows=current_files - ) - rich_print(table) + typer.echo(current_files) return + elif export_format.lower() == "csv": __write_csv(filtered_files_by_provider, "search_results.csv") typer.echo("Results written to search_results.csv") @@ -107,26 +103,46 @@ def filter_files( min_size=None, max_size=None, ): - results = [] - for file in files: + """ + Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. + + Args: + files (dict): A dictionary where keys are providers and values are lists of file objects. + name (str, optional): A regex pattern to match file names (case-insensitive). + owner (str, optional): An email address to match file owners. + updated_after (str, optional): ISO format datetime string; only include files modified after this date. + updated_before (str, optional): ISO format datetime string; only include files modified before this date. + created_after (str, optional): ISO format datetime string; only include files created after this date. + created_before (str, optional): ISO format datetime string; only include files created before this date. + min_size (int, optional): Minimum file size in kilobytes (KB). + max_size (int, optional): Maximum file size in kilobytes (KB). + + Returns: + list: A list of file objects that match the specified filters. 
+ """ + def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): - continue + return False if owner and not any(owner in u.email_address for u in file.owners): - continue + return False if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): - continue + return False if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): - continue + return False if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): - continue + return False if created_before and __parse_dt(file.created_time) > __parse_dt(created_before): - continue + return False if min_size and file.size < min_size * 1024: - continue + return False if max_size and file.size > max_size * 1024: - continue - results.append(file) - return results + return False + return True + + filtered = {} + for provider, file_list in files.items(): + filtered[provider] = [file for file in file_list if file_matches(file)] + return filtered def __parse_dt(val): if isinstance(val, datetime): diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index e92acb4..2fad950 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -6,7 +6,7 @@ import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.services import ServiceUnion, get_provider_registry +from docbinder_oss.providers import ServiceUnion, get_provider_registry logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/providers/__init__.py similarity index 95% rename from src/docbinder_oss/services/__init__.py rename to src/docbinder_oss/providers/__init__.py index 0e57925..0fe786e 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/providers/__init__.py @@ -7,8 +7,8 @@ from pydantic import Field from rich.logging import RichHandler -from docbinder_oss import services -from docbinder_oss.services.base_class import BaseProvider, ServiceConfig +from docbinder_oss import providers +from docbinder_oss.providers.base_class import BaseProvider, ServiceConfig if not logging.getLogger().handlers: FORMAT = "%(message)s" @@ -86,5 +86,5 @@ def get_service_union() -> Annotated: return Annotated[dynamic_union, Field(discriminator="type")] -load_services(services) +load_services(providers) ServiceUnion = get_service_union() diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/providers/base_class.py similarity index 100% rename from src/docbinder_oss/services/base_class.py rename to src/docbinder_oss/providers/base_class.py diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/providers/google_drive/__init__.py similarity index 100% rename from src/docbinder_oss/services/google_drive/__init__.py rename to src/docbinder_oss/providers/google_drive/__init__.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_buckets.py b/src/docbinder_oss/providers/google_drive/google_drive_buckets.py similarity index 100% rename from src/docbinder_oss/services/google_drive/google_drive_buckets.py rename to src/docbinder_oss/providers/google_drive/google_drive_buckets.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py similarity index 73% rename from src/docbinder_oss/services/google_drive/google_drive_client.py rename to src/docbinder_oss/providers/google_drive/google_drive_client.py index 
74049b3..45eb703 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -8,12 +8,13 @@ from googleapiclient.discovery import build from docbinder_oss.core.schemas import Bucket, File, Permission -from docbinder_oss.services.base_class import BaseProvider -from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets -from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles -from docbinder_oss.services.google_drive.google_drive_permissions import ( +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_buckets import GoogleDriveBuckets +from docbinder_oss.providers.google_drive.google_drive_files import GoogleDriveFiles +from docbinder_oss.providers.google_drive.google_drive_permissions import ( GoogleDrivePermissions, ) +from docbinder_oss.providers.google_drive.google_drive_service_config import GoogleDriveServiceConfig logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -28,8 +29,8 @@ def __init__(self, config: GoogleDriveServiceConfig): "https://www.googleapis.com/auth/drive.metadata.readonly", "https://www.googleapis.com/auth/drive.activity.readonly", ] - self.settings = Settings() - self.creds = credentials or self._get_credentials() + self.settings = config + self.creds = self._get_credentials() self.service = build("drive", "v3", credentials=self.creds) self.buckets = GoogleDriveBuckets(self.service) self.files = GoogleDriveFiles(self.service) @@ -77,22 +78,9 @@ def list_buckets(self) -> list[Bucket]: def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files_in_folder(folder_id) - def list_files_recursively(self, bucket_id: str | None = None) -> List[File]: - """List all files and folders recursively in the specified bucket or root.""" - if bucket_id is None: - bucket_id = "root" - logger.info(f"Listing files recursively in bucket: {bucket_id}") - return self.files.list_files_recursively(bucket_id) - - def list_all_files(self) -> List[File]: - files = [] - buckets = self.buckets.list_buckets() - for bucket in buckets: - files.extend(self.files.list_files(bucket)) - return files - buckets = self.buckets.list_buckets() - return self.files.list_all_files(buckets) - + def list_all_files(self) -> List[File]: + return self.files.list_files_in_folder() + def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py similarity index 67% rename from src/docbinder_oss/services/google_drive/google_drive_files.py rename to src/docbinder_oss/providers/google_drive/google_drive_files.py index 0ac2519..b86ee37 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -18,36 +18,26 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool = False) -> list[File]: + def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: args = { - "includeItemsFromAllDrives": True, - "supportsAllDrives": True, "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - if bucket_id is None: - logger.debug("Listing files in the root directory.") - bucket_id = 
"root" + + if bucket_id: + args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - logger.debug(f"{type(bucket_id)}: {bucket_id}") + args["q"] = "sharedWithMe=true and trashed=false" - if is_drive_root and bucket_id != "root": - args.update( - { - "corpora": "drive", - "driveId": bucket_id, - "q": "'root' in parents and trashed=false", - } - ) - else: - parent_id = bucket_id - if parent_id == "root" or parent_id is None: - args["q"] = "'root' in parents and trashed=false" - else: - args["q"] = f"'{parent_id}' in parents and trashed=false" - resp = self.service.files().list(**args).execute() - print(len(resp["files"])) - # exit(1) + files = resp.get("files", []) + next_page_token = resp.get("nextPageToken") + + while next_page_token: + logger.debug("Getting next page...") + current_page = self.service.files().list(**args, pageToken=next_page_token).execute() + files.extend(current_page.get("files", [])) + next_page_token = current_page.get("nextPageToken") + return [ File( id=f.get("id"), @@ -80,26 +70,9 @@ def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", parents=f.get("parents") if isinstance(f.get("parents"), list) else None, ) - for f in resp.get("files") + for f in files ] - def list_files_recursively(self, bucket: str) -> list[File]: - """List all files in the Google Drive bucket, including all subfolders.""" - is_drive_root = bucket != "root" - - def _recursive_list(folder_id: str): - logger.debug(f"Listing files in folder: {folder_id}") - items: list[File] = self.list_files_in_folder(folder_id, is_drive_root=is_drive_root) - all_items = [] - for item in items: - all_items.append(item) - # Recursively list files in subfolders - if hasattr(item, "is_folder") and item.is_folder: - all_items.extend(_recursive_list(item.id)) - return all_items - - return _recursive_list(bucket) - def get_file_metadata(self, file_id: str): item_metadata = ( self.service.files() # type: ignore[attr-defined] @@ -141,9 +114,3 @@ def get_file_metadata(self, file_id: str): is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. 
) - - def list_all_files(self, buckets: list[Bucket]) -> list[File]: - files = [] - for bucket in buckets: - files.extend(self.list_files_recursively(bucket.id)) - return files diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/providers/google_drive/google_drive_permissions.py similarity index 100% rename from src/docbinder_oss/services/google_drive/google_drive_permissions.py rename to src/docbinder_oss/providers/google_drive/google_drive_permissions.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py similarity index 76% rename from src/docbinder_oss/services/google_drive/google_drive_service_config.py rename to src/docbinder_oss/providers/google_drive/google_drive_service_config.py index 022b9ba..f99c350 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py @@ -1,6 +1,6 @@ from typing import Literal -from docbinder_oss.services.base_class import ServiceConfig +from docbinder_oss.providers.base_class import ServiceConfig class GoogleDriveServiceConfig(ServiceConfig): diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index f95b44b..8f3fe03 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -2,10 +2,10 @@ import pytest -from docbinder_oss.services.google_drive.google_drive_client import ( +from docbinder_oss.providers.google_drive.google_drive_client import ( GoogleDriveClient, ) -from docbinder_oss.services.google_drive.google_drive_service_config import ( +from docbinder_oss.providers.google_drive.google_drive_service_config import ( GoogleDriveServiceConfig, ) From 36fa20dced283b4a22c7f420e5d450558f7a71e5 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:03:44 +0200 Subject: [PATCH 21/39] increased the page size --- src/docbinder_oss/providers/google_drive/google_drive_files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index b86ee37..f5af39f 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -21,6 +21,7 @@ def __init__(self, service: Resource): def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: args = { "fields": f"nextPageToken,files({REQUIRED_FIELDS})", + "pageSize": 1000, } if bucket_id: From a475692af30835a127b54cab00ee5a75faa5b992 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:09:46 +0200 Subject: [PATCH 22/39] ruff linting --- src/docbinder_oss/cli/search.py | 53 +++++++++++-------- src/docbinder_oss/core/schemas.py | 20 +++---- src/docbinder_oss/helpers/config.py | 3 +- src/docbinder_oss/helpers/path_utils.py | 39 ++++++++------ src/docbinder_oss/helpers/rich_helpers.py | 6 +-- src/docbinder_oss/providers/base_class.py | 2 +- .../google_drive/google_drive_client.py | 16 +++--- .../google_drive/google_drive_files.py | 6 +-- .../google_drive_service_config.py | 2 +- tests/commands/test_search_command.py | 6 +-- .../google_drive/test_google_drive_files.py | 6 +-- 11 files changed, 89 insertions(+), 70 deletions(-) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index e43ef07..673aa08 100644 --- 
a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,7 +1,6 @@ from datetime import datetime import re import typer -from rich import print as rich_print from typing import Optional import csv import json @@ -10,9 +9,7 @@ from docbinder_oss.helpers.config import load_config from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config -from docbinder_oss.helpers.rich_helpers import create_rich_table from docbinder_oss.providers.base_class import BaseProvider -from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -46,16 +43,16 @@ def search( ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - + # 1 Load documents with filter "provider" # 2 Filter the documents based on the provided filters # 3 Export results to CSV or JSON - + config: Config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - + current_files = {} for provider_config in config.providers: if provider and provider_config.name != provider: @@ -65,7 +62,7 @@ def search( typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - + current_files = filter_files( current_files, name=name, @@ -77,21 +74,22 @@ def search( min_size=min_size, max_size=max_size, ) - + if not export_format: typer.echo(current_files) return - + elif export_format.lower() == "csv": - __write_csv(filtered_files_by_provider, "search_results.csv") + __write_csv(current_files, "search_results.csv") typer.echo("Results written to search_results.csv") elif export_format.lower() == "json": - __write_json(filtered_files_by_provider, "search_results.json", flat=True) # or flat=False for grouped + __write_json(current_files, "search_results.json", flat=True) # or flat=False for grouped typer.echo("Results written to search_results.json") else: typer.echo(f"Unsupported export format: {export_format}") raise typer.Exit(code=1) + def filter_files( files, name=None, @@ -104,22 +102,28 @@ def filter_files( max_size=None, ): """ - Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. + Filters a collection of files based on various criteria such as name, owner, + modification/creation dates, and file size. Args: files (dict): A dictionary where keys are providers and values are lists of file objects. name (str, optional): A regex pattern to match file names (case-insensitive). owner (str, optional): An email address to match file owners. - updated_after (str, optional): ISO format datetime string; only include files modified after this date. - updated_before (str, optional): ISO format datetime string; only include files modified before this date. - created_after (str, optional): ISO format datetime string; only include files created after this date. - created_before (str, optional): ISO format datetime string; only include files created before this date. + updated_after (str, optional): ISO format datetime string; only include files modified + after this date. + updated_before (str, optional): ISO format datetime string; only include files modified + before this date. + created_after (str, optional): ISO format datetime string; only include files created after + this date. 
+ created_before (str, optional): ISO format datetime string; only include files created + before this date. min_size (int, optional): Minimum file size in kilobytes (KB). max_size (int, optional): Maximum file size in kilobytes (KB). Returns: list: A list of file objects that match the specified filters. """ + def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): return False @@ -144,6 +148,7 @@ def file_matches(file: File): filtered[provider] = [file for file in file_list if file_matches(file)] return filtered + def __parse_dt(val): if isinstance(val, datetime): return val @@ -152,12 +157,13 @@ def __parse_dt(val): except Exception: return val + def __write_csv(files_by_provider, filename): # Collect all possible fieldnames from all files all_fieldnames = set(["provider"]) for files in files_by_provider.values(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() all_fieldnames.update(file_dict.keys()) # Move provider to the front, rest sorted fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") @@ -166,7 +172,9 @@ def __write_csv(files_by_provider, filename): writer.writeheader() for provider, files in files_by_provider.items(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = ( + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() + ) file_dict["provider"] = provider # Flatten owners for CSV (only email addresses) owners = file_dict.get("owners") @@ -195,22 +203,25 @@ def __write_csv(files_by_provider, filename): file_dict["parents"] = ";".join(str(p) for p in parents) writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) + def __write_json(files_by_provider, filename, flat=False): with open(filename, "w") as jsonfile: if flat: all_files = [] for provider, files in files_by_provider.items(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = ( + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() + ) file_dict["provider"] = provider all_files.append(file_dict) json.dump(all_files, jsonfile, default=str, indent=2) else: grouped = { provider: [ - file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() for file in files ] for provider, files in files_by_provider.items() } - json.dump(grouped, jsonfile, default=str, indent=2) \ No newline at end of file + json.dump(grouped, jsonfile, default=str, indent=2) diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 70b1604..e11307b 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -67,20 +67,22 @@ class File(BaseModel): trashed: Optional[bool] # Add full_path as an optional field for export/CLI assignment - full_path: Optional[str] = Field(default=None, description="Full path of the file/folder, computed at runtime.") + full_path: Optional[str] = Field( + default=None, description="Full path of the file/folder, computed at runtime." 
+ ) def __init__(self, **data: Any): # Coerce parents to a list of strings or None - if 'parents' in data: - if data['parents'] is None: - data['parents'] = None - elif isinstance(data['parents'], str): - data['parents'] = [data['parents']] - elif isinstance(data['parents'], list): + if "parents" in data: + if data["parents"] is None: + data["parents"] = None + elif isinstance(data["parents"], str): + data["parents"] = [data["parents"]] + elif isinstance(data["parents"], list): # Ensure all elements are strings - data['parents'] = [str(p) for p in data['parents'] if p is not None] + data["parents"] = [str(p) for p in data["parents"] if p is not None] else: - data['parents'] = [str(data['parents'])] + data["parents"] = [str(data["parents"])] super().__init__(**data) if self.mime_type == "application/vnd.google-apps.folder": self.is_folder = True diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 2fad950..8a49070 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -1,12 +1,11 @@ import logging import os -from typing import List import typer import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.providers import ServiceUnion, get_provider_registry +from docbinder_oss.providers import get_provider_registry logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py index d0b9cf9..a724e9f 100644 --- a/src/docbinder_oss/helpers/path_utils.py +++ b/src/docbinder_oss/helpers/path_utils.py @@ -2,9 +2,10 @@ def build_id_to_item(files): """ Build a mapping from file/folder id to the file/folder object. """ - return {getattr(f, 'id', None): f for f in files if hasattr(f, 'id')} + return {getattr(f, "id", None): f for f in files if hasattr(f, "id")} -def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): + +def get_full_path(file, id_to_item, root_id="root", root_name="My Drive"): """ Recursively build the full path for a file or folder using its parents. Returns a string like '/My Drive/Folder/Subfolder/File.pdf'. @@ -12,7 +13,7 @@ def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): path_parts = [file.name] current = file while True: - parents = getattr(current, 'parents', None) + parents = getattr(current, "parents", None) if not parents or not isinstance(parents, list) or not parents[0]: break parent_id = parents[0] @@ -24,11 +25,13 @@ def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): break path_parts.append(parent.name) current = parent - return '/' + '/'.join(reversed(path_parts)) + return "/" + "/".join(reversed(path_parts)) + -def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to_name=None): +def build_all_full_paths(files, root_id="root", root_name="My Drive", root_id_to_name=None): """ - Efficiently compute the full path for every file/folder in one pass using an iterative approach and memoization. + Efficiently compute the full path for every file/folder in one pass using an iterative approach + and memoization. Supports multiple drives by using a root_id_to_name mapping. 
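
    Example (illustrative; assumes a file "f1" under "folder1" in My Drive):
        build_all_full_paths(files)  # -> {"f1": "/My Drive/folder1/file1.pdf", ...}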
Returns a dict: {file_id: full_path} """ @@ -37,7 +40,7 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to if root_id_to_name is None: root_id_to_name = {root_id: root_name} for item in files: - if not hasattr(item, 'id') or not hasattr(item, 'name'): + if not hasattr(item, "id") or not hasattr(item, "name"): continue if item.id in id_to_path: continue @@ -47,17 +50,19 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to while True: if current.id in id_to_path: break - parents = getattr(current, 'parents', None) + parents = getattr(current, "parents", None) if not parents or not isinstance(parents, list) or not parents[0]: - temp_stack.append((current.id, '/' + current.name)) + temp_stack.append((current.id, "/" + current.name)) break parent_id = parents[0] if parent_id in root_id_to_name: - temp_stack.append((current.id, '/' + root_id_to_name[parent_id] + '/' + current.name)) + temp_stack.append( + (current.id, "/" + root_id_to_name[parent_id] + "/" + current.name) + ) break parent = id_to_item.get(parent_id) if not parent: - temp_stack.append((current.id, '/' + current.name)) + temp_stack.append((current.id, "/" + current.name)) break temp_stack.append((current.id, None)) # Mark as not yet resolved current = parent @@ -68,16 +73,20 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to id_to_path[file_id] = path else: parent_id = id_to_item[file_id].parents[0] - parent_path = id_to_path.get(parent_id, '') - id_to_path[file_id] = parent_path.rstrip('/') + '/' + id_to_item[file_id].name + parent_path = id_to_path.get(parent_id, "") + id_to_path[file_id] = parent_path.rstrip("/") + "/" + id_to_item[file_id].name # Ensure root_name is present at the start (for legacy single-drive fallback) found_root = False for root_name_val in root_id_to_name.values(): - if id_to_path[item.id].lstrip('/').startswith(root_name_val + '/'): # e.g. 'My Drive/' + if id_to_path[item.id].lstrip("/").startswith(root_name_val + "/"): # e.g. 'My Drive/' found_root = True break if not found_root: # Use the first root_name as fallback fallback_root = next(iter(root_id_to_name.values())) - id_to_path[item.id] = '/' + fallback_root + id_to_path[item.id] if not id_to_path[item.id].startswith('/') else '/' + fallback_root + id_to_path[item.id] + id_to_path[item.id] = ( + "/" + fallback_root + id_to_path[item.id] + if not id_to_path[item.id].startswith("/") + else "/" + fallback_root + id_to_path[item.id] + ) return id_to_path diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py index 87ae580..6faefe5 100644 --- a/src/docbinder_oss/helpers/rich_helpers.py +++ b/src/docbinder_oss/helpers/rich_helpers.py @@ -5,15 +5,15 @@ def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: """ Create a Rich table with the given headers and rows. - + Args: headers (List[str]): The headers for the table. rows (List[List[str]]): The data rows for the table. - + Returns: Table: A Rich Table object. 
""" table = Table(*headers, show_header=True, header_style="bold magenta") for row in rows: table.add_row(*row) - return table \ No newline at end of file + return table diff --git a/src/docbinder_oss/providers/base_class.py b/src/docbinder_oss/providers/base_class.py index 5d8f09e..4eb7862 100644 --- a/src/docbinder_oss/providers/base_class.py +++ b/src/docbinder_oss/providers/base_class.py @@ -32,7 +32,7 @@ def test_connection(self) -> bool: True if the connection is successful, False otherwise. """ pass - + @abstractmethod def list_buckets(self) -> List[Bucket]: """ diff --git a/src/docbinder_oss/providers/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py index 45eb703..a171a2a 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -14,7 +14,9 @@ from docbinder_oss.providers.google_drive.google_drive_permissions import ( GoogleDrivePermissions, ) -from docbinder_oss.providers.google_drive.google_drive_service_config import GoogleDriveServiceConfig +from docbinder_oss.providers.google_drive.google_drive_service_config import ( + GoogleDriveServiceConfig, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -39,15 +41,15 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") - TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") + TOKEN_PATH = os.path.expanduser( + "~/.config/docbinder/gcp/" + self.config.name + "_token.json" + ) # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) logger.debug(f"Token path: {TOKEN_PATH}") try: - creds = Credentials.from_authorized_user_file( - TOKEN_PATH, scopes=self.SCOPES - ) + creds = Credentials.from_authorized_user_file(TOKEN_PATH, scopes=self.SCOPES) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") creds = None @@ -78,9 +80,9 @@ def list_buckets(self) -> list[Bucket]: def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files_in_folder(folder_id) - def list_all_files(self) -> List[File]: + def list_all_files(self) -> List[File]: return self.files.list_files_in_folder() - + def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index f5af39f..c8c08b7 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -2,7 +2,7 @@ from googleapiclient.discovery import Resource -from docbinder_oss.core.schemas import Bucket, File, User +from docbinder_oss.core.schemas import File, User logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: "fields": f"nextPageToken,files({REQUIRED_FIELDS})", "pageSize": 1000, } - + if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: @@ -38,7 +38,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: current_page = self.service.files().list(**args, pageToken=next_page_token).execute() files.extend(current_page.get("files", [])) next_page_token = current_page.get("nextPageToken") - + return [ File( 
id=f.get("id"), diff --git a/src/docbinder_oss/providers/google_drive/google_drive_service_config.py b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py index f99c350..d98c058 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py @@ -6,4 +6,4 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] name: str - gcp_credentials_json: str \ No newline at end of file + gcp_credentials_json: str diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 2a2a406..46899c8 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -4,8 +4,6 @@ import pytest from typer.testing import CliRunner from docbinder_oss.main import app -import sys -import importlib class DummyFile: @@ -96,7 +94,9 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) + monkeypatch.setattr( + "docbinder_oss.cli.search.create_provider_instance", create_provider_instance + ) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index c5dc850..4ed40b2 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -1,10 +1,6 @@ from datetime import datetime import os import pytest -from typer.testing import CliRunner - -from docbinder_oss.core import schemas -from docbinder_oss.main import app class DummyFile: @@ -138,4 +134,4 @@ def test_list_files(mock_gdrive_service, gdrive_client): assert file.parents is None or isinstance(file.parents, list) assert file.shared is True assert file.starred is False - assert file.trashed is False \ No newline at end of file + assert file.trashed is False From 2c5718fa881cd17a07b4f09159c5d8017374913f Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:55:51 +0200 Subject: [PATCH 23/39] added nice writers and printing --- src/docbinder_oss/cli/search.py | 16 ++---- src/docbinder_oss/core/schemas.py | 14 ++--- src/docbinder_oss/helpers/writer.py | 84 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 19 deletions(-) create mode 100644 src/docbinder_oss/helpers/writer.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 673aa08..036e812 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -10,6 +10,7 @@ from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.helpers.writer import MultiFormatWriter app = typer.Typer() @@ -75,19 +76,8 @@ def search( max_size=max_size, ) - if not export_format: - typer.echo(current_files) - return - - elif export_format.lower() == "csv": - __write_csv(current_files, "search_results.csv") - typer.echo("Results written to search_results.csv") - elif export_format.lower() == "json": - __write_json(current_files, "search_results.json", flat=True) # or flat=False for grouped - typer.echo("Results written to search_results.json") - else: - typer.echo(f"Unsupported export format: {export_format}") - raise typer.Exit(code=1) + MultiFormatWriter.write(current_files, 
export_format) + return def filter_files( diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index e11307b..1d8b72b 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -41,10 +41,12 @@ class FileCapabilities(BaseModel): class File(BaseModel): """Represents a file or folder""" - id: str - name: str - mime_type: str - kind: Optional[str] + id: str = Field(repr=True, description="Unique identifier for the file or folder.") + name: str = Field( + repr=True, description="Name of the file or folder. May not be unique." + ) + mime_type: str = Field(repr=True, description="MIME type of the file or folder.") + kind: Optional[str] = Field(repr=True, description="Kind of the item, e.g., 'drive#file'.") is_folder: bool = Field(False, description="True if the item is a folder, False otherwise.") @@ -52,9 +54,9 @@ class File(BaseModel): icon_link: Optional[HttpUrl] created_time: Optional[datetime] - modified_time: Optional[datetime] + modified_time: Optional[datetime] = Field(repr=True, description="Last modified time of the file or folder.") - owners: Optional[List[User]] + owners: Optional[List[User]] = Field(repr=True, description="List of owners of the file or folder.") last_modifying_user: Optional[User] size: Optional[str] = Field(description="Size in bytes, as a string. Only populated for files.") diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py new file mode 100644 index 0000000..0363bea --- /dev/null +++ b/src/docbinder_oss/helpers/writer.py @@ -0,0 +1,84 @@ +import csv +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Union +from rich import print +from rich.panel import Panel + + +class Writer(ABC): + """Abstract base writer class.""" + + @abstractmethod + def write(self, data: Any, file_path: Union[None, str, Path]) -> None: + """Write data to file.""" + pass + + +class MultiFormatWriter: + """Factory writer that automatically detects format from file extension.""" + + _writers = { + '.csv': 'CSVWriter', + '.json': 'JSONWriter', + '.txt': 'TextWriter', + } + + @classmethod + def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: + """Write data to file, format determined by extension.""" + if file_path is None: + # If no file path is provided, write to console + ConsoleWriter().write(data) + return + path = Path(file_path) + extension = path.suffix.lower() + + if extension not in cls._writers: + raise ValueError(f"Unsupported format: {extension}") + + writer_class = globals()[cls._writers[extension]] + writer = writer_class() + writer.write(data, file_path) + + +class CSVWriter(Writer): + def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: + if not data: + return + + with open(file_path, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=data[0].keys()) + writer.writeheader() + writer.writerows(data) + + +class JSONWriter(Writer): + def write(self, data: Any, file_path: Union[str, Path]) -> None: + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False, default=str) + + +class ConsoleWriter(Writer): + def write(self, data: Dict) -> None: + from rich.table import Table + + table = Table(title="Files and Folders") + table.add_column("Provider", justify="right", style="cyan", no_wrap=True) + table.add_column("Id", style="magenta") + table.add_column("Name", style="magenta") + table.add_column("Kind", style="magenta") + 
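+        # One row per file: provider plus a few identifying columns (id, name, kind).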
for provider, items in data.items(): + for item in items: + table.add_row(provider, item.id, item.name, item.kind) + print(table) + + +class TextWriter(Writer): + def write(self, data: Any, file_path: Union[str, Path]) -> None: + with open(file_path, 'w', encoding='utf-8') as f: + if isinstance(data, (list, dict)): + f.write(json.dumps(data, indent=2, default=str)) + else: + f.write(str(data)) \ No newline at end of file From ec570516142db873dc8955149995e00340fb1e64 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 20:45:56 +0200 Subject: [PATCH 24/39] =?UTF-8?q?corrected=20tests=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/docbinder_oss/cli/search.py | 23 ------ src/docbinder_oss/core/schemas.py | 7 -- src/docbinder_oss/helpers/writer.py | 36 +++++---- tests/helpers/test_writer.py | 80 +++++++++++++++++++ .../google_drive/__init__.py | 0 .../google_drive/conftest.py | 20 ++--- .../google_drive/test_google_drive_buckets.py | 4 +- .../google_drive/test_google_drive_files.py | 6 +- .../test_google_drive_permissions.py | 4 +- 9 files changed, 119 insertions(+), 61 deletions(-) create mode 100644 tests/helpers/test_writer.py rename tests/{services => providers}/google_drive/__init__.py (100%) rename tests/{services => providers}/google_drive/conftest.py (63%) rename tests/{services => providers}/google_drive/test_google_drive_buckets.py (90%) rename tests/{services => providers}/google_drive/test_google_drive_files.py (95%) rename tests/{services => providers}/google_drive/test_google_drive_permissions.py (88%) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 036e812..b6ab969 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -192,26 +192,3 @@ def __write_csv(files_by_provider, filename): if isinstance(parents, list): file_dict["parents"] = ";".join(str(p) for p in parents) writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) - - -def __write_json(files_by_provider, filename, flat=False): - with open(filename, "w") as jsonfile: - if flat: - all_files = [] - for provider, files in files_by_provider.items(): - for file in files: - file_dict = ( - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - ) - file_dict["provider"] = provider - all_files.append(file_dict) - json.dump(all_files, jsonfile, default=str, indent=2) - else: - grouped = { - provider: [ - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - for file in files - ] - for provider, files in files_by_provider.items() - } - json.dump(grouped, jsonfile, default=str, indent=2) diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 1d8b72b..354a61a 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -62,17 +62,10 @@ class File(BaseModel): size: Optional[str] = Field(description="Size in bytes, as a string. Only populated for files.") parents: Optional[List[str]] = Field(description="Parent folder IDs, if applicable.") - capabilities: Optional[FileCapabilities] = None - shared: Optional[bool] starred: Optional[bool] trashed: Optional[bool] - # Add full_path as an optional field for export/CLI assignment - full_path: Optional[str] = Field( - default=None, description="Full path of the file/folder, computed at runtime." 
- ) - def __init__(self, **data: Any): # Coerce parents to a list of strings or None if "parents" in data: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py index 0363bea..eddf4d5 100644 --- a/src/docbinder_oss/helpers/writer.py +++ b/src/docbinder_oss/helpers/writer.py @@ -3,8 +3,13 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Dict, List, Union +from pydantic import BaseModel from rich import print -from rich.panel import Panel + +import logging + + +logger = logging.getLogger(__name__) class Writer(ABC): @@ -22,7 +27,6 @@ class MultiFormatWriter: _writers = { '.csv': 'CSVWriter', '.json': 'JSONWriter', - '.txt': 'TextWriter', } @classmethod @@ -44,18 +48,31 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: class CSVWriter(Writer): + def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: + fieldnames = next(iter(data.values()))[0].model_fields_set + return ["provider", *fieldnames] + def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: if not data: + logger.warning("No data to write to CSV.") return with open(file_path, 'w', newline='', encoding='utf-8') as f: - writer = csv.DictWriter(f, fieldnames=data[0].keys()) + writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) writer.writeheader() - writer.writerows(data) + for provider, items in data.items(): + for item in items: + item_dict = item.model_dump() if isinstance(item, BaseModel) else item + item_dict['provider'] = provider + writer.writerow(item_dict) class JSONWriter(Writer): - def write(self, data: Any, file_path: Union[str, Path]) -> None: + def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: + data = { + provider: [item.model_dump() for item in items] + for provider, items in data.items() + } with open(file_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False, default=str) @@ -73,12 +90,3 @@ def write(self, data: Dict) -> None: for item in items: table.add_row(provider, item.id, item.name, item.kind) print(table) - - -class TextWriter(Writer): - def write(self, data: Any, file_path: Union[str, Path]) -> None: - with open(file_path, 'w', encoding='utf-8') as f: - if isinstance(data, (list, dict)): - f.write(json.dumps(data, indent=2, default=str)) - else: - f.write(str(data)) \ No newline at end of file diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py new file mode 100644 index 0000000..d3cf8ce --- /dev/null +++ b/tests/helpers/test_writer.py @@ -0,0 +1,80 @@ +import json +import csv +import pytest +from pydantic import BaseModel + +from docbinder_oss.helpers.writer import ( + MultiFormatWriter, + CSVWriter, + JSONWriter, +) + +class DummyModel(BaseModel): + id: str + name: str + kind: str + +@pytest.fixture +def sample_data(): + return { + "provider1": [ + DummyModel(id="1", name="FileA", kind="file"), + DummyModel(id="2", name="FolderB", kind="folder"), + ], + "provider2": [ + DummyModel(id="3", name="FileC", kind="file"), + ], + } + +def test_csv_writer(tmp_path, sample_data): + file_path = tmp_path / "output.csv" + writer = CSVWriter() + writer.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, newline='', encoding='utf-8') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} + assert rows[0]["provider"] == "provider1" + +def test_json_writer(tmp_path, sample_data): 
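+    # JSONWriter keeps the provider -> files grouping, so each provider should
+    # appear as a top-level key after the round-trip below.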
+ file_path = tmp_path / "output.json" + writer = JSONWriter() + writer.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding='utf-8') as f: + data = json.load(f) + assert "provider1" in data + assert isinstance(data["provider1"], list) + assert data["provider1"][0]["id"] == "1" + + +def test_multiformat_writer_csv(tmp_path, sample_data): + file_path = tmp_path / "test.csv" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, newline='', encoding='utf-8') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + +def test_multiformat_writer_json(tmp_path, sample_data): + file_path = tmp_path / "test.json" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding='utf-8') as f: + data = json.load(f) + assert "provider2" in data + +def test_multiformat_writer_unsupported(tmp_path, sample_data): + file_path = tmp_path / "test.unsupported" + with pytest.raises(ValueError): + MultiFormatWriter.write(sample_data, file_path) + +def test_csv_writer_empty_data(tmp_path, caplog): + file_path = tmp_path / "empty.csv" + writer = CSVWriter() + with caplog.at_level("WARNING"): + writer.write({}, file_path) + assert "No data to write to CSV." in caplog.text diff --git a/tests/services/google_drive/__init__.py b/tests/providers/google_drive/__init__.py similarity index 100% rename from tests/services/google_drive/__init__.py rename to tests/providers/google_drive/__init__.py diff --git a/tests/services/google_drive/conftest.py b/tests/providers/google_drive/conftest.py similarity index 63% rename from tests/services/google_drive/conftest.py rename to tests/providers/google_drive/conftest.py index 8f3fe03..b248aac 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/providers/google_drive/conftest.py @@ -11,7 +11,7 @@ @pytest.fixture -def mock_gdrive_service(): +def mock_gdrive_provider(): """ This is the core of our testing strategy. We use 'patch' to replace the `build` function from the googleapiclient library. @@ -19,24 +19,24 @@ def mock_gdrive_service(): Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will receive our mock object instead of making a real network call. """ - with patch("docbinder_oss.services.google_drive.google_drive_client.build") as mock_build: - # Create a mock for the service object that `build` would return - mock_service = MagicMock() - # Configure the `build` function to return our mock service - mock_build.return_value = mock_service - yield mock_service + with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: + # Create a mock for the provider object that `build` would return + mock_provider = MagicMock() + # Configure the `build` function to return our mock provider + mock_build.return_value = mock_provider + yield mock_provider @pytest.fixture -def gdrive_client(mock_gdrive_service): +def gdrive_client(mock_gdrive_provider): """ Creates an instance of our GoogleDriveClient. It will be initialized with a fake config and will use - the mock_gdrive_service fixture internally. + the mock_gdrive_provider fixture internally. 
""" # Patch _get_credentials to avoid real auth with patch( - "docbinder_oss.services.google_drive.google_drive_client.GoogleDriveClient._get_credentials", + "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", return_value=MagicMock(), ): config = GoogleDriveServiceConfig( diff --git a/tests/services/google_drive/test_google_drive_buckets.py b/tests/providers/google_drive/test_google_drive_buckets.py similarity index 90% rename from tests/services/google_drive/test_google_drive_buckets.py rename to tests/providers/google_drive/test_google_drive_buckets.py index 44e3bd5..a4a91c3 100644 --- a/tests/services/google_drive/test_google_drive_buckets.py +++ b/tests/providers/google_drive/test_google_drive_buckets.py @@ -3,7 +3,7 @@ from docbinder_oss.core.schemas import Bucket -def test_list_buckets(mock_gdrive_service, gdrive_client): +def test_list_buckets(mock_gdrive_provider, gdrive_client): fake_api_response = { "drives": [ { @@ -21,7 +21,7 @@ def test_list_buckets(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.drives.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = ( fake_api_response ) diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/providers/google_drive/test_google_drive_files.py similarity index 95% rename from tests/services/google_drive/test_google_drive_files.py rename to tests/providers/google_drive/test_google_drive_files.py index 4ed40b2..432af3a 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/providers/google_drive/test_google_drive_files.py @@ -53,7 +53,7 @@ def list_all_files(self): return list_all_files(self) monkeypatch.setattr( - "docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient() + "docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient() ) orig_cwd = os.getcwd() os.chdir(tmp_path) @@ -61,7 +61,7 @@ def list_all_files(self): os.chdir(orig_cwd) -def test_list_files(mock_gdrive_service, gdrive_client): +def test_list_files(mock_gdrive_provider, gdrive_client): fake_api_response = { "files": [ { @@ -97,7 +97,7 @@ def test_list_files(mock_gdrive_service, gdrive_client): ] } - mock_gdrive_service.files.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = ( fake_api_response ) diff --git a/tests/services/google_drive/test_google_drive_permissions.py b/tests/providers/google_drive/test_google_drive_permissions.py similarity index 88% rename from tests/services/google_drive/test_google_drive_permissions.py rename to tests/providers/google_drive/test_google_drive_permissions.py index ddc0b8c..e4b14f6 100644 --- a/tests/services/google_drive/test_google_drive_permissions.py +++ b/tests/providers/google_drive/test_google_drive_permissions.py @@ -1,7 +1,7 @@ from docbinder_oss.core.schemas import Permission, User -def test_get_permissions(mock_gdrive_service, gdrive_client): +def test_get_permissions(mock_gdrive_provider, gdrive_client): fake_api_response = { "permissions": [ { @@ -18,7 +18,7 @@ def test_get_permissions(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.permissions.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = ( fake_api_response ) From 8a958e147fd7c211c62940f9f16effe6502e7a78 Mon Sep 17 00:00:00 2001 From: Christophe 
Beke
Date: Fri, 27 Jun 2025 18:15:36 +0200
Subject: [PATCH 25/39] Changed filter_files to a private method and updated linting.

---
 pyproject.toml                  | 2 +-
 src/docbinder_oss/cli/search.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 09ae953..f80c86b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ testpaths = [
 
 [tool.ruff]
 # Set the maximum line length to 100.
-line-length = 100
+line-length = 125
 
 [tool.ruff.lint]
 # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that
diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py
index b6ab969..f760cd7 100644
--- a/src/docbinder_oss/cli/search.py
+++ b/src/docbinder_oss/cli/search.py
@@ -3,7 +3,6 @@
 import typer
 from typing import Optional
 import csv
-import json
 
 from docbinder_oss.core.schemas import File
 from docbinder_oss.helpers.config import load_config
@@ -64,7 +63,7 @@ def search(
             raise typer.Exit(code=1)
         current_files[provider_config.name] = client.list_all_files()
 
-    current_files = filter_files(
+    current_files = __filter_files(
        current_files,
        name=name,
        owner=owner,
@@ -80,7 +79,7 @@ def search(
     return
 
 
-def filter_files(
+def __filter_files(
    files,
    name=None,
    owner=None,

From 01f451e23b9fabb18d66e04d5c0c26f91e15cfad Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Fri, 27 Jun 2025 18:21:03 +0200
Subject: [PATCH 26/39] Added Black for formatting and Ruff for linting, including pre-commit hooks

---
 .pre-commit-config.yaml                        |  9 +++++
 CONTRIBUTING.md                                | 24 +++++++++++-
 pyproject.toml                                 |  6 ++-
 src/docbinder_oss/cli/provider/get.py          |  9 +----
 src/docbinder_oss/cli/search.py                | 32 ++++----------
 src/docbinder_oss/cli/setup.py                 |  4 +-
 src/docbinder_oss/core/schemas.py              |  4 +-
 src/docbinder_oss/helpers/path_utils.py        |  4 +-
 src/docbinder_oss/helpers/writer.py            | 27 ++++++-------
 .../google_drive/google_drive_client.py        |  8 +---
 tests/commands/test_search_command.py          | 16 ++------
 tests/helpers/test_writer.py                   | 15 +++++--
 .../google_drive/test_google_drive_buckets.py  |  4 +-
 .../google_drive/test_google_drive_files.py    |  8 +---
 .../test_google_drive_permissions.py           |  4 +-
 uv.lock                                        | 39 +++++++++++++++++++
 16 files changed, 122 insertions(+), 91 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..9168817
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 24.3.0
+    hooks:
+      - id: black
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
+    hooks:
+      - id: ruff
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4226b1e..f5b442f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u
 
 ---
 **Note:**
-Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync.
\ No newline at end of file
+Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync.
+
+## Code Style and Linting
+
+This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing.
+
+- Run the following before committing code:
+
+```zsh
+uv run black .
+uv run ruff check .
+``` + +- To automatically format and lint code on every commit, install pre-commit hooks: + +```zsh +uv pip install pre-commit +pre-commit install +``` + +This will ensure Black and Ruff are run on staged files before each commit. + +Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f80c86b..51ca21b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ include = ["src/docbinder_oss/**"] [dependency-groups] dev = [ + "black>=25.1.0", "mkdocs>=1.6.1", "mkdocs-material>=9.6.14", "pytest>=8.4.0", @@ -46,8 +47,11 @@ testpaths = [ "tests", ] +[tool.black] +line-length = 125 +skip-string-normalization = false + [tool.ruff] -# Set the maximum line length to 100. line-length = 125 [tool.ruff.lint] diff --git a/src/docbinder_oss/cli/provider/get.py b/src/docbinder_oss/cli/provider/get.py index 7793870..e86b7be 100644 --- a/src/docbinder_oss/cli/provider/get.py +++ b/src/docbinder_oss/cli/provider/get.py @@ -5,9 +5,7 @@ @app.command("get") def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), + connection_type: str = typer.Option(None, "--type", "-t", help="The type of the provider to get."), name: str = typer.Option(None, "--name", "-n", help="The name of the provider to get."), ): """Get connection information for a provider by name or by type. @@ -25,10 +23,7 @@ def get_provider( typer.echo(f"Provider '{name}' found with config: {provider}") provider_found = True if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}'" - f" found with config: {provider}" - ) + typer.echo(f"Provider '{provider.name}' of type '{connection_type}'" f" found with config: {provider}") provider_found = True if not provider_found: typer.echo(f"No providers found with name '{name}' or type '{connection_type}'.") diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index f760cd7..6ba0583 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -17,29 +17,15 @@ @app.command() def search( name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option( - None, "--owner", help="Owner/contributor/reader email address to filter" - ), - updated_after: Optional[str] = typer.Option( - None, "--updated-after", help="Last update after (ISO timestamp)" - ), - updated_before: Optional[str] = typer.Option( - None, "--updated-before", help="Last update before (ISO timestamp)" - ), - created_after: Optional[str] = typer.Option( - None, "--created-after", help="Created after (ISO timestamp)" - ), - created_before: Optional[str] = typer.Option( - None, "--created-before", help="Created before (ISO timestamp)" - ), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), min_size: Optional[int] = typer.Option(None, 
"--min-size", help="Minimum file size in KB"), max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option( - None, "--provider", "-p", help="Provider name to search in" - ), - export_format: str = typer.Option( - None, "--export-format", help="Export format: csv or json", show_default=True - ), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option(None, "--export-format", help="Export format: csv or json", show_default=True), ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" @@ -161,9 +147,7 @@ def __write_csv(files_by_provider, filename): writer.writeheader() for provider, files in files_by_provider.items(): for file in files: - file_dict = ( - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - ) + file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() file_dict["provider"] = provider # Flatten owners for CSV (only email addresses) owners = file_dict.get("owners") diff --git a/src/docbinder_oss/cli/setup.py b/src/docbinder_oss/cli/setup.py index b9ff56d..b74cbc9 100644 --- a/src/docbinder_oss/cli/setup.py +++ b/src/docbinder_oss/cli/setup.py @@ -25,9 +25,7 @@ def setup( providers = {} for entry in provider: if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) + typer.echo(f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format.") raise typer.Exit(code=1) prov_name, prov_kvs = entry.split(":", 1) kv_dict = {} diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 354a61a..5fd8268 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -42,9 +42,7 @@ class File(BaseModel): """Represents a file or folder""" id: str = Field(repr=True, description="Unique identifier for the file or folder.") - name: str = Field( - repr=True, description="Name of the file or folder. May not be unique." - ) + name: str = Field(repr=True, description="Name of the file or folder. 
May not be unique.") mime_type: str = Field(repr=True, description="MIME type of the file or folder.") kind: Optional[str] = Field(repr=True, description="Kind of the item, e.g., 'drive#file'.") diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py index a724e9f..b3a20b3 100644 --- a/src/docbinder_oss/helpers/path_utils.py +++ b/src/docbinder_oss/helpers/path_utils.py @@ -56,9 +56,7 @@ def build_all_full_paths(files, root_id="root", root_name="My Drive", root_id_to break parent_id = parents[0] if parent_id in root_id_to_name: - temp_stack.append( - (current.id, "/" + root_id_to_name[parent_id] + "/" + current.name) - ) + temp_stack.append((current.id, "/" + root_id_to_name[parent_id] + "/" + current.name)) break parent = id_to_item.get(parent_id) if not parent: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py index eddf4d5..92e917a 100644 --- a/src/docbinder_oss/helpers/writer.py +++ b/src/docbinder_oss/helpers/writer.py @@ -14,7 +14,7 @@ class Writer(ABC): """Abstract base writer class.""" - + @abstractmethod def write(self, data: Any, file_path: Union[None, str, Path]) -> None: """Write data to file.""" @@ -23,12 +23,12 @@ def write(self, data: Any, file_path: Union[None, str, Path]) -> None: class MultiFormatWriter: """Factory writer that automatically detects format from file extension.""" - + _writers = { - '.csv': 'CSVWriter', - '.json': 'JSONWriter', + ".csv": "CSVWriter", + ".json": "JSONWriter", } - + @classmethod def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: """Write data to file, format determined by extension.""" @@ -38,10 +38,10 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: return path = Path(file_path) extension = path.suffix.lower() - + if extension not in cls._writers: raise ValueError(f"Unsupported format: {extension}") - + writer_class = globals()[cls._writers[extension]] writer = writer_class() writer.write(data, file_path) @@ -56,24 +56,21 @@ def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: if not data: logger.warning("No data to write to CSV.") return - - with open(file_path, 'w', newline='', encoding='utf-8') as f: + + with open(file_path, "w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) writer.writeheader() for provider, items in data.items(): for item in items: item_dict = item.model_dump() if isinstance(item, BaseModel) else item - item_dict['provider'] = provider + item_dict["provider"] = provider writer.writerow(item_dict) class JSONWriter(Writer): def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: - data = { - provider: [item.model_dump() for item in items] - for provider, items in data.items() - } - with open(file_path, 'w', encoding='utf-8') as f: + data = {provider: [item.model_dump() for item in items] for provider, items in data.items()} + with open(file_path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False, default=str) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py index a171a2a..6c68a71 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -41,9 +41,7 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") - 
TOKEN_PATH = os.path.expanduser( - "~/.config/docbinder/gcp/" + self.config.name + "_token.json" - ) + TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) logger.debug(f"Token path: {TOKEN_PATH}") @@ -57,9 +55,7 @@ def _get_credentials(self): if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: - flow = InstalledAppFlow.from_client_secrets_file( - self.settings.gcp_credentials_json, self.SCOPES - ) + flow = InstalledAppFlow.from_client_secrets_file(self.settings.gcp_credentials_json, self.SCOPES) creds = flow.run_local_server(port=0) # Save the credentials for the next run with open(TOKEN_PATH, "w") as token: diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 46899c8..1a709b4 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -14,9 +14,7 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get( - "owners", [type("User", (), {"email_address": "owner@example.com"})()] - ) + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) self.last_modifying_user = kwargs.get( "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() ) @@ -94,9 +92,7 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr( - "docbinder_oss.cli.search.create_provider_instance", create_provider_instance - ) + monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -166,9 +162,7 @@ def test_search_owner_filter(): def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke( - app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] - ) + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -178,9 +172,7 @@ def test_search_updated_after_filter(): def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke( - app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] - ) + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index d3cf8ce..3a05ebf 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -9,11 +9,13 @@ JSONWriter, ) + class DummyModel(BaseModel): id: str name: str kind: str + @pytest.fixture def sample_data(): return { @@ -26,24 +28,26 @@ def sample_data(): ], } + def test_csv_writer(tmp_path, sample_data): file_path = tmp_path / "output.csv" writer = CSVWriter() writer.write(sample_data, file_path) assert file_path.exists() - with open(file_path, newline='', encoding='utf-8') as f: + with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} 
assert rows[0]["provider"] == "provider1" + def test_json_writer(tmp_path, sample_data): file_path = tmp_path / "output.json" writer = JSONWriter() writer.write(sample_data, file_path) assert file_path.exists() - with open(file_path, encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: data = json.load(f) assert "provider1" in data assert isinstance(data["provider1"], list) @@ -54,24 +58,27 @@ def test_multiformat_writer_csv(tmp_path, sample_data): file_path = tmp_path / "test.csv" MultiFormatWriter.write(sample_data, file_path) assert file_path.exists() - with open(file_path, newline='', encoding='utf-8') as f: + with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 + def test_multiformat_writer_json(tmp_path, sample_data): file_path = tmp_path / "test.json" MultiFormatWriter.write(sample_data, file_path) assert file_path.exists() - with open(file_path, encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: data = json.load(f) assert "provider2" in data + def test_multiformat_writer_unsupported(tmp_path, sample_data): file_path = tmp_path / "test.unsupported" with pytest.raises(ValueError): MultiFormatWriter.write(sample_data, file_path) + def test_csv_writer_empty_data(tmp_path, caplog): file_path = tmp_path / "empty.csv" writer = CSVWriter() diff --git a/tests/providers/google_drive/test_google_drive_buckets.py b/tests/providers/google_drive/test_google_drive_buckets.py index a4a91c3..bff2dee 100644 --- a/tests/providers/google_drive/test_google_drive_buckets.py +++ b/tests/providers/google_drive/test_google_drive_buckets.py @@ -21,9 +21,7 @@ def test_list_buckets(mock_gdrive_provider, gdrive_client): } ] } - mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = fake_api_response buckets = gdrive_client.list_buckets() diff --git a/tests/providers/google_drive/test_google_drive_files.py b/tests/providers/google_drive/test_google_drive_files.py index 432af3a..b8a5866 100644 --- a/tests/providers/google_drive/test_google_drive_files.py +++ b/tests/providers/google_drive/test_google_drive_files.py @@ -52,9 +52,7 @@ class DummyClient: def list_all_files(self): return list_all_files(self) - monkeypatch.setattr( - "docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient() - ) + monkeypatch.setattr("docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient()) orig_cwd = os.getcwd() os.chdir(tmp_path) yield @@ -97,9 +95,7 @@ def test_list_files(mock_gdrive_provider, gdrive_client): ] } - mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = fake_api_response files = gdrive_client.list_files_in_folder() diff --git a/tests/providers/google_drive/test_google_drive_permissions.py b/tests/providers/google_drive/test_google_drive_permissions.py index e4b14f6..63d8865 100644 --- a/tests/providers/google_drive/test_google_drive_permissions.py +++ b/tests/providers/google_drive/test_google_drive_permissions.py @@ -18,9 +18,7 @@ def test_get_permissions(mock_gdrive_provider, gdrive_client): } ] } - mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = 
fake_api_response permissions = gdrive_client.get_permissions("1234") diff --git a/uv.lock b/uv.lock index 8630097..fe662e7 100644 --- a/uv.lock +++ b/uv.lock @@ -37,6 +37,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/37/fb6973edeb700f6e3d6ff222400602ab1830446c25c7b4676d8de93e65b8/backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc", size = 380336, upload-time = "2025-02-25T16:53:29.858Z" }, ] +[[package]] +name = "black" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" }, + { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, + { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, + { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, + { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, + { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time 
= "2025-01-29T04:19:12.944Z" }, + { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, + { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, + { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, + { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, +] + [[package]] name = "cachetools" version = "5.5.2" @@ -167,6 +195,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "black" }, { name = "mkdocs" }, { name = "mkdocs-material" }, { name = "pytest" }, @@ -188,6 +217,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "black", specifier = ">=25.1.0" }, { name = "mkdocs", specifier = ">=1.6.1" }, { name = "mkdocs-material", specifier = ">=9.6.14" }, { name = "pytest", specifier = ">=8.4.0" }, @@ -511,6 +541,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "oauthlib" version = "3.2.2" From 97d749e919dc42d81ee85d2bd6a84abed3001b87 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:21:50 +0200 Subject: [PATCH 27/39] Added pre-commit --- pyproject.toml | 1 + uv.lock | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 51ca21b..ced6ecc 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dev = [ "black>=25.1.0", "mkdocs>=1.6.1", "mkdocs-material>=9.6.14", + "pre-commit>=4.2.0", "pytest>=8.4.0", "tox>=4.26.0", "tox-uv>=1.26.0", diff --git a/uv.lock b/uv.lock index fe662e7..61dfd65 100644 --- a/uv.lock +++ b/uv.lock @@ -83,6 +83,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" }, ] +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -198,6 +207,7 @@ dev = [ { name = "black" }, { name = "mkdocs" }, { name = "mkdocs-material" }, + { name = "pre-commit" }, { name = "pytest" }, { name = "tox" }, { name = "tox-uv" }, @@ -220,6 +230,7 @@ dev = [ { name = "black", specifier = ">=25.1.0" }, { name = "mkdocs", specifier = ">=1.6.1" }, { name = "mkdocs-material", specifier = ">=9.6.14" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.4.0" }, { name = "tox", specifier = ">=4.26.0" }, { name = "tox-uv", specifier = ">=1.26.0" }, @@ -355,6 +366,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" }, ] +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, +] + [[package]] name = "idna" version = "3.10" @@ -550,6 +570,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = 
"sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -604,6 +633,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + [[package]] name = "proto-plus" version = "1.26.1" From 8c1fd2984a8574b13aff0b13913242b2f6d77fd9 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:24:12 +0200 Subject: [PATCH 28/39] Update contributing file --- docs/CONTRIBUTING.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4226b1e..f5b442f 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u --- **Note:** -Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. \ No newline at end of file +Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. + +## Code Style and Linting + +This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing. + +- Run the following before committing code: + +```zsh +uv run black . +uv run ruff check . +``` + +- To automatically format and lint code on every commit, install pre-commit hooks: + +```zsh +uv pip install pre-commit +pre-commit install +``` + +This will ensure Black and Ruff are run on staged files before each commit. + +Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors. \ No newline at end of file From c31d4bb6c3c16ae5c8a2a473f45124fba1aa774d Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:54:12 +0200 Subject: [PATCH 29/39] Fixed the search cli --export-filename and improved the writer function to now show which extensions are supported. 
---
 src/docbinder_oss/cli/search.py     | 12 +++++-------
 src/docbinder_oss/helpers/writer.py |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py
index 6ba0583..25226f8 100644
--- a/src/docbinder_oss/cli/search.py
+++ b/src/docbinder_oss/cli/search.py
@@ -25,14 +25,12 @@ def search(
     min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"),
     max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"),
     provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"),
-    export_format: str = typer.Option(None, "--export-format", help="Export format: csv or json", show_default=True),
+    export_file: Optional[str] = typer.Option(
+        None, "--export-file", help="Export file name (e.g. results.csv or results.json)"
+    ),
 ):
     """Search for files or folders matching filters across all
-    providers and export results as CSV or JSON."""
-
-    # 1 Load documents with filter "provider"
-    # 2 Filter the documents based on the provided filters
-    # 3 Export results to CSV or JSON
+    providers and export results as CSV or JSON. If --export-file is not provided, results are printed to the console."""
 
     config: Config = load_config()
     if not config.providers:
@@ -61,7 +59,7 @@ def search(
         max_size=max_size,
     )
 
-    MultiFormatWriter.write(current_files, export_format)
+    MultiFormatWriter.write(current_files, export_file)
 
     return

diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py
index 92e917a..7def033 100644
--- a/src/docbinder_oss/helpers/writer.py
+++ b/src/docbinder_oss/helpers/writer.py
@@ -40,7 +40,7 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None:
         extension = path.suffix.lower()
 
         if extension not in cls._writers:
-            raise ValueError(f"Unsupported format: {extension}")
+            raise ValueError(f"Unsupported format: {extension}. Supported formats are: {', '.join(cls._writers.keys())}")
 
         writer_class = globals()[cls._writers[extension]]
         writer = writer_class()
         writer.write(data, file_path)

From 27a1b5d55cbf9b863ad685782f75bcbc6a1f073b Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Fri, 27 Jun 2025 19:34:36 +0200
Subject: [PATCH 30/39] Updated the writer functions to work, improved readability, and aligned the tests with them.
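
A minimal sketch of the refactored writer API as laid out in this patch. The `DummyFile` model and the provider name are hypothetical stand-ins for the real `File` schema and a configured provider:

```python
from pydantic import BaseModel

from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter


class DummyFile(BaseModel):  # hypothetical stand-in for the real File schema
    id: str
    name: str
    kind: str


# Grouped results arrive as {provider_name: [models]}; each writer flattens
# them through flatten_file() before exporting.
results = {"gdrive-work": [DummyFile(id="1", name="Report", kind="file")]}

MultiFormatWriter.write(results, "results.csv")   # ".csv" dispatches to CSVWriter
MultiFormatWriter.write(results, "results.json")  # ".json" dispatches to JSONWriter
MultiFormatWriter.write(results, None)            # no path: ConsoleWriter renders a table
```
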
--- src/docbinder_oss/cli/search.py | 32 ++++--- src/docbinder_oss/helpers/writer.py | 89 ------------------- src/docbinder_oss/helpers/writers/base.py | 11 +++ .../helpers/writers/helper_functions.py | 46 ++++++++++ .../helpers/writers/multiformat_writer.py | 34 +++++++ .../helpers/writers/writer_console.py | 29 ++++++ .../helpers/writers/writer_csv.py | 41 +++++++++ .../helpers/writers/writer_json.py | 29 ++++++ tests/commands/test_search_command.py | 24 ++--- tests/helpers/test_writer.py | 34 ++++--- 10 files changed, 247 insertions(+), 122 deletions(-) delete mode 100644 src/docbinder_oss/helpers/writer.py create mode 100644 src/docbinder_oss/helpers/writers/base.py create mode 100644 src/docbinder_oss/helpers/writers/helper_functions.py create mode 100644 src/docbinder_oss/helpers/writers/multiformat_writer.py create mode 100644 src/docbinder_oss/helpers/writers/writer_console.py create mode 100644 src/docbinder_oss/helpers/writers/writer_csv.py create mode 100644 src/docbinder_oss/helpers/writers/writer_json.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 25226f8..6cfd24a 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -9,7 +9,7 @@ from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.providers.base_class import BaseProvider -from docbinder_oss.helpers.writer import MultiFormatWriter +from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter app = typer.Typer() @@ -100,16 +100,28 @@ def __filter_files( def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): return False - if owner and not any(owner in u.email_address for u in file.owners): - return False - if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): - return False - if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): - return False - if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): - return False - if created_before and __parse_dt(file.created_time) > __parse_dt(created_before): + if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)): return False + if updated_after: + file_mod_time = __parse_dt(file.modified_time) + updated_after_dt = __parse_dt(updated_after) + if file_mod_time is None or updated_after_dt is None or file_mod_time < updated_after_dt: + return False + if updated_before: + file_mod_time = __parse_dt(file.modified_time) + updated_before_dt = __parse_dt(updated_before) + if file_mod_time is None or updated_before_dt is None or file_mod_time > updated_before_dt: + return False + if created_after: + file_created_time = __parse_dt(file.created_time) + created_after_dt = __parse_dt(created_after) + if file_created_time is None or created_after_dt is None or file_created_time < created_after_dt: + return False + if created_before: + file_created_time = __parse_dt(file.created_time) + created_before_dt = __parse_dt(created_before) + if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: + return False if min_size and file.size < min_size * 1024: return False if max_size and file.size > max_size * 1024: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py deleted file mode 100644 index 7def033..0000000 --- a/src/docbinder_oss/helpers/writer.py +++ /dev/null @@ -1,89 +0,0 @@ -import csv -import json -from 
abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, List, Union -from pydantic import BaseModel -from rich import print - -import logging - - -logger = logging.getLogger(__name__) - - -class Writer(ABC): - """Abstract base writer class.""" - - @abstractmethod - def write(self, data: Any, file_path: Union[None, str, Path]) -> None: - """Write data to file.""" - pass - - -class MultiFormatWriter: - """Factory writer that automatically detects format from file extension.""" - - _writers = { - ".csv": "CSVWriter", - ".json": "JSONWriter", - } - - @classmethod - def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: - """Write data to file, format determined by extension.""" - if file_path is None: - # If no file path is provided, write to console - ConsoleWriter().write(data) - return - path = Path(file_path) - extension = path.suffix.lower() - - if extension not in cls._writers: - raise ValueError(f"Unsupported format: {extension}. Supported formats are: {', '.join(cls._writers.keys())}") - - writer_class = globals()[cls._writers[extension]] - writer = writer_class() - writer.write(data, file_path) - - -class CSVWriter(Writer): - def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: - fieldnames = next(iter(data.values()))[0].model_fields_set - return ["provider", *fieldnames] - - def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: - if not data: - logger.warning("No data to write to CSV.") - return - - with open(file_path, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) - writer.writeheader() - for provider, items in data.items(): - for item in items: - item_dict = item.model_dump() if isinstance(item, BaseModel) else item - item_dict["provider"] = provider - writer.writerow(item_dict) - - -class JSONWriter(Writer): - def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: - data = {provider: [item.model_dump() for item in items] for provider, items in data.items()} - with open(file_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, ensure_ascii=False, default=str) - - -class ConsoleWriter(Writer): - def write(self, data: Dict) -> None: - from rich.table import Table - - table = Table(title="Files and Folders") - table.add_column("Provider", justify="right", style="cyan", no_wrap=True) - table.add_column("Id", style="magenta") - table.add_column("Name", style="magenta") - table.add_column("Kind", style="magenta") - for provider, items in data.items(): - for item in items: - table.add_row(provider, item.id, item.name, item.kind) - print(table) diff --git a/src/docbinder_oss/helpers/writers/base.py b/src/docbinder_oss/helpers/writers/base.py new file mode 100644 index 0000000..b0da8af --- /dev/null +++ b/src/docbinder_oss/helpers/writers/base.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + + +class Writer(ABC): + """Abstract base writer class for exporting data.""" + + @abstractmethod + def write(self, data: Any, file_path: str | Path | None = None) -> None: + pass diff --git a/src/docbinder_oss/helpers/writers/helper_functions.py b/src/docbinder_oss/helpers/writers/helper_functions.py new file mode 100644 index 0000000..9277d74 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/helper_functions.py @@ -0,0 +1,46 @@ +def flatten_file(item, provider=None): + """ + Convert a file object (Pydantic, DummyFile, or dict) to a flat dict for 
export. + Flattens owners, parents, and last_modifying_user fields, and adds provider if given. + """ + # Convert to dict + if hasattr(item, "model_dump"): + result = item.model_dump() + elif hasattr(item, "__dict__"): + result = dict(item.__dict__) + else: + result = dict(item) + # Add provider field to output dict + if provider: + result["provider"] = provider + # Flatten owners to email addresses + owners = result.get("owners") + if owners: + emails = [] + for owner in owners: + if isinstance(owner, dict): + emails.append(owner.get("email_address") or owner.get("email") or str(owner)) + elif hasattr(owner, "email_address"): + emails.append(owner.email_address) + else: + emails.append(str(owner)) + result["owners"] = ";".join(filter(None, emails)) + # Flatten parents to semicolon-separated string + parents = result.get("parents") + if isinstance(parents, list): + result["parents"] = ";".join(str(p) for p in parents) + elif parents is None: + result["parents"] = "" + else: + result["parents"] = str(parents) + # Flatten last_modifying_user to email address + lmu = result.get("last_modifying_user") + if lmu: + if isinstance(lmu, dict): + result["last_modifying_user"] = lmu.get("email_address") or lmu.get("email") or str(lmu) + elif hasattr(lmu, "email_address"): + result["last_modifying_user"] = lmu.email_address + else: + result["last_modifying_user"] = str(lmu) + + return result diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py new file mode 100644 index 0000000..4cae081 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -0,0 +1,34 @@ +from pathlib import Path +from typing import Any + +from docbinder_oss.helpers.writers.writer_console import ConsoleWriter +from docbinder_oss.helpers.writers.writer_csv import CSVWriter +from docbinder_oss.helpers.writers.writer_json import JSONWriter + + +class MultiFormatWriter: + """ + Factory writer that automatically detects format from file extension or format string. + If file_path is None, prints to console. 
+ """ + + _writers = { + ".csv": CSVWriter, + ".json": JSONWriter, + "csv": CSVWriter, + "json": JSONWriter, + } + + @classmethod + def write(cls, data: Any, file_path: str | None = None) -> None: + if not file_path: + ConsoleWriter().write(data) + return + extension = Path(file_path).suffix.lower() + # Use extension or fallback to format string + writer_key = extension if extension in cls._writers else file_path.lower() + if writer_key not in cls._writers: + raise ValueError(f"Unsupported format: {file_path}") + writer_class = cls._writers[writer_key] + writer = writer_class() + writer.write(data, file_path) diff --git a/src/docbinder_oss/helpers/writers/writer_console.py b/src/docbinder_oss/helpers/writers/writer_console.py new file mode 100644 index 0000000..0fae481 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_console.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer + + +class ConsoleWriter(Writer): + """Writer for pretty-printing data to the console using rich tables.""" + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + from rich.table import Table + + table = Table(title="Files and Folders") + table.add_column("Provider", justify="right", style="cyan", no_wrap=True) + table.add_column("Id", style="magenta") + table.add_column("Name", style="magenta") + table.add_column("Kind", style="magenta") + for provider, items in data.items() if isinstance(data, dict) else [("?", data)]: + for item in items: + if hasattr(item, "model_dump"): + item = item.model_dump() + elif hasattr(item, "__dict__"): + item = dict(item.__dict__) + table.add_row( + str(provider), + str(item.get("id", "")), + str(item.get("name", "")), + str(item.get("kind", "")), + ) + print(table) diff --git a/src/docbinder_oss/helpers/writers/writer_csv.py b/src/docbinder_oss/helpers/writers/writer_csv.py new file mode 100644 index 0000000..3d6eb64 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_csv.py @@ -0,0 +1,41 @@ +import csv +import logging +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer +from docbinder_oss.helpers.writers.helper_functions import flatten_file + + +class CSVWriter(Writer): + """Writer for exporting data to CSV files.""" + + def get_fieldnames(self, rows: list) -> list: + fieldnames = set() + for row in rows: + fieldnames.update(row.keys()) + # Provider first, then the rest sorted + return ["provider"] + sorted(f for f in fieldnames if f != "provider") + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + """ + Always flattens grouped dicts to a flat list for CSV export. 
+ """ + rows = [] + if isinstance(data, dict): + for provider, items in data.items(): + for item in items: + rows.append(flatten_file(item, provider)) + elif isinstance(data, list): + for item in data: + provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) + rows.append(flatten_file(item, provider)) + else: + return + if not rows or not file_path: + logging.warning("No data to write to CSV.") + return + with open(file_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(rows)) + writer.writeheader() + for row in rows: + writer.writerow(row) diff --git a/src/docbinder_oss/helpers/writers/writer_json.py b/src/docbinder_oss/helpers/writers/writer_json.py new file mode 100644 index 0000000..977ce3f --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_json.py @@ -0,0 +1,29 @@ +import json +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer +from docbinder_oss.helpers.writers.helper_functions import flatten_file + + +class JSONWriter(Writer): + """Writer for exporting data to JSON files.""" + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + """ + Always flattens grouped dicts to a flat list for JSON export. + """ + flat = [] + if isinstance(data, dict): + for provider, items in data.items(): + for item in items: + flat.append(flatten_file(item, provider)) + elif isinstance(data, list): + for item in data: + provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) + flat.append(flatten_file(item, provider)) + else: + return + if not file_path: + return + with open(file_path, "w", encoding="utf-8") as f: + json.dump(flat, f, indent=2, ensure_ascii=False, default=str) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 1a709b4..8608fac 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -103,7 +103,7 @@ def create_provider_instance(cfg): def test_search_export_csv(): runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "csv"]) + result = runner.invoke(app, ["search", "--export-file", "search_results.csv"]) assert result.exit_code == 0 assert os.path.exists("search_results.csv") with open("search_results.csv") as f: @@ -123,7 +123,7 @@ def test_search_export_csv(): def test_search_export_json(): runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--export-file", "search_results.json"]) assert result.exit_code == 0 assert os.path.exists("search_results.json") with open("search_results.json") as f: @@ -142,7 +142,7 @@ def test_search_export_json(): def test_search_name_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -152,7 +152,7 @@ def test_search_name_filter(): def test_search_owner_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -162,7 +162,7 @@ def 
test_search_owner_filter(): def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -172,7 +172,9 @@ def test_search_updated_after_filter(): def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -182,7 +184,7 @@ def test_search_created_before_filter(): def test_search_min_size_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -192,7 +194,7 @@ def test_search_min_size_filter(): def test_search_max_size_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -202,7 +204,7 @@ def test_search_max_size_filter(): def test_search_provider_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -225,8 +227,8 @@ def test_search_combined_filters(): "3", "--provider", "dummy2", - "--export-format", - "json", + "--export-file", + "search_results.json", ], ) assert result.exit_code == 0 diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index 3a05ebf..651bf87 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -3,11 +3,9 @@ import pytest from pydantic import BaseModel -from docbinder_oss.helpers.writer import ( - MultiFormatWriter, - CSVWriter, - JSONWriter, -) +from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter +from docbinder_oss.helpers.writers.writer_csv import CSVWriter +from docbinder_oss.helpers.writers.writer_json import JSONWriter class DummyModel(BaseModel): @@ -38,7 +36,10 @@ def test_csv_writer(tmp_path, sample_data): reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 - assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} + # Allow extra fields, but required fields must be present + for row in rows: + for field in ("provider", "id", "name", "kind"): + assert field in row assert rows[0]["provider"] == "provider1" @@ -49,9 +50,12 @@ def test_json_writer(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) - assert "provider1" in data - assert isinstance(data["provider1"], list) - assert data["provider1"][0]["id"] == "1" + assert isinstance(data, list) + assert len(data) == 3 + providers = 
{d["provider"] for d in data} + assert "provider1" in providers + assert "provider2" in providers + assert any(d["id"] == "1" and d["provider"] == "provider1" for d in data) def test_multiformat_writer_csv(tmp_path, sample_data): @@ -70,18 +74,24 @@ def test_multiformat_writer_json(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) - assert "provider2" in data + assert isinstance(data, list) + providers = {d["provider"] for d in data} + assert "provider2" in providers def test_multiformat_writer_unsupported(tmp_path, sample_data): file_path = tmp_path / "test.unsupported" + # Convert file_path to str for .lower() in MultiFormatWriter with pytest.raises(ValueError): - MultiFormatWriter.write(sample_data, file_path) + MultiFormatWriter.write(sample_data, str(file_path)) def test_csv_writer_empty_data(tmp_path, caplog): + import logging + file_path = tmp_path / "empty.csv" writer = CSVWriter() - with caplog.at_level("WARNING"): + logger = logging.getLogger() + with caplog.at_level("WARNING", logger=logger.name): writer.write({}, file_path) assert "No data to write to CSV." in caplog.text From c7747b23d7baa54a6065d67da41fb3d46d9508d0 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:40:25 +0200 Subject: [PATCH 31/39] Make sure to get all files, not only the shared ones --- src/docbinder_oss/providers/google_drive/google_drive_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index c8c08b7..76512d3 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -27,7 +27,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - args["q"] = "sharedWithMe=true and trashed=false" + args["q"] = None resp = self.service.files().list(**args).execute() files = resp.get("files", []) From 463ce846d5d87f8fb5306a8f34cd2c521c45c6ee Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:45:18 +0200 Subject: [PATCH 32/39] Fix mkdocs --- docs/tool/providers/custom_provider.md | 14 +++++++------- mkdocs.yml | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/tool/providers/custom_provider.md b/docs/tool/providers/custom_provider.md index bad4644..8a2cca7 100644 --- a/docs/tool/providers/custom_provider.md +++ b/docs/tool/providers/custom_provider.md @@ -6,7 +6,7 @@ This guide explains how to integrate a new storage provider (e.g., DropBox, OneD ## 1. Create a Service Configuration Class -Each provider must define a configuration class that inherits from [`ServiceConfig`](src/docbinder_oss/services/base_class.py): +Each provider must define a configuration class that inherits from [`ServiceConfig`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py): ```python # filepath: src/docbinder_oss/services/my_provider/my_provider_service_config.py @@ -26,7 +26,7 @@ class MyProviderServiceConfig(ServiceConfig): ## 2. 
Implement the Storage Client -Create a client class that inherits from [`BaseStorageClient`](src/docbinder_oss/services/base_class.py) and implements all abstract methods: +Create a client class that inherits from [`BaseStorageClient`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py) and implements all abstract methods: ```python # filepath: src/docbinder_oss/services/my_provider/my_provider_client.py @@ -57,7 +57,7 @@ class MyProviderClient(BaseStorageClient): pass ``` -- Use the shared models [`File`](src/docbinder_oss/core/schemas.py), [`Permission`](src/docbinder_oss/core/schemas.py), etc., for return types. +- Use the shared models [`File`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), [`Permission`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), etc., for return types. --- @@ -109,10 +109,10 @@ providers: ## Reference -- [src/docbinder_oss/services/base_class.py](src/docbinder_oss/services/base_class.py) -- [src/docbinder_oss/core/schemas.py](src/docbinder_oss/core/schemas.py) -- [src/docbinder_oss/services/google_drive/](src/docbinder_oss/services/google_drive/) (example implementation) -- [src/docbinder_oss/services/__init__.py](src/docbinder_oss/services/__init__.py) +- [src/docbinder_oss/services/base_class.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py) +- [src/docbinder_oss/core/schemas.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py) +- [src/docbinder_oss/services/google_drive/](https://github.com/SnappyLab/DocBinder-OSS/tree/main/src/docbinder_oss/services/google_drive/) (example implementation) +- [src/docbinder_oss/services/__init__.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/__init__.py) --- diff --git a/mkdocs.yml b/mkdocs.yml index e864210..78c26bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,9 @@ nav: - Commands: - Main CLI: commands/main.md - Provider: commands/provider.md + - Providers: + - Google Drive: tool/providers/google_drive.md + - Custom Provider: tool/providers/custom_provider.md - Contributing: CONTRIBUTING.md - Code of Conduct: CODE_OF_CONDUCT.md - Security: SECURITY.md From 4827ce9dfa3cf92d00f6548f4f2cb4a87819897d Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:48:45 +0200 Subject: [PATCH 33/39] Update incorrect readme reference in mkdocs --- docs/tool/providers/google_drive.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tool/providers/google_drive.md b/docs/tool/providers/google_drive.md index a390973..9791cad 100644 --- a/docs/tool/providers/google_drive.md +++ b/docs/tool/providers/google_drive.md @@ -65,4 +65,4 @@ providers: ## References - [Google Drive API Documentation](https://developers.google.com/drive) -- [DocBinder Documentation](../README.md) \ No newline at end of file +- [DocBinder Documentation](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file From da028871617c9d69a04d79d3586375b4f4431444 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:52:10 +0200 Subject: [PATCH 34/39] update workflow of docbinder oss to not trigger on doc updates and changed the reference name of docbinder documentation to be more correct --- .github/workflows/docbinder-oss.yml | 6 ++++++ docs/tool/providers/google_drive.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/docbinder-oss.yml b/.github/workflows/docbinder-oss.yml index 3072209..ea94412 100644 --- a/.github/workflows/docbinder-oss.yml +++ b/.github/workflows/docbinder-oss.yml @@ -5,10 +5,16 @@ on: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" pull_request: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" jobs: test: runs-on: ubuntu-latest diff --git a/docs/tool/providers/google_drive.md b/docs/tool/providers/google_drive.md index 9791cad..62d488c 100644 --- a/docs/tool/providers/google_drive.md +++ b/docs/tool/providers/google_drive.md @@ -65,4 +65,4 @@ providers: ## References - [Google Drive API Documentation](https://developers.google.com/drive) -- [DocBinder Documentation](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file +- [DocBinder OSS - GitHub](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file From ca62bc66e7ce96c504e174c0e75a6191a28c492e Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:39:19 +0200 Subject: [PATCH 35/39] revert back to writer and improve tests --- .pre-commit-config.yaml | 25 +- pyproject.toml | 4 - src/docbinder_oss/cli/search.py | 69 +--- src/docbinder_oss/helpers/config.py | 5 +- src/docbinder_oss/helpers/rich_helpers.py | 19 - .../helpers/writers/multiformat_writer.py | 9 +- .../helpers/writers/writer_console.py | 17 +- .../helpers/writers/writer_csv.py | 46 +-- .../helpers/writers/writer_json.py | 30 +- .../google_drive/google_drive_files.py | 2 +- tests/commands/test_search_command.py | 372 ++++++++++-------- tests/conftest.py | 135 +++++++ tests/helpers/test_writer.py | 18 +- tests/providers/google_drive/conftest.py | 46 --- 14 files changed, 430 insertions(+), 367 deletions(-) delete mode 100644 src/docbinder_oss/helpers/rich_helpers.py create mode 100644 tests/conftest.py delete mode 100644 tests/providers/google_drive/conftest.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9168817..9591f18 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,24 @@ repos: - - repo: https://github.com/psf/black - rev: 24.3.0 + - repo: https://github.com/astral-sh/uv-pre-commit + rev: 0.7.16 hooks: - - id: black + - id: uv-export + - id: uv-lock + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + # Ruff version. + rev: v0.12.1 hooks: - - id: ruff + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --select, I, --fix ] + # Run the formatter. 
+ - id: ruff-format + types_or: [ python, pyi ] diff --git a/pyproject.toml b/pyproject.toml index ced6ecc..ed3fec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,10 +48,6 @@ testpaths = [ "tests", ] -[tool.black] -line-length = 125 -skip-string-normalization = false - [tool.ruff] line-length = 125 diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 6cfd24a..34feb90 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,7 +1,8 @@ from datetime import datetime +import logging import re import typer -from typing import Optional +from typing import Dict, List, Optional import csv from docbinder_oss.core.schemas import File @@ -64,7 +65,7 @@ def search( def __filter_files( - files, + files: Dict[str, List[File]], name=None, owner=None, updated_after=None, @@ -73,7 +74,7 @@ def __filter_files( created_before=None, min_size=None, max_size=None, -): +) -> Dict[str, List[File]]: """ Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. @@ -103,14 +104,14 @@ def file_matches(file: File): if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)): return False if updated_after: - file_mod_time = __parse_dt(file.modified_time) + file_modified_time = __parse_dt(file.modified_time) updated_after_dt = __parse_dt(updated_after) - if file_mod_time is None or updated_after_dt is None or file_mod_time < updated_after_dt: + if file_modified_time is None or updated_after_dt is None or file_modified_time < updated_after_dt: return False if updated_before: - file_mod_time = __parse_dt(file.modified_time) + file_modified_time = __parse_dt(file.modified_time) updated_before_dt = __parse_dt(updated_before) - if file_mod_time is None or updated_before_dt is None or file_mod_time > updated_before_dt: + if file_modified_time is None or updated_before_dt is None or file_modified_time > updated_before_dt: return False if created_after: file_created_time = __parse_dt(file.created_time) @@ -120,11 +121,12 @@ def file_matches(file: File): if created_before: file_created_time = __parse_dt(file.created_time) created_before_dt = __parse_dt(created_before) + logging.debug(f"File created time: {file_created_time}, Created before: {created_before_dt}, Type: {type(file_created_time)}, Type: {type(created_before_dt)}") if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: return False - if min_size and file.size < min_size * 1024: + if min_size and file.size < min_size: return False - if max_size and file.size > max_size * 1024: + if max_size and file.size > max_size: return False return True @@ -139,49 +141,6 @@ def __parse_dt(val): return val try: return datetime.fromisoformat(val) - except Exception: - return val - - -def __write_csv(files_by_provider, filename): - # Collect all possible fieldnames from all files - all_fieldnames = set(["provider"]) - for files in files_by_provider.values(): - for file in files: - file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - all_fieldnames.update(file_dict.keys()) - # Move provider to the front, rest sorted - fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") - with open(filename, "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for provider, files in files_by_provider.items(): - for file in files: - file_dict = file.model_dump() if 
hasattr(file, "model_dump") else file.__dict__.copy() - file_dict["provider"] = provider - # Flatten owners for CSV (only email addresses) - owners = file_dict.get("owners") - if isinstance(owners, list): - emails = [] - for u in owners: - if hasattr(u, "email_address") and u.email_address: - emails.append(u.email_address) - elif isinstance(u, dict) and u.get("email_address"): - emails.append(u["email_address"]) - elif isinstance(u, str): - emails.append(u) - file_dict["owners"] = ";".join(emails) - # Flatten last_modifying_user for CSV (only email address) - last_mod = file_dict.get("last_modifying_user") - if last_mod is not None: - if hasattr(last_mod, "email_address"): - file_dict["last_modifying_user"] = last_mod.email_address - elif isinstance(last_mod, dict) and "email_address" in last_mod: - file_dict["last_modifying_user"] = last_mod["email_address"] - else: - file_dict["last_modifying_user"] = str(last_mod) - # Flatten parents for CSV - parents = file_dict.get("parents") - if isinstance(parents, list): - file_dict["parents"] = ";".join(str(p) for p in parents) - writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) + except Exception as e: + typer.echo(f"Failed to parse datetime from value: {val} with error: {e}", err=True) + raise ValueError(f"Invalid datetime format: {val}") from e diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 8a49070..088d95d 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -1,11 +1,12 @@ import logging import os +from typing import List import typer import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.providers import get_provider_registry +from docbinder_oss.providers import ServiceUnion, get_provider_registry logger = logging.getLogger(__name__) @@ -15,7 +16,7 @@ class Config(BaseModel): """Main configuration model that holds a list of all provider configs.""" - providers: list + providers: List[ServiceUnion] # type: ignore def load_config() -> Config: diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py deleted file mode 100644 index 6faefe5..0000000 --- a/src/docbinder_oss/helpers/rich_helpers.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import List -from rich.table import Table - - -def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: - """ - Create a Rich table with the given headers and rows. - - Args: - headers (List[str]): The headers for the table. - rows (List[List[str]]): The data rows for the table. - - Returns: - Table: A Rich Table object. 
- """ - table = Table(*headers, show_header=True, header_style="bold magenta") - for row in rows: - table.add_row(*row) - return table diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py index 4cae081..c6b688e 100644 --- a/src/docbinder_oss/helpers/writers/multiformat_writer.py +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -1,6 +1,8 @@ from pathlib import Path -from typing import Any +from typing import Any, Dict, List +from docbinder_oss.core.schemas import File +from docbinder_oss.helpers.writers.base import Writer from docbinder_oss.helpers.writers.writer_console import ConsoleWriter from docbinder_oss.helpers.writers.writer_csv import CSVWriter from docbinder_oss.helpers.writers.writer_json import JSONWriter @@ -20,7 +22,7 @@ class MultiFormatWriter: } @classmethod - def write(cls, data: Any, file_path: str | None = None) -> None: + def write(cls, data: Dict[str, List[File]], file_path: str | None = None) -> None: if not file_path: ConsoleWriter().write(data) return @@ -30,5 +32,6 @@ def write(cls, data: Any, file_path: str | None = None) -> None: if writer_key not in cls._writers: raise ValueError(f"Unsupported format: {file_path}") writer_class = cls._writers[writer_key] - writer = writer_class() + writer: Writer = writer_class() writer.write(data, file_path) + \ No newline at end of file diff --git a/src/docbinder_oss/helpers/writers/writer_console.py b/src/docbinder_oss/helpers/writers/writer_console.py index 0fae481..ff17bff 100644 --- a/src/docbinder_oss/helpers/writers/writer_console.py +++ b/src/docbinder_oss/helpers/writers/writer_console.py @@ -1,5 +1,7 @@ from pathlib import Path from typing import Any +from rich.table import Table +from rich import print from docbinder_oss.helpers.writers.base import Writer @@ -7,23 +9,12 @@ class ConsoleWriter(Writer): """Writer for pretty-printing data to the console using rich tables.""" def write(self, data: Any, file_path: str | Path | None = None) -> None: - from rich.table import Table - table = Table(title="Files and Folders") table.add_column("Provider", justify="right", style="cyan", no_wrap=True) table.add_column("Id", style="magenta") table.add_column("Name", style="magenta") table.add_column("Kind", style="magenta") - for provider, items in data.items() if isinstance(data, dict) else [("?", data)]: + for provider, items in data.items(): for item in items: - if hasattr(item, "model_dump"): - item = item.model_dump() - elif hasattr(item, "__dict__"): - item = dict(item.__dict__) - table.add_row( - str(provider), - str(item.get("id", "")), - str(item.get("name", "")), - str(item.get("kind", "")), - ) + table.add_row(provider, item.id, item.name, item.kind) print(table) diff --git a/src/docbinder_oss/helpers/writers/writer_csv.py b/src/docbinder_oss/helpers/writers/writer_csv.py index 3d6eb64..0d9c281 100644 --- a/src/docbinder_oss/helpers/writers/writer_csv.py +++ b/src/docbinder_oss/helpers/writers/writer_csv.py @@ -1,41 +1,29 @@ import csv import logging from pathlib import Path -from typing import Any +from typing import List, Dict, Union +from pydantic import BaseModel from docbinder_oss.helpers.writers.base import Writer -from docbinder_oss.helpers.writers.helper_functions import flatten_file + +logger = logging.getLogger(__name__) class CSVWriter(Writer): """Writer for exporting data to CSV files.""" + def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: + fieldnames = 
next(iter(data.values()))[0].model_fields_set +        return ["provider", *fieldnames] -    def get_fieldnames(self, rows: list) -> list: -        fieldnames = set() -        for row in rows: -            fieldnames.update(row.keys()) -        # Provider first, then the rest sorted -        return ["provider"] + sorted(f for f in fieldnames if f != "provider") +    def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: +        if not data: +            logger.warning("No data to write to CSV.") +            return -    def write(self, data: Any, file_path: str | Path | None = None) -> None: -        """ -        Always flattens grouped dicts to a flat list for CSV export. -        """ -        rows = [] -        if isinstance(data, dict): +        with open(file_path, 'w', newline='', encoding='utf-8') as f: +            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) +            writer.writeheader() for provider, items in data.items(): for item in items: -                    rows.append(flatten_file(item, provider)) -        elif isinstance(data, list): -            for item in data: -                provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) -                rows.append(flatten_file(item, provider)) -        else: -            return -        if not rows or not file_path: -            logging.warning("No data to write to CSV.") -            return -        with open(file_path, "w", newline="", encoding="utf-8") as f: -            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(rows)) -            writer.writeheader() -            for row in rows: -                writer.writerow(row) diff --git a/src/docbinder_oss/helpers/writers/writer_json.py b/src/docbinder_oss/helpers/writers/writer_json.py index 977ce3f..d928814 100644 --- a/src/docbinder_oss/helpers/writers/writer_json.py +++ b/src/docbinder_oss/helpers/writers/writer_json.py @@ -1,29 +1,17 @@ import json from pathlib import Path -from typing import Any +from typing import Dict, List, Union +from docbinder_oss.core.schemas import File from docbinder_oss.helpers.writers.base import Writer -from docbinder_oss.helpers.writers.helper_functions import flatten_file class JSONWriter(Writer): """Writer for exporting data to JSON files.""" - def write(self, data: Any, file_path: str | Path | None = None) -> None: - """ - Always flattens grouped dicts to a flat list for JSON export.
- """ - flat = [] - if isinstance(data, dict): - for provider, items in data.items(): - for item in items: - flat.append(flatten_file(item, provider)) - elif isinstance(data, list): - for item in data: - provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) - flat.append(flatten_file(item, provider)) - else: - return - if not file_path: - return - with open(file_path, "w", encoding="utf-8") as f: - json.dump(flat, f, indent=2, ensure_ascii=False, default=str) + def write(self, data: Dict[str, List[File]], file_path: Union[str, Path]) -> None: + data = { + provider: [item.model_dump() for item in items] + for provider, items in data.items() + } + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False, default=str) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index 76512d3..18fbb58 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -27,7 +27,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - args["q"] = None + args["q"] = "trashed=false" resp = self.service.files().list(**args).execute() files = resp.get("files", []) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 8608fac..1ccc378 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -1,220 +1,272 @@ -import os import csv import json +from typing import Dict import pytest +from pathlib import Path from typer.testing import CliRunner +from docbinder_oss.core.schemas import User from docbinder_oss.main import app +from conftest import DummyModel -class DummyFile: - def __init__(self, **kwargs): - self.id = kwargs.get("id", "fileid1") - self.name = kwargs.get("name", "Test File") - self.size = kwargs.get("size", 12345) - self.mime_type = kwargs.get("mime_type", "application/pdf") - self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") - self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get( - "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() - ) - self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") - self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") - self.shared = kwargs.get("shared", True) - self.trashed = kwargs.get("trashed", False) - - def model_dump(self): - # Simulate pydantic's model_dump for test compatibility - return { - "id": self.id, - "name": self.name, - "size": self.size, - "mime_type": self.mime_type, - "created_time": self.created_time, - "modified_time": self.modified_time, - "owners": [u.email_address for u in self.owners], - "last_modifying_user": getattr(self.last_modifying_user, "email_address", None), - "web_view_link": self.web_view_link, - "web_content_link": self.web_content_link, - "shared": self.shared, - "trashed": self.trashed, - } - - -@pytest.fixture(autouse=True) -def patch_provider(monkeypatch, tmp_path): - # Patch config loader to return two dummy provider configs - class DummyProviderConfig: - def __init__(self, name): - self.name = name - self.type = name # Simulate type for 
registry - -    class DummyConfig: -        providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - -    # Patch load_config in the CLI's namespace -    monkeypatch.setattr("docbinder_oss.cli.search.load_config", lambda: DummyConfig()) - -    # Patch create_provider_instance in the CLI's namespace -    def create_provider_instance(cfg): -        if cfg.name == "dummy1": -            return type( -                "DummyClient", -                (), -                { -                    "list_all_files": lambda self: [ -                        DummyFile( -                            id="f1", -                            name="Alpha Report", -                            size=2048, -                            owners=[type("User", (), {"email_address": "alpha@a.com"})()], -                            created_time="2024-01-01T10:00:00", -                            modified_time="2024-01-02T10:00:00", -                        ) -                    ] -                }, -            )() -        else: -            return type( -                "DummyClient", -                (), -                { -                    "list_all_files": lambda self: [ -                        DummyFile( -                            id="f2", -                            name="Beta Notes", -                            size=4096, -                            owners=[type("User", (), {"email_address": "beta@b.com"})()], -                            created_time="2024-02-01T10:00:00", -                            modified_time="2024-02-02T10:00:00", -                        ) -                    ] -                }, -            )() - -    monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) - -    # Change working directory to a temp dir for file output -    orig_cwd = os.getcwd() -    os.chdir(tmp_path) -    yield -    os.chdir(orig_cwd) - -def test_search_export_csv(): -    runner = CliRunner() +runner = CliRunner() + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), +    ])], indirect=True) +def test_search_export_csv(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test happy path for search command with CSV export.""" result = runner.invoke(app, ["search", "--export-file", "search_results.csv"]) assert result.exit_code == 0 -    assert os.path.exists("search_results.csv") +    assert Path("search_results.csv").exists() with open("search_results.csv") as f: reader = csv.DictReader(f) rows = list(reader) -        assert len(rows) == 2 -        names = set(r["name"] for r in rows) -        assert names == {"Alpha Report", "Beta Notes"} -        # Check owners field is a string and contains the expected email -        for r in rows: -            owners = r["owners"] -            if r["name"] == "Alpha Report": -                assert "alpha@a.com" in owners -            if r["name"] == "Beta Notes": -                assert "beta@b.com" in owners - - -def test_search_export_json(): -    runner = CliRunner() +        assert len(rows) == 4 +        assert set(r["provider"] for r in rows) == {"dummy1", "dummy2"} + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), +    ])], indirect=True) +def test_search_export_json(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test happy path for search command with JSON export.""" result = runner.invoke(app, ["search", "--export-file", "search_results.json"]) assert result.exit_code == 0 -    assert os.path.exists("search_results.json") +    assert Path("search_results.json").exists() with open("search_results.json") as f: -        data = json.load(f) -    assert isinstance(data, list) -    assert len(data) == 2 -    names = set(d["name"] for d in data) -    assert names == {"Alpha Report", "Beta Notes"} -    # Check
owners field is a string or list - for d in data: - if d["name"] == "Alpha Report": - assert "alpha@a.com" in d["owners"] - if d["name"] == "Beta Notes": - assert "beta@b.com" in d["owners"] - + data: Dict = json.load(f) + assert len(data.keys()) == 2 + assert len(data["dummy1"]) == 2 + assert len(data["dummy2"]) == 2 + assert all(key in data for key in ("dummy1", "dummy2")) -def test_search_name_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), + ])], indirect=True) +def test_search_name_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """ + Test search command with name filter that returns no results. + """ result = runner.invoke(app, ["search", "--name", "Alpha", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" + assert len(data["dummy1"]) == 0 + assert len(data["dummy2"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), + DummyModel(id="dummy_file2", name="File 2", kind="file"), + ])], indirect=True) +def test_search_name_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """ + Test search command with name filter that returns some results. 
+ """ + result = runner.invoke(app, ["search", "--name", "dummy", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 2 + assert data["dummy1"][0]["name"] == "dummy File 1" + assert data["dummy2"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@a.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns no results.""" + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_owner_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns some results.""" result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - + assert data["dummy1"][0]["owners"][0]["email_address"] == "beta@b.com" -def test_search_updated_after_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", modified_time="2023-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", modified_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_updated_after_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with updated_after filter that returns no results.""" result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" + assert len(data["dummy1"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 
1", kind="file", modified_time="2024-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", modified_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_updated_after_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with updated_after filter that returns some results.""" + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-04-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-04-30T00:00:00"), + ])], indirect=True) +def test_search_created_before_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with created_before filter that returns no results.""" + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] + ) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_created_before_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_created_before_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with created_before filter that returns some results.""" result = runner.invoke( app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 2" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=1), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_min_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with min_size filter that returns no results.""" + result = runner.invoke(app, ["search", "--min-size", 3, "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_min_size_filter(): +@pytest.mark.parametrize('load_config_mock', [("dummy", 
1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_min_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): runner = CliRunner() -    result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) +    result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) -    assert len(data) == 1 -    assert data[0]["name"] == "Beta Notes" +    assert len(data["dummy1"]) == 1 +    assert data["dummy1"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=4), +    ])], indirect=True) +def test_search_max_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with max_size filter that returns no results.""" +    result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) +    assert result.exit_code == 0 +    with open("search_results.json") as f: +        data = json.load(f) +        assert len(data["dummy1"]) == 0 -def test_search_max_size_filter(): -    runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_max_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with max_size filter that returns some results.""" result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) -    assert len(data) == 1 -    assert data[0]["name"] == "Alpha Report" +    assert len(data["dummy1"]) == 1 +    assert data["dummy1"][0]["name"] == "dummy File 2" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_provider_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with provider filter that returns no results.""" +    result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) +    assert result.exit_code == 0 +    with open("search_results.json") as f: +        data = json.load(f) +        assert len(data) == 0 -def test_search_provider_filter(): -    runner = CliRunner()
+@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_provider_filter(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with provider filter that returns some results.""" result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["provider"] == "dummy2" - assert data[0]["name"] == "Beta Notes" - + assert "dummy2" in data -def test_search_combined_filters(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="Beta File 1", kind="file", size=5, owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_combined_filters(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with combined filters.""" result = runner.invoke( app, [ @@ -235,6 +287,6 @@ def test_search_combined_filters(): with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - assert data[0]["provider"] == "dummy2" - assert "beta@b.com" in data[0]["owners"] + assert "dummy2" in data + assert data["dummy2"][0]["name"] == "Beta File 1" + assert data["dummy2"][0]["owners"][0]["email_address"] == "beta@b.com" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e3f37ee --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,135 @@ +from typing import List +from unittest.mock import MagicMock, patch + +from pydantic import BaseModel, ConfigDict +import pytest + +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_client import ( + GoogleDriveClient, +) +from docbinder_oss.providers.google_drive.google_drive_service_config import ( + GoogleDriveServiceConfig, +) + + +class DummyModel(BaseModel): + id: str + name: str + kind: str + + model_config = ConfigDict(extra="allow") + + +class DummyProvider(BaseProvider): + def __init__(self, name, type=None): + self.name = name + self.type = type if type else f"{name}_type" + + def list_all_files(self): + raise NotImplementedError("Please use the pytest parametrize settings to add your test data.") + def test_connection(self): + raise NotImplementedError("This provider does not implement connection testing") + def list_buckets(self): + raise NotImplementedError("This provider does not implement buckets") + def get_permissions(self): + raise NotImplementedError("This provider does not implement permissions") + def list_files_in_folder(self): + raise NotImplementedError("This provider does not implement folder listing") + def get_file_metadata(self, item_id): + raise NotImplementedError("This provider does not implement file metadata retrieval") + +class DummyConfig: 
+ providers: List[DummyProvider] = [] + +@pytest.fixture +def sample_data(): + return { + "provider1": [ + DummyModel(id="1", name="FileA", kind="file"), + DummyModel(id="2", name="FolderB", kind="folder"), + ], + "provider2": [ + DummyModel(id="3", name="FileC", kind="file"), + ], + } + +@pytest.fixture +def mock_gdrive_provider(): + """ + This is the core of our testing strategy. We use 'patch' to replace + the `build` function from the googleapiclient library. + + Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will + receive our mock object instead of making a real network call. + """ + with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: + # Create a mock for the provider object that `build` would return + mock_provider = MagicMock() + # Configure the `build` function to return our mock provider + mock_build.return_value = mock_provider + yield mock_provider + + +@pytest.fixture +def gdrive_client(mock_gdrive_provider): + """ + Creates an instance of our GoogleDriveClient. + It will be initialized with a fake config and will use + the mock_gdrive_provider fixture internally. + """ + # Patch _get_credentials to avoid real auth + with patch( + "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", + return_value=MagicMock(), + ): + config = GoogleDriveServiceConfig( + name="test_gdrive", + gcp_credentials_json="fake_creds.json", + ) + return GoogleDriveClient(config=config) + +@pytest.fixture(scope='session') +def load_config_mock(request, create_config_mock): + """ + This fixture mocks the `load_config` function to return + a dummy configuration with a specified number of providers. + """ + name, number_of_providers = request.param + with patch("docbinder_oss.cli.search.load_config", return_value=create_config_mock(name, number_of_providers)) as _fixture: + yield _fixture + +@pytest.fixture(scope='session') +def create_provider_instance_mock(request, create_provider_mock): + """ + This fixture mocks the `create_provider_instance` function to return + a dummy provider instance based on the provider name. 
+    """ +    with patch("docbinder_oss.cli.search.create_provider_instance", return_value=create_provider_mock(request.param)) as _fixture: +        yield _fixture + +@pytest.fixture(scope="session") +def list_all_files_mock(request): +    """ +    Patches DummyProvider.list_all_files to return the test data +    supplied via pytest's indirect parametrization, instead of +    raising NotImplementedError. +    """ +    data = request.param +    with patch("conftest.DummyProvider.list_all_files", return_value=data) as _fixture: +        yield _fixture + +@pytest.fixture(scope='session') +def create_provider_mock(): +    def create_dummy_provider(name): +        return DummyProvider(name=name, type=f"{name}_type") +    yield create_dummy_provider + +@pytest.fixture(scope='session') +def create_config_mock(create_provider_mock): +    """This fixture creates a dummy configuration with a specified number of providers.""" +    def create_dummy_config(name, number_of_providers=2): +        dummy_config = DummyConfig() +        dummy_config.providers = [create_provider_mock(f"{name}{i+1}") for i in range(number_of_providers)] +        return dummy_config +    yield create_dummy_config \ No newline at end of file diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index 651bf87..abe0920 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -50,12 +50,12 @@ def test_json_writer(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) -    assert isinstance(data, list) -    assert len(data) == 3 -    providers = {d["provider"] for d in data} -    assert "provider1" in providers -    assert "provider2" in providers -    assert any(d["id"] == "1" and d["provider"] == "provider1" for d in data) +    assert isinstance(data, dict) +    assert len(data) == 2 +    assert "provider1" in data +    assert "provider2" in data +    assert data["provider1"][0]["id"] == "1" +    assert data["provider2"][0]["id"] == "3" def test_multiformat_writer_csv(tmp_path, sample_data): @@ -74,9 +74,9 @@ def test_multiformat_writer_json(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) -    assert isinstance(data, list) -    providers = {d["provider"] for d in data} -    assert "provider2" in providers +    assert isinstance(data, dict) +    assert "provider1" in data +    assert "provider2" in data def test_multiformat_writer_unsupported(tmp_path, sample_data): diff --git a/tests/providers/google_drive/conftest.py b/tests/providers/google_drive/conftest.py deleted file mode 100644 index b248aac..0000000 --- a/tests/providers/google_drive/conftest.py +++ /dev/null @@ -1,46 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from docbinder_oss.providers.google_drive.google_drive_client import ( -    GoogleDriveClient, -) -from docbinder_oss.providers.google_drive.google_drive_service_config import ( -    GoogleDriveServiceConfig, -) - - -@pytest.fixture -def mock_gdrive_provider(): -    """ -    This is the core of our testing strategy. We use 'patch' to replace -    the `build` function from the googleapiclient library. - -    Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will -    receive our mock object instead of making a real network call. -    """ -    with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: -        # Create a mock for the provider object that `build` would return -        mock_provider = MagicMock() -        # Configure the `build` function to return our mock provider -        mock_build.return_value = mock_provider -        yield mock_provider - - -@pytest.fixture -def gdrive_client(mock_gdrive_provider): -    """ -    Creates an instance of our GoogleDriveClient.
- It will be initialized with a fake config and will use - the mock_gdrive_provider fixture internally. - """ - # Patch _get_credentials to avoid real auth - with patch( - "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", - return_value=MagicMock(), - ): - config = GoogleDriveServiceConfig( - name="test_gdrive", - gcp_credentials_json="fake_creds.json", - ) - return GoogleDriveClient(config=config) From 090ee2983b229435e489d746d0d737541f77a45c Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:41:00 +0200 Subject: [PATCH 36/39] remove logger --- src/docbinder_oss/cli/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 34feb90..0a58f1f 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -121,7 +121,6 @@ def file_matches(file: File): if created_before: file_created_time = __parse_dt(file.created_time) created_before_dt = __parse_dt(created_before) - logging.debug(f"File created time: {file_created_time}, Created before: {created_before_dt}, Type: {type(file_created_time)}, Type: {type(created_before_dt)}") if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: return False if min_size and file.size < min_size: From a803975aa68151aa963b7f89186a8ead3cd17a9c Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:55:29 +0200 Subject: [PATCH 37/39] fix linting --- .pre-commit-config.yaml | 2 +- src/docbinder_oss/cli/search.py | 2 - .../helpers/writers/multiformat_writer.py | 2 +- tests/commands/test_search_command.py | 43 +++++++++++++++++-- tests/conftest.py | 10 ++++- 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9591f18..e885ddb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # Run the linter. - id: ruff-check types_or: [ python, pyi ] - args: [ --select, I, --fix ] + args: [ --select, I, --fix, --select=E501 ] # Run the formatter. 
- id: ruff-format types_or: [ python, pyi ] diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 0a58f1f..d4b63e3 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,9 +1,7 @@ from datetime import datetime -import logging import re import typer from typing import Dict, List, Optional -import csv from docbinder_oss.core.schemas import File from docbinder_oss.helpers.config import load_config diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py index c6b688e..ba282fa 100644 --- a/src/docbinder_oss/helpers/writers/multiformat_writer.py +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Dict, List +from typing import Dict, List from docbinder_oss.core.schemas import File from docbinder_oss.helpers.writers.base import Writer diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 1ccc378..eb37e4d 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -84,7 +84,19 @@ def test_search_name_filter_not_empty(load_config_mock, create_provider_instance @pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@a.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@a.com", + photo_link="https://test.com", + kind="" + ) + ] + ), DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), ])], indirect=True) def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): @@ -98,7 +110,19 @@ def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mo @pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@b.com", + photo_link="https://test.com", + kind="" + ) + ] + ), DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), ])], indirect=True) def test_search_owner_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): @@ -262,7 +286,20 @@ def test_search_provider_filter(load_config_mock, create_provider_instance_mock, @pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="Beta File 1", kind="file", size=5, owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="Beta File 1", + kind="file", + size=5, + owners=[ + User( + display_name="test", + 
email_address="beta@b.com",
+                photo_link="https://test.com",
+                kind=""
+            )
+        ]
+    ),
     DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2),
 ])], indirect=True)
 def test_search_combined_filters(load_config_mock, create_provider_instance_mock, list_all_files_mock):
diff --git a/tests/conftest.py b/tests/conftest.py
index e3f37ee..062bc2a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -96,7 +96,10 @@ def load_config_mock(request, create_config_mock):
     a dummy configuration with a specified number of providers.
     """
     name, number_of_providers = request.param
-    with patch("docbinder_oss.cli.search.load_config", return_value=create_config_mock(name, number_of_providers)) as _fixture:
+    with patch(
+        "docbinder_oss.cli.search.load_config",
+        return_value=create_config_mock(name, number_of_providers)
+    ) as _fixture:
         yield _fixture
 
 @pytest.fixture(scope='session')
@@ -105,7 +108,10 @@ def create_provider_instance_mock(request, create_provider_mock):
     This fixture mocks the `create_provider_instance` function to return
     a dummy provider instance based on the provider name.
     """
-    with patch("docbinder_oss.cli.search.create_provider_instance", return_value=create_provider_mock(request.param)) as _fixture:
+    with patch(
+        "docbinder_oss.cli.search.create_provider_instance",
+        return_value=create_provider_mock(request.param)
+    ) as _fixture:
         yield _fixture
 
 @pytest.fixture(scope="session")

From 1a325b9695e533a1bcc50dc24b4af9f325b865f7 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Tue, 1 Jul 2025 09:35:43 +0200
Subject: [PATCH 38/39] Updated workflow names

---
 .github/workflows/{docbinder-oss.yml => ci.yml} | 2 +-
 .github/workflows/pypi-publish.yml              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename .github/workflows/{docbinder-oss.yml => ci.yml} (96%)

diff --git a/.github/workflows/docbinder-oss.yml b/.github/workflows/ci.yml
similarity index 96%
rename from .github/workflows/docbinder-oss.yml
rename to .github/workflows/ci.yml
index 6a6a044..bbf2c95 100644
--- a/.github/workflows/docbinder-oss.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: DocBinder OSS Library CI/CD
+name: DocBinder OSS Library CI
 
 on:
   pull_request:
diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index c9ff952..1614841 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -1,4 +1,4 @@
-name: Publish to PyPI
+name: DocBinder OSS Publish to PyPI
 
 # This workflow publishes the package to PyPI when a new tag is created on the main branch.
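 # A minimal sketch of the trigger described above, assuming the conventional
 # on.push.tags filter (the actual pattern sits outside this hunk, and the
 # "v*" glob is an assumption for illustration):
 #   on:
 #     push:
 #       tags:
 #         - "v*"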
 on:
   push:

From 70f7c0b2624838c2b90d46ed0ebab4fa55a152b3 Mon Sep 17 00:00:00 2001
From: PaoloLeonard
Date: Tue, 1 Jul 2025 09:42:50 +0200
Subject: [PATCH 39/39] removed black and added pre-commit in CI

---
 .github/workflows/ci.yml |  2 +-
 pyproject.toml           |  1 -
 uv.lock                  | 39 ---------------------------------------
 3 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bbf2c95..2c162ef 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -40,7 +40,7 @@ jobs:
 
     - name: Lint code with uv
       run: |
-        uv tool run ruff check
+        uv tool run pre-commit run --all-files
 
     - name: Run tox with uv
       run: |
diff --git a/pyproject.toml b/pyproject.toml
index ed3fec0..026bb96 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,6 @@ include = ["src/docbinder_oss/**"]
 
 [dependency-groups]
 dev = [
-    "black>=25.1.0",
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
     "pre-commit>=4.2.0",
diff --git a/uv.lock b/uv.lock
index 61dfd65..e5d1e47 100644
--- a/uv.lock
+++ b/uv.lock
@@ -37,34 +37,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0c/37/fb6973edeb700f6e3d6ff222400602ab1830446c25c7b4676d8de93e65b8/backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc", size = 380336, upload-time = "2025-02-25T16:53:29.858Z" },
 ]
 
-[[package]]
-name = "black"
-version = "25.1.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "click" },
-    { name = "mypy-extensions" },
-    { name = "packaging" },
-    { name = "pathspec" },
-    { name = "platformdirs" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" },
-    { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" },
-    { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" },
-    { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" },
-    { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" },
-    { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" },
-    { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" },
-    { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" },
-    { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" },
-]
 
 [[package]]
 name = "cachetools"
 version = "5.5.2"
@@ -204,7 +176,6 @@ dependencies = [
 
 [package.dev-dependencies]
 dev = [
-    { name = "black" },
     { name = "mkdocs" },
     { name = "mkdocs-material" },
     { name = "pre-commit" },
@@ -227,7 +198,6 @@ requires-dist = [
 
 [package.metadata.requires-dev]
 dev = [
-    { name = "black", specifier = ">=25.1.0" },
     { name = "mkdocs", specifier = ">=1.6.1" },
     { name = "mkdocs-material", specifier = ">=9.6.14" },
     { name = "pre-commit", specifier = ">=4.2.0" },
@@ -561,15 +531,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" },
 ]
 
-[[package]]
-name = "mypy-extensions"
-version = "1.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
-]
 
 [[package]]
 name = "nodeenv"
 version = "1.9.1"