From 9a5ee1726c9c553b6d419f4d331199c0b42d5da3 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Sun, 15 Jun 2025 15:35:03 +0200 Subject: [PATCH 01/39] Added the docbinder search command that allows to search through all google drive documents --- src/docbinder_oss/main.py | 184 ++++++++++-------- src/docbinder_oss/provider.py | 93 +++++++++ src/docbinder_oss/services/base_class.py | 10 + .../services/google_drive/__init__.py | 15 ++ .../google_drive/google_drive_client.py | 10 +- 5 files changed, 233 insertions(+), 79 deletions(-) create mode 100644 src/docbinder_oss/provider.py diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 949fc89..720e3d4 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,22 +1,12 @@ -from typing import Annotated, List, Optional +from typing import List, Optional import typer import yaml from docbinder_oss.helpers.config import save_config, validate_config -from docbinder_oss.services import create_provider_instance app = typer.Typer() -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") - # This is the main entry point for the DocBinder CLI. @app.callback() @@ -76,82 +66,120 @@ def setup( raise typer.Exit(code=1) typer.echo("Configuration saved successfully.") - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." 
- ), +@app.command() +def search( + name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), + max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), ): - """Get connection information for a specific provider.""" + """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + import re + import csv + import json + from datetime import datetime from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance config = load_config() - - count = 0 if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." 
- ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) + results = [] for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) - + if provider and provider_config.name != provider: + continue + client = create_provider_instance(provider_config) + if client is None or not hasattr(client, "list_all_files"): + continue + try: + files = client.list_all_files() + for item in files: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except Exception: + continue + # Collect all possible params for export + results.append({ + "provider": provider_config.name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) + except Exception as 
e: + typer.echo(f"Error searching provider '{provider_config.name}': {e}") + # Write results to CSV or JSON + if results: + fieldnames = [ + "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + ] + if export_format.lower() == "json": + with open("search_results.json", "w") as jsonfile: + json.dump(results, jsonfile, indent=2, default=str) + typer.echo(f"{len(results)} results written to search_results.json") + else: + with open("search_results.csv", "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in results: + writer.writerow(row) + typer.echo(f"{len(results)} results written to search_results.csv") + else: + typer.echo("No results found.") + return results if __name__ == "__main__": app() diff --git a/src/docbinder_oss/provider.py b/src/docbinder_oss/provider.py new file mode 100644 index 0000000..4f97fa1 --- /dev/null +++ b/src/docbinder_oss/provider.py @@ -0,0 +1,93 @@ +from typing import Annotated +import typer +from .main import app +from docbinder_oss.services import create_provider_instance + + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +provider_app = typer.Typer( + help="Commands to manage providers. List them or get details for a specific one." +) +# We add this group to our main application. +app.add_typer(provider_app, name="provider") + +@provider_app.command() +def list(): + """List all configured providers.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + for provider in config.providers: + typer.echo(f"Provider: {provider.name}, Type: {provider.type}") + + +@provider_app.command("get") +def get_provider( + connection_type: str = typer.Option( + None, "--type", "-t", help="The type of the provider to get." + ), + name: str = typer.Option( + None, "--name", "-n", help="The name of the provider to get." + ), +): + """Get connection information for a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + + count = 0 + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider in config.providers: + if provider.name == name: + typer.echo(f"Provider '{name}' found with config: {provider}") + count += 1 + if provider.type == connection_type: + typer.echo( + f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + ) + count += 1 + if count == 0: + typer.echo( + f"No providers found with name '{name}' or type '{connection_type}'." 
+ ) + raise typer.Exit(code=1) + + +@provider_app.command("test") +def test( + name: Annotated[ + str, typer.Argument(help="The name of the provider to test the connection.") + ], +): + """Test the connection to a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider_config in config.providers: + if provider_config.name == name: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found + typer.echo(f"Provider '{name}' not found in configuration.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index 08761b3..dd51cec 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -47,6 +47,16 @@ def list_files(self, folder_id: Optional[str] = None) -> List[File]: """ pass + @abstractmethod + def list_all_files(self) -> List[File]: + """ + Lists all files and folders in the storage service. + + Returns: + A list of StorageItem objects representing all files and folders. + """ + pass + @abstractmethod def get_file_metadata(self, item_id: str) -> File: """ diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 87153e8..6f3bc44 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -25,3 +25,18 @@ def register() -> dict: "config_class": GoogleDriveServiceConfig, "client_class": GoogleDriveClient, } + +def get_service_name() -> str: + """ + Returns the name of the service. + This is used for logging and identification purposes. + """ + return "Google Drive" + +def get_service_display_name() -> str: + """ + Returns the display name of the service. + This is used for user-friendly identification. 
+ """ + return "Google Drive Service" + return \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index a08c28f..77aab51 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -42,7 +42,7 @@ def _get_credentials(self): TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) - + try: creds = Credentials.from_authorized_user_file( TOKEN_PATH, scopes=self.SCOPES @@ -76,6 +76,14 @@ def list_buckets(self) -> list: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) + + def list_all_files(self) -> List[File]: + buckets = self.list_buckets() + all_files = [] + for bucket in buckets: + files = self.files.list_files(bucket.id) + all_files.extend(files) + return all_files def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) From 3722d39837a7d909f94d7764fe292ae83d6aec9b Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Sun, 15 Jun 2025 15:52:39 +0200 Subject: [PATCH 02/39] Changed all commands to dedicated folders and better structure. Also added tests for the new search functionality. --- src/docbinder_oss/commands/__init__.py | 0 .../commands/provider/__init__.py | 11 ++ src/docbinder_oss/commands/provider/get.py | 35 ++++ src/docbinder_oss/commands/provider/list.py | 15 ++ src/docbinder_oss/commands/provider/test.py | 35 ++++ src/docbinder_oss/commands/search.py | 118 ++++++++++++ src/docbinder_oss/commands/setup.py | 50 ++++++ src/docbinder_oss/main.py | 169 +----------------- src/docbinder_oss/provider.py | 93 ---------- tests/commands/test_search_command.py | 154 ++++++++++++++++ tests/services/test_search_export.py | 149 +++++++++++++++ 11 files changed, 571 insertions(+), 258 deletions(-) create mode 100644 src/docbinder_oss/commands/__init__.py create mode 100644 src/docbinder_oss/commands/provider/__init__.py create mode 100644 src/docbinder_oss/commands/provider/get.py create mode 100644 src/docbinder_oss/commands/provider/list.py create mode 100644 src/docbinder_oss/commands/provider/test.py create mode 100644 src/docbinder_oss/commands/search.py create mode 100644 src/docbinder_oss/commands/setup.py delete mode 100644 src/docbinder_oss/provider.py create mode 100644 tests/commands/test_search_command.py create mode 100644 tests/services/test_search_export.py diff --git a/src/docbinder_oss/commands/__init__.py b/src/docbinder_oss/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py new file mode 100644 index 0000000..4fa1055 --- /dev/null +++ b/src/docbinder_oss/commands/provider/__init__.py @@ -0,0 +1,11 @@ +import typer +from docbinder_oss.main import app + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +provider_app = typer.Typer( + help="Commands to manage providers. List them or get details for a specific one." +) +# We add this group to our main application. 
+app.add_typer(provider_app, name="provider") \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py new file mode 100644 index 0000000..a20a7fa --- /dev/null +++ b/src/docbinder_oss/commands/provider/get.py @@ -0,0 +1,35 @@ +from docbinder_oss.commands.provider import provider_app +import typer + +@provider_app.command("get") +def get_provider( + connection_type: str = typer.Option( + None, "--type", "-t", help="The type of the provider to get." + ), + name: str = typer.Option( + None, "--name", "-n", help="The name of the provider to get." + ), +): + """Get connection information for a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + + count = 0 + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider in config.providers: + if provider.name == name: + typer.echo(f"Provider '{name}' found with config: {provider}") + count += 1 + if provider.type == connection_type: + typer.echo( + f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + ) + count += 1 + if count == 0: + typer.echo( + f"No providers found with name '{name}' or type '{connection_type}'." + ) + raise typer.Exit(code=1) \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py new file mode 100644 index 0000000..a6fc0b7 --- /dev/null +++ b/src/docbinder_oss/commands/provider/list.py @@ -0,0 +1,15 @@ +from docbinder_oss.commands.provider import provider_app +import typer + +@provider_app.command() +def list(): + """List all configured providers.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + for provider in config.providers: + typer.echo(f"Provider: {provider.name}, Type: {provider.type}") \ No newline at end of file diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/commands/provider/test.py new file mode 100644 index 0000000..354b8fa --- /dev/null +++ b/src/docbinder_oss/commands/provider/test.py @@ -0,0 +1,35 @@ +from docbinder_oss.commands.provider import provider_app +import typer +from typing import Annotated +from docbinder_oss.services import create_provider_instance + +@provider_app.command("test") +def test( + name: Annotated[ + str, typer.Argument(help="The name of the provider to test the connection.") + ], +): + """Test the connection to a specific provider.""" + from docbinder_oss.helpers.config import load_config + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + for provider_config in config.providers: + if provider_config.name == name: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found + typer.echo(f"Provider '{name}' not found in configuration.") + raise typer.Exit(code=1) \ No newline at end of file diff --git 
a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/commands/search.py new file mode 100644 index 0000000..9e61827 --- /dev/null +++ b/src/docbinder_oss/commands/search.py @@ -0,0 +1,118 @@ +import typer +from typing import Optional +from docbinder_oss.main import app + +@app.command() +def search( + name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), + max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), +): + """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + import re + import csv + import json + from datetime import datetime + from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance + + config = load_config() + if not config.providers: + typer.echo("No providers configured.") + raise typer.Exit(code=1) + + results = [] + for provider_config in config.providers: + if provider and provider_config.name != provider: + continue + client = create_provider_instance(provider_config) + if client is None or not hasattr(client, "list_all_files"): + continue + try: + files = client.list_all_files() + for item in files: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except 
Exception: + continue + # Collect all possible params for export + results.append({ + "provider": provider_config.name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) + except Exception as e: + typer.echo(f"Error searching provider '{provider_config.name}': {e}") + # Write results to CSV or JSON + if results: + fieldnames = [ + "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + ] + if export_format.lower() == "json": + with open("search_results.json", "w") as jsonfile: + json.dump(results, jsonfile, indent=2, default=str) + typer.echo(f"{len(results)} results written to search_results.json") + else: + with open("search_results.csv", "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in results: + writer.writerow(row) + typer.echo(f"{len(results)} results written to search_results.csv") + else: + typer.echo("No results found.") + return results \ No newline at end of file diff --git a/src/docbinder_oss/commands/setup.py b/src/docbinder_oss/commands/setup.py new file mode 100644 index 0000000..dbe9839 --- /dev/null +++ b/src/docbinder_oss/commands/setup.py @@ -0,0 +1,50 @@ +import typer +from typing import List, Optional +import yaml +from docbinder_oss.helpers.config import save_config, validate_config +from docbinder_oss.main import app + +@app.command() +def setup( + file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), + provider: Optional[List[str]] = typer.Option( + None, + "--provider", + help="Provider config as provider:key1=val1,key2=val2", + callback=lambda v: v or [], + ), +): + """Setup DocBinder configuration via YAML file or provider key-value pairs.""" + config_data = {} + if file: + with open(file, "r") as f: + config_data = yaml.safe_load(f) or {} + elif provider: + providers = {} + for entry in provider: + if ":" not in entry: + typer.echo( + f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." + ) + raise typer.Exit(code=1) + prov_name, prov_kvs = entry.split(":", 1) + kv_dict = {} + for pair in prov_kvs.split(","): + if "=" not in pair: + typer.echo(f"Provider config '{pair}' must be in key=value format.") + raise typer.Exit(code=1) + k, v = pair.split("=", 1) + kv_dict[k] = v + providers[prov_name] = kv_dict + config_data["providers"] = providers + validated = validate_config(config_data) + if not validated.providers: + typer.echo("No providers configured. 
Please add at least one provider.") + raise typer.Exit(code=1) + # Save the validated config + try: + save_config(validated) + except Exception as e: + typer.echo(f"Error saving config: {e}") + raise typer.Exit(code=1) + typer.echo("Configuration saved successfully.") \ No newline at end of file diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 720e3d4..0aff0b5 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,12 +1,12 @@ -from typing import List, Optional - import typer -import yaml - from docbinder_oss.helpers.config import save_config, validate_config app = typer.Typer() +from docbinder_oss.commands import search +from docbinder_oss.commands import setup +from docbinder_oss.commands.provider import list, get, test + # This is the main entry point for the DocBinder CLI. @app.callback() @@ -20,166 +20,5 @@ def hello(): """Print a friendly greeting.""" typer.echo("Hello, DocBinder OSS!") - -@app.command() -def setup( - file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), - provider: Optional[List[str]] = typer.Option( - None, - "--provider", - help="Provider config as provider:key1=val1,key2=val2", - callback=lambda v: v or [], - ), -): - """Setup DocBinder configuration via YAML file or provider key-value pairs.""" - config_data = {} - if file: - with open(file, "r") as f: - config_data = yaml.safe_load(f) or {} - elif provider: - providers = {} - for entry in provider: - if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) - raise typer.Exit(code=1) - prov_name, prov_kvs = entry.split(":", 1) - kv_dict = {} - for pair in prov_kvs.split(","): - if "=" not in pair: - typer.echo(f"Provider config '{pair}' must be in key=value format.") - raise typer.Exit(code=1) - k, v = pair.split("=", 1) - kv_dict[k] = v - providers[prov_name] = kv_dict - config_data["providers"] = providers - validated = validate_config(config_data) - if not validated.providers: - typer.echo("No providers configured. 
Please add at least one provider.") - raise typer.Exit(code=1) - # Save the validated config - try: - save_config(validated) - except Exception as e: - typer.echo(f"Error saving config: {e}") - raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") - -@app.command() -def search( - name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), - updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), - updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), - created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), - created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), - min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), - max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), - export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), -): - """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import re - import csv - import json - from datetime import datetime - from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import create_provider_instance - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - results = [] - for provider_config in config.providers: - if provider and provider_config.name != provider: - continue - client = create_provider_instance(provider_config) - if client is None or not hasattr(client, "list_all_files"): - continue - try: - files = client.list_all_files() - for item in files: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except 
Exception: - continue - # Collect all possible params for export - results.append({ - "provider": provider_config.name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) - except Exception as e: - typer.echo(f"Error searching provider '{provider_config.name}': {e}") - # Write results to CSV or JSON - if results: - fieldnames = [ - "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" - ] - if export_format.lower() == "json": - with open("search_results.json", "w") as jsonfile: - json.dump(results, jsonfile, indent=2, default=str) - typer.echo(f"{len(results)} results written to search_results.json") - else: - with open("search_results.csv", "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for row in results: - writer.writerow(row) - typer.echo(f"{len(results)} results written to search_results.csv") - else: - typer.echo("No results found.") - return results - if __name__ == "__main__": app() diff --git a/src/docbinder_oss/provider.py b/src/docbinder_oss/provider.py deleted file mode 100644 index 4f97fa1..0000000 --- a/src/docbinder_oss/provider.py +++ /dev/null @@ -1,93 +0,0 @@ -from typing import Annotated -import typer -from .main import app -from docbinder_oss.services import create_provider_instance - - -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." 
- ), -): - """Get connection information for a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - - count = 0 - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." - ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - if client is None: - typer.echo(f"Provider '{name}' is not supported or not implemented.") - raise typer.Exit(code=1) - # Attempt to test the connection - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py new file mode 100644 index 0000000..9f1bd67 --- /dev/null +++ b/tests/commands/test_search_command.py @@ -0,0 +1,154 @@ +import os +import csv +import json +import pytest +from typer.testing import CliRunner +from docbinder_oss.main import app + +class DummyFile: + def __init__(self, **kwargs): + self.id = kwargs.get("id", "fileid1") + self.name = kwargs.get("name", "Test File") + self.size = kwargs.get("size", 12345) + self.mime_type = kwargs.get("mime_type", "application/pdf") + self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") + self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) + self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") + self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") + self.shared = kwargs.get("shared", True) + self.trashed = kwargs.get("trashed", False) + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + # Patch config loader to return two dummy provider configs + class DummyProviderConfig: + def __init__(self, name): + self.name = name + class DummyConfig: + providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider + def create_provider_instance(cfg): + if cfg.name == "dummy1": + return type("DummyClient", (), 
{"list_all_files": lambda self: [ + DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") + ]})() + else: + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") + ]})() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) + +def test_search_export_csv(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "csv"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.csv") + with open("search_results.csv") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + names = set(r["name"] for r in rows) + assert names == {"Alpha Report", "Beta Notes"} + # Check owners field is a string + for r in rows: + if r["name"] == "Alpha Report": + assert r["owners"] == "alpha@a.com" + if r["name"] == "Beta Notes": + assert r["owners"] == "beta@b.com" + +def test_search_export_json(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + with open("search_results.json") as f: + data = json.load(f) + assert isinstance(data, list) + assert len(data) == 2 + names = set(d["name"] for d in data) + assert names == {"Alpha Report", "Beta Notes"} + +def test_search_name_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_owner_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_updated_after_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_created_before_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_min_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_max_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with 
open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_provider_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["provider"] == "dummy2" + assert data[0]["name"] == "Beta Notes" + +def test_search_combined_filters(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + assert data[0]["provider"] == "dummy2" + assert data[0]["owners"] == "beta@b.com" diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py new file mode 100644 index 0000000..b998449 --- /dev/null +++ b/tests/services/test_search_export.py @@ -0,0 +1,149 @@ +import os +import csv +import json +import tempfile +import shutil +import pytest +from typer.testing import CliRunner +from docbinder_oss.main import app + +class DummyFile: + def __init__(self, **kwargs): + self.id = kwargs.get("id", "fileid1") + self.name = kwargs.get("name", "Test File") + self.size = kwargs.get("size", 12345) + self.mime_type = kwargs.get("mime_type", "application/pdf") + self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") + self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) + self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") + self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") + self.shared = kwargs.get("shared", True) + self.trashed = kwargs.get("trashed", False) + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + # Patch config loader to return two dummy provider configs + class DummyProviderConfig: + def __init__(self, name): + self.name = name + class DummyConfig: + providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider + def create_provider_instance(cfg): + if cfg.name == "dummy1": + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") + ]})() + else: + return type("DummyClient", (), {"list_all_files": lambda self: [ + DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") + ]})() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) + +def test_search_export_csv(): + runner = CliRunner() + result 
= runner.invoke(app, ["search", "--export-format", "csv"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.csv") + with open("search_results.csv") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + assert set(r["name"] for r in rows) == {"Alpha Report", "Beta Notes"} + +def test_search_export_json(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + with open("search_results.json") as f: + data = json.load(f) + assert isinstance(data, list) + assert len(data) == 2 + names = set(d["name"] for d in data) + assert names == {"Alpha Report", "Beta Notes"} + +def test_search_name_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_owner_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_updated_after_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_created_before_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_min_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + +def test_search_max_size_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Alpha Report" + +def test_search_provider_filter(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["provider"] == "dummy2" + assert data[0]["name"] == "Beta Notes" + +def test_search_combined_filters(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 1 + assert data[0]["name"] == "Beta Notes" + assert data[0]["provider"] == "dummy2" + assert data[0]["owners"] == "beta@b.com" From a9fc52c261091bb4129af94f06925ef005c99417 Mon Sep 17 00:00:00 2001 From: Christophe Beke 
Date: Thu, 19 Jun 2025 13:27:16 +0200 Subject: [PATCH 03/39] Added traversal of child directories to get all the files --- search_results.csv | 4 ++ .../google_drive/google_drive_client.py | 20 +++++- .../google_drive/google_drive_files.py | 36 ++++++---- .../google_drive/test_google_drive_files.py | 68 +++++++++++++++++++ 4 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 search_results.csv diff --git a/search_results.csv b/search_results.csv new file mode 100644 index 0000000..f62fd5b --- /dev/null +++ b/search_results.csv @@ -0,0 +1,4 @@ +provider,id,name,size,mime_type,created_time,modified_time,owners,last_modifying_user,web_view_link,web_content_link,shared,trashed +my_google_drive,1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,New Folder,,application/vnd.google-apps.folder,2025-06-15 13:56:34.594000+00:00,2025-06-15 13:56:34.594000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://drive.google.com/drive/folders/1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,,False,False +my_google_drive,1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8,Test Sheet,1024,application/vnd.google-apps.spreadsheet,2025-06-15 13:56:41.248000+00:00,2025-06-15 13:56:45.986000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/spreadsheets/d/1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8/edit?usp=drivesdk,,False,False +my_google_drive,1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA,Test Doc,1024,application/vnd.google-apps.document,2025-06-15 11:37:05.926000+00:00,2025-06-15 11:37:10.663000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/document/d/1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA/edit?usp=drivesdk,,False,False diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index 77aab51..3e6fb17 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -78,11 +78,26 @@ def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) def list_all_files(self) -> List[File]: + """ + Recursively list all files and folders in all buckets (drives). + Handles My Drive and Shared Drives correctly. 
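+
+        Note: this walks the tree with one files.list request per folder,
+        so large or deeply nested drives translate into many API calls.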
+ """ + def _recursive_list(folder_id, is_drive_root=False): + items = self.files.list_files(folder_id, is_drive_root=is_drive_root) + all_items = [] + for item in items: + all_items.append(item) + # Use mime_type to check if this is a folder + if getattr(item, "mime_type", None) == "application/vnd.google-apps.folder": + all_items.extend(_recursive_list(item.id)) + return all_items + buckets = self.list_buckets() all_files = [] for bucket in buckets: - files = self.files.list_files(bucket.id) - all_files.extend(files) + # If bucket.id == "root", it's My Drive; otherwise, it's a shared drive + is_drive_root = bucket.id != "root" + all_files.extend(_recursive_list(bucket.id, is_drive_root=is_drive_root)) return all_files def get_file_metadata(self, item_id: str) -> File: @@ -90,3 +105,4 @@ def get_file_metadata(self, item_id: str) -> File: def get_permissions(self, item_id: str) -> List[Permission]: return self.permissions.get_permissions(item_id) + \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index a41b9f1..d1b687d 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -13,35 +13,43 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, folder_id=None): - if folder_id and len(folder_id.split("|", 1)) > 1: - logger.warning("Folder ID should not contain '|' character") - _, folder_id = folder_id.split("|", 1) - - if folder_id == "root": - query = "'root' in parents and trashed=false" + def list_files(self, folder_id=None, is_drive_root=False) -> list[File]: + # If listing the root of a shared drive + if is_drive_root: + resp = ( + self.service.files() # type: ignore[attr-defined] + .list( + corpora="drive", + driveId=folder_id, + includeItemsFromAllDrives=True, + supportsAllDrives=True, + q="'root' in parents and trashed=false", + fields=f"files({REQUIRED_FIELDS})", + ) + .execute() + ) + elif folder_id == "root" or folder_id is None: + # Listing the root of My Drive resp = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .list( - q=query, + q="'root' in parents and trashed=false", fields=f"files({REQUIRED_FIELDS})", ) .execute() ) else: + # Listing a regular folder resp = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .list( - corpora="drive", q=f"'{folder_id}' in parents and trashed=false", - driveId=folder_id, includeItemsFromAllDrives=True, supportsAllDrives=True, fields=f"files({REQUIRED_FIELDS})", ) .execute() ) - return [ File( id=f.get("id"), @@ -79,7 +87,7 @@ def list_files(self, folder_id=None): def get_file_metadata(self, file_id: str): item_metadata = ( - self.service.files() + self.service.files() # type: ignore[attr-defined] .get( fileId=file_id, fields=f"{REQUIRED_FIELDS}", diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 6443cfb..1f5c379 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -1,6 +1,58 @@ from datetime import datetime +import os +import pytest +from typer.testing import CliRunner from docbinder_oss.core.schemas import File +from docbinder_oss.main import app + + +class DummyFile: + def __init__(self, id, name, parents=None, is_folder=False): + self.id = id + self.name = name + 
self.parents = parents or [] + self.is_folder = is_folder + self.size = 1000 + self.mime_type = "application/pdf" + self.created_time = "2024-01-01T00:00:00" + self.modified_time = "2024-01-02T00:00:00" + self.owners = [type("User", (), {"email_address": "owner@example.com"})()] + self.last_modifying_user = type("User", (), {"email_address": "mod@example.com"})() + self.web_view_link = "http://example.com/view" + self.web_content_link = "http://example.com/content" + self.shared = True + self.trashed = False + + +@pytest.fixture(autouse=True) +def patch_provider(monkeypatch, tmp_path): + class DummyProviderConfig: + name = "googledrive" + + class DummyConfig: + providers = [DummyProviderConfig()] + + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Simulate a folder structure: root -> folder1 -> file1, file2; root -> file3 + def list_all_files(self): + return [ + DummyFile(id="root", name="root", is_folder=True), + DummyFile(id="folder1", name="folder1", parents=["root"], is_folder=True), + DummyFile(id="file1", name="file1.pdf", parents=["folder1"]), + DummyFile(id="file2", name="file2.pdf", parents=["folder1"]), + DummyFile(id="file3", name="file3.pdf", parents=["root"]), + ] + + class DummyClient: + def list_all_files(self): + return list_all_files(self) + + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient()) + orig_cwd = os.getcwd() + os.chdir(tmp_path) + yield + os.chdir(orig_cwd) def test_list_files(mock_gdrive_service, gdrive_client): @@ -81,3 +133,19 @@ def test_list_files(mock_gdrive_service, gdrive_client): trashed=False, ) ] + + +def test_search_finds_all_files_recursively(): + runner = CliRunner() + result = runner.invoke(app, ["search", "--export-format", "json"]) + assert result.exit_code == 0 + assert os.path.exists("search_results.json") + import json + + with open("search_results.json") as f: + data = json.load(f) + # All files and folders should be included in the results + file_names = set(d["name"] for d in data) + expected = {"file1.pdf", "file2.pdf", "file3.pdf", "folder1", "root"} + assert file_names == expected + assert len(file_names) == 5 From 178ea9abb1105d747925a573b044346ce321b752 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Thu, 19 Jun 2025 13:29:02 +0200 Subject: [PATCH 04/39] Delete a file that wasn't supposed to be there --- search_results.csv | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 search_results.csv diff --git a/search_results.csv b/search_results.csv deleted file mode 100644 index f62fd5b..0000000 --- a/search_results.csv +++ /dev/null @@ -1,4 +0,0 @@ -provider,id,name,size,mime_type,created_time,modified_time,owners,last_modifying_user,web_view_link,web_content_link,shared,trashed -my_google_drive,1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,New Folder,,application/vnd.google-apps.folder,2025-06-15 13:56:34.594000+00:00,2025-06-15 13:56:34.594000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://drive.google.com/drive/folders/1mpnjaTRDfT7vRP5iq4adIaBFXhkwqjtZ,,False,False -my_google_drive,1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8,Test Sheet,1024,application/vnd.google-apps.spreadsheet,2025-06-15 13:56:41.248000+00:00,2025-06-15 13:56:45.986000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/spreadsheets/d/1C37OTKAK3rLnZrJ1ZtusHQsGAlncUHR-0RIYCadQob8/edit?usp=drivesdk,,False,False -my_google_drive,1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA,Test Doc,1024,application/vnd.google-apps.document,2025-06-15 
11:37:05.926000+00:00,2025-06-15 11:37:10.663000+00:00,snappylab25@gmail.com,snappylab25@gmail.com,https://docs.google.com/document/d/1eYc3W-SqWZT_mT43o9HSkOP_pzFEl5u5U7FDChDWgWA/edit?usp=drivesdk,,False,False

From 7260899cfecc7b4c0717513ff8f81bbff7a7dff0 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:31:15 +0200
Subject: [PATCH 05/39] Update tests

---
 tests/services/google_drive/test_google_drive_files.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py
index 1f5c379..393849c 100644
--- a/tests/services/google_drive/test_google_drive_files.py
+++ b/tests/services/google_drive/test_google_drive_files.py
@@ -14,7 +14,8 @@ def __init__(self, id, name, parents=None, is_folder=False):
         self.parents = parents or []
         self.is_folder = is_folder
         self.size = 1000
-        self.mime_type = "application/pdf"
+        # Use correct mime_type for folders and files
+        self.mime_type = "application/vnd.google-apps.folder" if is_folder else "application/pdf"
         self.created_time = "2024-01-01T00:00:00"
         self.modified_time = "2024-01-02T00:00:00"
         self.owners = [type("User", (), {"email_address": "owner@example.com"})()]

From d107a8fe6b7c5921ca1560c8f2db3fa96603f7a8 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:32:39 +0200
Subject: [PATCH 06/39] Fixed merge conflict in 'src/docbinder_oss/services/google_drive/google_drive_client.py'

---
 src/docbinder_oss/services/google_drive/google_drive_client.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py
index a772fd9..e8e9611 100644
--- a/src/docbinder_oss/services/google_drive/google_drive_client.py
+++ b/src/docbinder_oss/services/google_drive/google_drive_client.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import List, Optional
 
 from google.auth.transport.requests import Request

From 9e5a51b39fdfbb1b4c502e5ebda7ae46e61ee43a Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Thu, 19 Jun 2025 13:44:21 +0200
Subject: [PATCH 07/39] Add the path of the items as an attribute.
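
The exported rows now include a "path" column. The search command first
indexes every listed item by id, then reconstructs each item's path by
walking its parent ids up to the root. Below is a minimal sketch of that
walk, mirroring the build_path helper this patch adds to
src/docbinder_oss/commands/search.py; it is simplified to take the id
index as a parameter and to assume `parents` is a list (as in the test
fixtures), whereas the committed helper closes over all_items_by_id and
also accepts a scalar parent:

    def build_path(item, items_by_id):
        # Walk parent ids towards the root, guarding against cycles
        # and against parents that were never listed.
        parts = [item.name]
        current, seen = item, set()
        while getattr(current, "parents", None):
            parent_id = current.parents[0]
            if parent_id in seen or parent_id not in items_by_id:
                break
            seen.add(parent_id)
            current = items_by_id[parent_id]
            parts.append(current.name)
        return "/".join(reversed(parts))

    # e.g. a file stored under root/folder1 exports as "root/folder1/file1.pdf"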
--- .gitignore | 3 + src/docbinder_oss/commands/search.py | 158 +++++++++++------- .../google_drive/google_drive_client.py | 6 +- .../google_drive_service_config.py | 3 +- 4 files changed, 106 insertions(+), 64 deletions(-) diff --git a/.gitignore b/.gitignore index d434cbf..a8e0008 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ ENV/ # Credentials gcp_credentials.json *_token.json + +# Test files +search_results.csv \ No newline at end of file diff --git a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/commands/search.py index 9e61827..7253455 100644 --- a/src/docbinder_oss/commands/search.py +++ b/src/docbinder_oss/commands/search.py @@ -28,79 +28,119 @@ def search( typer.echo("No providers configured.") raise typer.Exit(code=1) - results = [] + # Build a mapping of id -> file for path reconstruction + all_items_by_id = {} + all_results = [] + drive_id_to_name = {} + # If provider is Google Drive, build a mapping of drive id to drive name for provider_config in config.providers: if provider and provider_config.name != provider: continue client = create_provider_instance(provider_config) if client is None or not hasattr(client, "list_all_files"): continue + # Try to get drive mapping if possible + drive_id_to_name_local = {} + if hasattr(client, "buckets") and hasattr(client.buckets, "list_buckets"): # type: ignore[attr-defined] + try: + for bucket in client.buckets.list_buckets(): # type: ignore[attr-defined] + drive_id_to_name_local[bucket.id] = bucket.name + except Exception: + pass + drive_id_to_name.update(drive_id_to_name_local) try: files = client.list_all_files() for item in files: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except Exception: - continue - # Collect all possible params for export - results.append({ - "provider": provider_config.name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", 
None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) + all_items_by_id[item.id] = item + # Attach drive_id for later lookup + all_results.append((provider_config.name, item, getattr(item, "parents", ["root"])[0] if hasattr(item, "parents") and getattr(item, "parents", None) else "root", drive_id_to_name_local)) except Exception as e: typer.echo(f"Error searching provider '{provider_config.name}': {e}") + + def build_path(item): + # Reconstruct the path by walking up parents + path_parts = [item.name] + current = item + seen = set() + while getattr(current, "parents", None): + parent_ids = current.parents if isinstance(current.parents, list) else [current.parents] + parent_id = parent_ids[0] if parent_ids else None + if not parent_id or parent_id in seen or parent_id not in all_items_by_id: + break + seen.add(parent_id) + parent = all_items_by_id[parent_id] + path_parts.append(parent.name) + current = parent + return "/".join(reversed(path_parts)) + + results = [] + for provider_name, item, parent_id, drive_map in all_results: + # Name regex filter + if name: + if not re.search(name, item.name or "", re.IGNORECASE): + continue + # Owner/contributor/reader email filter + if owner: + emails = set() + owners_list = getattr(item, "owners", None) or [] + emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + last_mod_user = getattr(item, "last_modifying_user", None) + if last_mod_user and getattr(last_mod_user, "email_address", None): + emails.add(last_mod_user.email_address) + if owner not in emails: + continue + # Last update filter + if updated_after: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + continue + if updated_before: + if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + continue + # Created at filter + if created_after: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + continue + if created_before: + if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + continue + # Size filter (in KB) + if min_size is not None: + try: + if not item.size or int(item.size) < min_size * 1024: + continue + except Exception: + continue + if max_size is not None: + try: + if not item.size or int(item.size) > max_size * 1024: + continue + except Exception: + continue + # Find drive name + drive_name = drive_map.get(parent_id) or drive_id_to_name.get(parent_id) or drive_id_to_name.get("root") or "Unknown" + # Collect all possible params for export, including path, is_folder, and drive_name + results.append({ + "provider": provider_name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "path": build_path(item), + "is_folder": getattr(item, "mime_type", None) == "application/vnd.google-apps.folder", + "drive_name": drive_name, + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, 
"modified_time", None), + "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, + "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + }) # Write results to CSV or JSON if results: fieldnames = [ - "provider", "id", "name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + "provider", "id", "name", "path", "is_folder", "drive_name", "size", "mime_type", "created_time", "modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" ] if export_format.lower() == "json": with open("search_results.json", "w") as jsonfile: diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index e8e9611..ed8ed0d 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -45,7 +45,7 @@ def _get_credentials(self): try: creds = Credentials.from_authorized_user_file( - self.config.gcp_token_json, scopes=self.SCOPES + TOKEN_PATH, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -55,11 +55,11 @@ def _get_credentials(self): creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( - self.config.gcp_credentials_json, self.SCOPES + TOKEN_PATH, self.SCOPES ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(self.config.gcp_token_json, "w") as token: + with open(TOKEN_PATH, "w") as token: token.write(creds.to_json()) return creds diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index dd6c957..236f4d3 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,5 +5,4 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str - gcp_token_json: str + gcp_credentials_json: str \ No newline at end of file From c753807af7964b3bdbc934cac56d2056a9e71b06 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:03 +0200 Subject: [PATCH 08/39] Update src/docbinder_oss/services/google_drive/google_drive_client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/docbinder_oss/services/google_drive/google_drive_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index ed8ed0d..d827ea3 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -55,7 +55,7 @@ def _get_credentials(self): creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( - 
TOKEN_PATH, self.SCOPES + self.config.gcp_credentials_json, self.SCOPES ) creds = flow.run_local_server(port=0) # Save the credentials for the next run From 9dbf490bd0849482f2926f4bfe0212dc3d683d20 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:13 +0200 Subject: [PATCH 09/39] Update src/docbinder_oss/services/google_drive/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/docbinder_oss/services/google_drive/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 6f3bc44..9059dab 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -38,5 +38,4 @@ def get_service_display_name() -> str: Returns the display name of the service. This is used for user-friendly identification. """ - return "Google Drive Service" - return \ No newline at end of file + return "Google Drive Service" \ No newline at end of file From 98a6bc8115066fec77b4071970e3826f7a265c49 Mon Sep 17 00:00:00 2001 From: ChristopheBeke <48618152+ChristopheBeke@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:52:36 +0200 Subject: [PATCH 10/39] Update tests/services/test_search_export.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/services/test_search_export.py | 43 ++-------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py index b998449..c16c44b 100644 --- a/tests/services/test_search_export.py +++ b/tests/services/test_search_export.py @@ -50,47 +50,8 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) -def test_search_export_csv(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "csv"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.csv") - with open("search_results.csv") as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 2 - assert set(r["name"] for r in rows) == {"Alpha Report", "Beta Notes"} - -def test_search_export_json(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.json") - with open("search_results.json") as f: - data = json.load(f) - assert isinstance(data, list) - assert len(data) == 2 - names = set(d["name"] for d in data) - assert names == {"Alpha Report", "Beta Notes"} - -def test_search_name_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - -def test_search_owner_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - +# The test logic for search export and filters has been consolidated into `tests/commands/test_search_command.py`. +# This file no longer contains duplicate tests. 
def test_search_updated_after_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) From 77476805be0906a244ccf65cd51678a09ea949ce Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Thu, 19 Jun 2025 14:04:12 +0200 Subject: [PATCH 11/39] Removed unused parameter --- example_file.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/example_file.yaml b/example_file.yaml index e55f752..9a94d45 100644 --- a/example_file.yaml +++ b/example_file.yaml @@ -2,7 +2,6 @@ providers: - type: google_drive name: my_google_drive gcp_credentials_json: gcp_credentials.json - gcp_token_json: gcp_token.json # - type: dropbox # name: my_dropbox # api_key: dropbox-api-key From 59b00ffeeee70c8dc82e7955275263844057e222 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:11:18 +0200 Subject: [PATCH 12/39] improved help message and internal logic --- src/docbinder_oss/commands/provider/get.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py index a20a7fa..1bd8f65 100644 --- a/src/docbinder_oss/commands/provider/get.py +++ b/src/docbinder_oss/commands/provider/get.py @@ -10,25 +10,26 @@ def get_provider( None, "--name", "-n", help="The name of the provider to get." ), ): - """Get connection information for a specific provider.""" + """Get connection information for a provider by name or by type. + If both options are provided, it will search for providers matching either criterion.""" from docbinder_oss.helpers.config import load_config config = load_config() - count = 0 + provider_found = False if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) for provider in config.providers: if provider.name == name: typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 + provider_found = True if provider.type == connection_type: typer.echo( f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" ) - count += 1 - if count == 0: + provider_found = True + if not provider_found: typer.echo( f"No providers found with name '{name}' or type '{connection_type}'." 
) From 3da72b568bd3b55c729d7276c0a31c141896b65a Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:13:55 +0200 Subject: [PATCH 13/39] typo in list --- src/docbinder_oss/commands/provider/list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py index a6fc0b7..4d4d19e 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/commands/provider/list.py @@ -12,4 +12,4 @@ def list(): raise typer.Exit(code=1) for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") \ No newline at end of file + typer.echo(f"Provider: {provider.name}, type: {provider.type}") \ No newline at end of file From 6e68c5788d2ad37084c8702ce7f7c91ae4cf5618 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sat, 21 Jun 2025 12:19:00 +0200 Subject: [PATCH 14/39] improved the internal logic --- src/docbinder_oss/commands/provider/test.py | 44 +++++++++++++-------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/commands/provider/test.py index 354b8fa..7a18db2 100644 --- a/src/docbinder_oss/commands/provider/test.py +++ b/src/docbinder_oss/commands/provider/test.py @@ -1,35 +1,45 @@ from docbinder_oss.commands.provider import provider_app import typer from typing import Annotated -from docbinder_oss.services import create_provider_instance + @provider_app.command("test") def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], + name: Annotated[str, typer.Argument(help="The name of the provider to test the connection.")], ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config + from docbinder_oss.services import create_provider_instance + + if not name: + typer.echo("Provider name is required.") + raise typer.Exit(code=1) config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) + + found_provider_config = None for provider_config in config.providers: if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - if client is None: - typer.echo(f"Provider '{name}' is not supported or not implemented.") - raise typer.Exit(code=1) - # Attempt to test the connection - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return + found_provider_config = provider_config + break # Exit the loop once the provider is found + + if found_provider_config: + typer.echo(f"Testing connection for provider '{name}'...") + try: + client = create_provider_instance(provider_config) + if client is None: + typer.echo(f"Provider '{name}' is not supported or not implemented.") + raise typer.Exit(code=1) + # Attempt to test the connection + client.test_connection() + typer.echo(f"Connection to provider '{name}' is successful.") + except Exception as e: + typer.echo(f"Failed to connect to provider '{name}': {e}") + return + # If we reach here, the provider was not found typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) \ No newline at end of file + raise typer.Exit(code=1) From b4e209696b8c42cd2a8ca02efa1adcc7b5096f65 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 
2025 15:07:45 +0200
Subject: [PATCH 15/39] change to the provider commands

---
 src/docbinder_oss/commands/provider/__init__.py |  2 +-
 src/docbinder_oss/commands/provider/get.py      | 11 ++++-------
 src/docbinder_oss/commands/provider/list.py     |  3 ++-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py
index 4fa1055..e6057c3 100644
--- a/src/docbinder_oss/commands/provider/__init__.py
+++ b/src/docbinder_oss/commands/provider/__init__.py
@@ -8,4 +8,4 @@
     help="Commands to manage providers. List them or get details for a specific one."
 )
 # We add this group to our main application.
-app.add_typer(provider_app, name="provider")
\ No newline at end of file
+app.add_typer(provider_app, name="provider")

diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/commands/provider/get.py
index 1bd8f65..0e9f1fe 100644
--- a/src/docbinder_oss/commands/provider/get.py
+++ b/src/docbinder_oss/commands/provider/get.py
@@ -1,14 +1,13 @@
 from docbinder_oss.commands.provider import provider_app
 import typer
 
+
 @provider_app.command("get")
 def get_provider(
     connection_type: str = typer.Option(
         None, "--type", "-t", help="The type of the provider to get."
     ),
-    name: str = typer.Option(
-        None, "--name", "-n", help="The name of the provider to get."
-    ),
+    name: str = typer.Option(None, "--name", "-n", help="The name of the provider to get."),
 ):
     """Get connection information for a provider by name or by type.
     If both options are provided, it will search for providers matching either criterion."""
@@ -30,7 +29,5 @@ def get_provider(
         )
         provider_found = True
     if not provider_found:
-        typer.echo(
-            f"No providers found with name '{name}' or type '{connection_type}'."
- ) - raise typer.Exit(code=1) \ No newline at end of file + typer.echo(f"No providers found with name '{name}' or type '{connection_type}'.") + raise typer.Exit(code=1) diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/commands/provider/list.py index 4d4d19e..ce0a664 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/commands/provider/list.py @@ -1,6 +1,7 @@ from docbinder_oss.commands.provider import provider_app import typer + @provider_app.command() def list(): """List all configured providers.""" @@ -12,4 +13,4 @@ def list(): raise typer.Exit(code=1) for provider in config.providers: - typer.echo(f"Provider: {provider.name}, type: {provider.type}") \ No newline at end of file + typer.echo(f"Provider: {provider.name}, type: {provider.type}") From eebc5b5a8dfaefe42ea4c86337272159c2619616 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 2025 21:06:46 +0200 Subject: [PATCH 16/39] improved the cli --- .../{commands => cli}/__init__.py | 0 src/docbinder_oss/cli/provider/__init__.py | 12 ++ .../{commands => cli}/provider/get.py | 8 +- .../{commands => cli}/provider/list.py | 5 +- .../{commands => cli}/provider/test.py | 13 +- src/docbinder_oss/{commands => cli}/search.py | 141 +++++++++++++----- src/docbinder_oss/{commands => cli}/setup.py | 6 +- .../commands/provider/__init__.py | 11 -- src/docbinder_oss/core/schemas.py | 12 +- src/docbinder_oss/helpers/config.py | 8 +- src/docbinder_oss/main.py | 140 +---------------- src/docbinder_oss/services/__init__.py | 4 +- src/docbinder_oss/services/base_class.py | 2 +- .../services/google_drive/__init__.py | 8 +- .../google_drive/google_drive_client.py | 44 ++---- .../google_drive/google_drive_files.py | 81 +++++----- .../google_drive/google_drive_permissions.py | 6 +- .../google_drive_service_config.py | 3 +- tests/commands/test_search_command.py | 89 +++++++++-- tests/services/google_drive/conftest.py | 4 +- .../google_drive/test_google_drive_files.py | 5 +- tests/services/test_search_export.py | 91 ++++++++--- 22 files changed, 364 insertions(+), 329 deletions(-) rename src/docbinder_oss/{commands => cli}/__init__.py (100%) create mode 100644 src/docbinder_oss/cli/provider/__init__.py rename src/docbinder_oss/{commands => cli}/provider/get.py (90%) rename src/docbinder_oss/{commands => cli}/provider/list.py (82%) rename src/docbinder_oss/{commands => cli}/provider/test.py (90%) rename src/docbinder_oss/{commands => cli}/search.py (54%) rename src/docbinder_oss/{commands => cli}/setup.py (93%) delete mode 100644 src/docbinder_oss/commands/provider/__init__.py diff --git a/src/docbinder_oss/commands/__init__.py b/src/docbinder_oss/cli/__init__.py similarity index 100% rename from src/docbinder_oss/commands/__init__.py rename to src/docbinder_oss/cli/__init__.py diff --git a/src/docbinder_oss/cli/provider/__init__.py b/src/docbinder_oss/cli/provider/__init__.py new file mode 100644 index 0000000..6066b1a --- /dev/null +++ b/src/docbinder_oss/cli/provider/__init__.py @@ -0,0 +1,12 @@ +import typer +from .get import app as get_app +from .list import app as list_app +from .test import app as test_app + +# --- Provider Subcommand Group --- +# We create a separate Typer app for the 'provider' command. +# This allows us to nest commands like 'provider list' and 'provider get'. +app = typer.Typer(help="Commands to manage providers. 
List them or get details for a specific one.") +app.add_typer(get_app) +app.add_typer(list_app) +app.add_typer(test_app) diff --git a/src/docbinder_oss/commands/provider/get.py b/src/docbinder_oss/cli/provider/get.py similarity index 90% rename from src/docbinder_oss/commands/provider/get.py rename to src/docbinder_oss/cli/provider/get.py index 0e9f1fe..7793870 100644 --- a/src/docbinder_oss/commands/provider/get.py +++ b/src/docbinder_oss/cli/provider/get.py @@ -1,8 +1,9 @@ -from docbinder_oss.commands.provider import provider_app import typer +app = typer.Typer() -@provider_app.command("get") + +@app.command("get") def get_provider( connection_type: str = typer.Option( None, "--type", "-t", help="The type of the provider to get." @@ -25,7 +26,8 @@ def get_provider( provider_found = True if provider.type == connection_type: typer.echo( - f"Provider '{provider.name}' of type '{connection_type}' found with config: {provider}" + f"Provider '{provider.name}' of type '{connection_type}'" + f" found with config: {provider}" ) provider_found = True if not provider_found: diff --git a/src/docbinder_oss/commands/provider/list.py b/src/docbinder_oss/cli/provider/list.py similarity index 82% rename from src/docbinder_oss/commands/provider/list.py rename to src/docbinder_oss/cli/provider/list.py index ce0a664..c3bd5f9 100644 --- a/src/docbinder_oss/commands/provider/list.py +++ b/src/docbinder_oss/cli/provider/list.py @@ -1,8 +1,9 @@ -from docbinder_oss.commands.provider import provider_app import typer +app = typer.Typer() -@provider_app.command() + +@app.command() def list(): """List all configured providers.""" from docbinder_oss.helpers.config import load_config diff --git a/src/docbinder_oss/commands/provider/test.py b/src/docbinder_oss/cli/provider/test.py similarity index 90% rename from src/docbinder_oss/commands/provider/test.py rename to src/docbinder_oss/cli/provider/test.py index 7a18db2..d01262d 100644 --- a/src/docbinder_oss/commands/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -1,16 +1,17 @@ -from docbinder_oss.commands.provider import provider_app import typer from typing import Annotated +app = typer.Typer() -@provider_app.command("test") + +@app.command("test") def test( name: Annotated[str, typer.Argument(help="The name of the provider to test the connection.")], ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config from docbinder_oss.services import create_provider_instance - + if not name: typer.echo("Provider name is required.") raise typer.Exit(code=1) @@ -24,8 +25,8 @@ def test( for provider_config in config.providers: if provider_config.name == name: found_provider_config = provider_config - break # Exit the loop once the provider is found - + break # Exit the loop once the provider is found + if found_provider_config: typer.echo(f"Testing connection for provider '{name}'...") try: @@ -39,7 +40,7 @@ def test( except Exception as e: typer.echo(f"Failed to connect to provider '{name}': {e}") return - + # If we reach here, the provider was not found typer.echo(f"Provider '{name}' not found in configuration.") raise typer.Exit(code=1) diff --git a/src/docbinder_oss/commands/search.py b/src/docbinder_oss/cli/search.py similarity index 54% rename from src/docbinder_oss/commands/search.py rename to src/docbinder_oss/cli/search.py index 7253455..f19d0c2 100644 --- a/src/docbinder_oss/commands/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,21 +1,38 @@ import typer from typing import Optional -from 
docbinder_oss.main import app + +app = typer.Typer() + @app.command() def search( name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), - updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), - updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), - created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), - created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), + owner: Optional[str] = typer.Option( + None, "--owner", help="Owner/contributor/reader email address to filter" + ), + updated_after: Optional[str] = typer.Option( + None, "--updated-after", help="Last update after (ISO timestamp)" + ), + updated_before: Optional[str] = typer.Option( + None, "--updated-before", help="Last update before (ISO timestamp)" + ), + created_after: Optional[str] = typer.Option( + None, "--created-after", help="Created after (ISO timestamp)" + ), + created_before: Optional[str] = typer.Option( + None, "--created-before", help="Created before (ISO timestamp)" + ), min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"), max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), - export_format: str = typer.Option("csv", "--export-format", help="Export format: csv or json", show_default=True), + provider: Optional[str] = typer.Option( + None, "--provider", "-p", help="Provider name to search in" + ), + export_format: str = typer.Option( + "csv", "--export-format", help="Export format: csv or json", show_default=True + ), ): - """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" + """Search for files or folders matching filters across all + providers and export results as CSV or JSON.""" import re import csv import json @@ -53,7 +70,16 @@ def search( for item in files: all_items_by_id[item.id] = item # Attach drive_id for later lookup - all_results.append((provider_config.name, item, getattr(item, "parents", ["root"])[0] if hasattr(item, "parents") and getattr(item, "parents", None) else "root", drive_id_to_name_local)) + all_results.append( + ( + provider_config.name, + item, + getattr(item, "parents", ["root"])[0] + if hasattr(item, "parents") and getattr(item, "parents", None) + else "root", + drive_id_to_name_local, + ) + ) except Exception as e: typer.echo(f"Error searching provider '{provider_config.name}': {e}") @@ -83,7 +109,9 @@ def build_path(item): if owner: emails = set() owners_list = getattr(item, "owners", None) or [] - emails.update([u.email_address for u in owners_list if u and getattr(u, "email_address", None)]) + emails.update( + [u.email_address for u in owners_list if u and getattr(u, "email_address", None)] + ) last_mod_user = getattr(item, "last_modifying_user", None) if last_mod_user and getattr(last_mod_user, "email_address", None): emails.add(last_mod_user.email_address) @@ -91,17 +119,25 @@ def build_path(item): continue # Last update filter if updated_after: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) < datetime.fromisoformat(updated_after): + if not 
item.modified_time or datetime.fromisoformat( + str(item.modified_time) + ) < datetime.fromisoformat(updated_after): continue if updated_before: - if not item.modified_time or datetime.fromisoformat(str(item.modified_time)) > datetime.fromisoformat(updated_before): + if not item.modified_time or datetime.fromisoformat( + str(item.modified_time) + ) > datetime.fromisoformat(updated_before): continue # Created at filter if created_after: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) < datetime.fromisoformat(created_after): + if not item.created_time or datetime.fromisoformat( + str(item.created_time) + ) < datetime.fromisoformat(created_after): continue if created_before: - if not item.created_time or datetime.fromisoformat(str(item.created_time)) > datetime.fromisoformat(created_before): + if not item.created_time or datetime.fromisoformat( + str(item.created_time) + ) > datetime.fromisoformat(created_before): continue # Size filter (in KB) if min_size is not None: @@ -117,30 +153,63 @@ def build_path(item): except Exception: continue # Find drive name - drive_name = drive_map.get(parent_id) or drive_id_to_name.get(parent_id) or drive_id_to_name.get("root") or "Unknown" + drive_name = ( + drive_map.get(parent_id) + or drive_id_to_name.get(parent_id) + or drive_id_to_name.get("root") + or "Unknown" + ) # Collect all possible params for export, including path, is_folder, and drive_name - results.append({ - "provider": provider_name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "path": build_path(item), - "is_folder": getattr(item, "mime_type", None) == "application/vnd.google-apps.folder", - "drive_name": drive_name, - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join([u.email_address for u in (getattr(item, "owners", None) or []) if u and getattr(u, "email_address", None)]) if getattr(item, "owners", None) else None, - "last_modifying_user": getattr(getattr(item, "last_modifying_user", None), "email_address", None), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - }) + results.append( + { + "provider": provider_name, + "id": getattr(item, "id", None), + "name": getattr(item, "name", None), + "path": build_path(item), + "is_folder": getattr(item, "mime_type", None) + == "application/vnd.google-apps.folder", + "drive_name": drive_name, + "size": getattr(item, "size", None), + "mime_type": getattr(item, "mime_type", None), + "created_time": getattr(item, "created_time", None), + "modified_time": getattr(item, "modified_time", None), + "owners": ",".join( + [ + u.email_address + for u in (getattr(item, "owners", None) or []) + if u and getattr(u, "email_address", None) + ] + ) + if getattr(item, "owners", None) + else None, + "last_modifying_user": getattr( + getattr(item, "last_modifying_user", None), "email_address", None + ), + "web_view_link": getattr(item, "web_view_link", None), + "web_content_link": getattr(item, "web_content_link", None), + "shared": getattr(item, "shared", None), + "trashed": getattr(item, "trashed", None), + } + ) # Write results to CSV or JSON if results: fieldnames = [ - "provider", "id", "name", "path", "is_folder", "drive_name", "size", "mime_type", "created_time", 
"modified_time", "owners", "last_modifying_user", "web_view_link", "web_content_link", "shared", "trashed" + "provider", + "id", + "name", + "path", + "is_folder", + "drive_name", + "size", + "mime_type", + "created_time", + "modified_time", + "owners", + "last_modifying_user", + "web_view_link", + "web_content_link", + "shared", + "trashed", ] if export_format.lower() == "json": with open("search_results.json", "w") as jsonfile: @@ -155,4 +224,4 @@ def build_path(item): typer.echo(f"{len(results)} results written to search_results.csv") else: typer.echo("No results found.") - return results \ No newline at end of file + return results diff --git a/src/docbinder_oss/commands/setup.py b/src/docbinder_oss/cli/setup.py similarity index 93% rename from src/docbinder_oss/commands/setup.py rename to src/docbinder_oss/cli/setup.py index dbe9839..b9ff56d 100644 --- a/src/docbinder_oss/commands/setup.py +++ b/src/docbinder_oss/cli/setup.py @@ -2,7 +2,9 @@ from typing import List, Optional import yaml from docbinder_oss.helpers.config import save_config, validate_config -from docbinder_oss.main import app + +app = typer.Typer(help="DocBinder configuration setup commands.") + @app.command() def setup( @@ -47,4 +49,4 @@ def setup( except Exception as e: typer.echo(f"Error saving config: {e}") raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") \ No newline at end of file + typer.echo("Configuration saved successfully.") diff --git a/src/docbinder_oss/commands/provider/__init__.py b/src/docbinder_oss/commands/provider/__init__.py deleted file mode 100644 index e6057c3..0000000 --- a/src/docbinder_oss/commands/provider/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import typer -from docbinder_oss.main import app - -# --- Provider Subcommand Group --- -# We create a separate Typer app for the 'provider' command. -# This allows us to nest commands like 'provider list' and 'provider get'. -provider_app = typer.Typer( - help="Commands to manage providers. List them or get details for a specific one." -) -# We add this group to our main application. -app.add_typer(provider_app, name="provider") diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 2718f1b..b9e2f9a 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -13,9 +13,7 @@ class Bucket(BaseModel): id: str name: str kind: Optional[str] = Field(description="Type of the bucket, e.g., 'drive#file'") - created_time: Optional[datetime] = Field( - description="Timestamp when the bucket was created." - ) + created_time: Optional[datetime] = Field(description="Timestamp when the bucket was created.") viewable: Optional[bool] restrictions: Optional[Dict[str, Any]] @@ -48,9 +46,7 @@ class File(BaseModel): mime_type: str kind: Optional[str] - is_folder: bool = Field( - False, description="True if the item is a folder, False otherwise." - ) + is_folder: bool = Field(False, description="True if the item is a folder, False otherwise.") web_view_link: Optional[HttpUrl] icon_link: Optional[HttpUrl] @@ -61,9 +57,7 @@ class File(BaseModel): owners: Optional[List[User]] last_modifying_user: Optional[User] - size: Optional[str] = Field( - description="Size in bytes, as a string. Only populated for files." - ) + size: Optional[str] = Field(description="Size in bytes, as a string. 
Only populated for files.") parents: Optional[str] = Field(description="Parent folder ID, if applicable.") capabilities: Optional[FileCapabilities] = None diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 77e17cf..d098793 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -21,9 +21,7 @@ class Config(BaseModel): def load_config() -> Config: if not os.path.exists(CONFIG_PATH): - typer.echo( - f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first." - ) + typer.echo(f"Config file not found at {CONFIG_PATH}. Please run 'docbinder setup' first.") raise typer.Exit(code=1) with open(CONFIG_PATH, "r") as f: config_data = yaml.safe_load(f) @@ -33,9 +31,7 @@ def load_config() -> Config: if config.get("type") not in provider_registry: typer.echo(f"Unknown provider type: {config['type']}") raise typer.Exit(code=1) - config_to_add.append( - provider_registry[config["type"]]["config_class"](**config) - ) + config_to_add.append(provider_registry[config["type"]]["config_class"](**config)) try: configss = Config(providers=config_to_add) return configss diff --git a/src/docbinder_oss/main.py b/src/docbinder_oss/main.py index 116a0c6..d28d3a4 100644 --- a/src/docbinder_oss/main.py +++ b/src/docbinder_oss/main.py @@ -1,11 +1,12 @@ import typer -from docbinder_oss.helpers.config import save_config, validate_config +from docbinder_oss.cli.provider import app as provider_app +from docbinder_oss.cli.search import app as search_app +from docbinder_oss.cli.setup import app as setup_app app = typer.Typer() - -from docbinder_oss.commands import search -from docbinder_oss.commands import setup -from docbinder_oss.commands.provider import list, get, test +app.add_typer(provider_app, name="provider") +app.add_typer(search_app) +app.add_typer(setup_app) # This is the main entry point for the DocBinder CLI. @@ -15,134 +16,5 @@ def main(): pass -@app.command() -def hello(): - """Print a friendly greeting.""" - typer.echo("Hello, DocBinder OSS!") - - -@app.command() -def setup( - file: Optional[str] = typer.Option(None, "--file", help="Path to YAML config file"), - provider: Optional[List[str]] = typer.Option( - None, - "--provider", - help="Provider config as provider:key1=val1,key2=val2", - callback=lambda v: v or [], - ), -): - """Setup DocBinder configuration via YAML file or provider key-value pairs.""" - config_data = {} - if file: - with open(file, "r") as f: - config_data = yaml.safe_load(f) or {} - elif provider: - providers = {} - for entry in provider: - if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) - raise typer.Exit(code=1) - prov_name, prov_kvs = entry.split(":", 1) - kv_dict = {} - for pair in prov_kvs.split(","): - if "=" not in pair: - typer.echo(f"Provider config '{pair}' must be in key=value format.") - raise typer.Exit(code=1) - k, v = pair.split("=", 1) - kv_dict[k] = v - providers[prov_name] = kv_dict - config_data["providers"] = providers - validated = validate_config(config_data) - if not validated.providers: - typer.echo("No providers configured. 
Please add at least one provider.") - raise typer.Exit(code=1) - # Save the validated config - try: - save_config(validated) - except Exception as e: - typer.echo(f"Error saving config: {e}") - raise typer.Exit(code=1) - typer.echo("Configuration saved successfully.") - - -@provider_app.command() -def list(): - """List all configured providers.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - - for provider in config.providers: - typer.echo(f"Provider: {provider.name}, Type: {provider.type}") - - -@provider_app.command("get") -def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), - name: str = typer.Option( - None, "--name", "-n", help="The name of the provider to get." - ), -): - """Get connection information for a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - - count = 0 - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider in config.providers: - if provider.name == name: - typer.echo(f"Provider '{name}' found with config: {provider}") - count += 1 - if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type " - f"'{connection_type}' found with config: {provider}" - ) - count += 1 - if count == 0: - typer.echo( - f"No providers found with name '{name}' or type '{connection_type}'." - ) - raise typer.Exit(code=1) - - -@provider_app.command("test") -def test( - name: Annotated[ - str, typer.Argument(help="The name of the provider to test the connection.") - ], -): - """Test the connection to a specific provider.""" - from docbinder_oss.helpers.config import load_config - - config = load_config() - if not config.providers: - typer.echo("No providers configured.") - raise typer.Exit(code=1) - for provider_config in config.providers: - if provider_config.name == name: - typer.echo(f"Testing connection for provider '{name}'...") - try: - client = create_provider_instance(provider_config) - client.test_connection() - typer.echo(f"Connection to provider '{name}' is successful.") - except Exception as e: - typer.echo(f"Failed to connect to provider '{name}': {e}") - return - # If we reach here, the provider was not found - typer.echo(f"Provider '{name}' not found in configuration.") - raise typer.Exit(code=1) - - if __name__ == "__main__": app() diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/services/__init__.py index 3384d07..78d738c 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/services/__init__.py @@ -12,9 +12,7 @@ if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) logging.getLogger("googleapiclient").setLevel(logging.WARNING) logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index dd51cec..dbeb328 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -56,7 +56,7 @@ def list_all_files(self) -> List[File]: A list of StorageItem objects representing all files and folders. 
""" pass - + @abstractmethod def get_file_metadata(self, item_id: str) -> File: """ diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/services/google_drive/__init__.py index 9059dab..71b6fe3 100644 --- a/src/docbinder_oss/services/google_drive/__init__.py +++ b/src/docbinder_oss/services/google_drive/__init__.py @@ -7,9 +7,7 @@ if not logging.getLogger().handlers: FORMAT = "%(message)s" - logging.basicConfig( - level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()] - ) + logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) logging.getLogger("googleapiclient").setLevel(logging.WARNING) @@ -26,6 +24,7 @@ def register() -> dict: "client_class": GoogleDriveClient, } + def get_service_name() -> str: """ Returns the name of the service. @@ -33,9 +32,10 @@ def get_service_name() -> str: """ return "Google Drive" + def get_service_display_name() -> str: """ Returns the display name of the service. This is used for user-friendly identification. """ - return "Google Drive Service" \ No newline at end of file + return "Google Drive Service" diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index d827ea3..757f7ea 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -1,5 +1,4 @@ import logging -import os from typing import List, Optional from google.auth.transport.requests import Request @@ -7,7 +6,7 @@ from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build -from docbinder_oss.core.schemas import File, Permission +from docbinder_oss.core.schemas import Bucket, File, Permission from docbinder_oss.services.base_class import BaseStorageClient from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles @@ -39,13 +38,11 @@ def __init__(self, config: GoogleDriveServiceConfig): self.permissions = GoogleDrivePermissions(self.service) def _get_credentials(self): - TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") - # Ensure the directory exists - os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) + logger.info("Getting credentials for Google Drive client") try: creds = Credentials.from_authorized_user_file( - TOKEN_PATH, scopes=self.SCOPES + self.config.gcp_token_json, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -59,7 +56,7 @@ def _get_credentials(self): ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(TOKEN_PATH, "w") as token: + with open(self.config.gcp_token_json, "w") as token: token.write(creds.to_json()) return creds @@ -71,38 +68,25 @@ def test_connection(self) -> bool: logger.error(f"Test connection failed: {e}") return False - def list_buckets(self) -> list: + def list_buckets(self) -> list[Bucket]: return self.buckets.list_buckets() def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - - def list_all_files(self) -> List[File]: - """ - Recursively list all files and folders in all buckets (drives). - Handles My Drive and Shared Drives correctly. 
- """ - def _recursive_list(folder_id, is_drive_root=False): - items = self.files.list_files(folder_id, is_drive_root=is_drive_root) - all_items = [] - for item in items: - all_items.append(item) - # Use mime_type to check if this is a folder - if getattr(item, "mime_type", None) == "application/vnd.google-apps.folder": - all_items.extend(_recursive_list(item.id)) - return all_items - buckets = self.list_buckets() - all_files = [] + def list_files_recursively(self, bucket: Bucket = None) -> List[File]: + """List all files and folders recursively in the specified bucket or root.""" + return self.files.list_files_recursively(bucket) + + def list_all_files(self) -> List[File]: + files = [] + buckets = self.buckets.list_buckets() for bucket in buckets: - # If bucket.id == "root", it's My Drive; otherwise, it's a shared drive - is_drive_root = bucket.id != "root" - all_files.extend(_recursive_list(bucket.id, is_drive_root=is_drive_root)) - return all_files + files.extend(self.files.list_files_recursively(bucket)) + return files def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) def get_permissions(self, item_id: str) -> List[Permission]: return self.permissions.get_permissions(item_id) - \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fa20258..225e1aa 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -2,7 +2,7 @@ from googleapiclient.discovery import Resource -from docbinder_oss.core.schemas import File, User +from docbinder_oss.core.schemas import Bucket, File, User logger = logging.getLogger(__name__) @@ -18,43 +18,29 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, folder_id=None, is_drive_root=False) -> list[File]: - # If listing the root of a shared drive + def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list[File]: + args = { + "includeItemsFromAllDrives": True, + "supportsAllDrives": True, + "fields": f"files({REQUIRED_FIELDS})", + } + folder_id = bucket.id if bucket else None if is_drive_root: - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - corpora="drive", - driveId=folder_id, - includeItemsFromAllDrives=True, - supportsAllDrives=True, - q="'root' in parents and trashed=false", - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() + if not folder_id: + raise ValueError("folder_id must be provided when is_drive_root is True") + args.update( + { + "corpora": "drive", + "driveId": folder_id, + "q": "'root' in parents and trashed=false", + } ) elif folder_id == "root" or folder_id is None: - # Listing the root of My Drive - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - q="'root' in parents and trashed=false", - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + args["q"] = "'root' in parents and trashed=false" else: - # Listing a regular folder - resp = ( - self.service.files() # type: ignore[attr-defined] - .list( - q=f"'{folder_id}' in parents and trashed=false", - includeItemsFromAllDrives=True, - supportsAllDrives=True, - fields=f"files({REQUIRED_FIELDS})", - ) - .execute() - ) + args["q"] = f"'{folder_id}' in parents and trashed=false" + + resp = self.service.files().list(**args).execute() return [ File( id=f.get("id"), @@ -90,6 +76,22 @@ def list_files(self, folder_id=None, 
is_drive_root=False) -> list[File]: for f in resp.get("files") ] + def list_files_recursively(self, bucket: Bucket) -> list[File]: + """List all files in the Google Drive bucket.""" + is_drive_root = bucket.id != "root" + + def _recursive_list(folder_id: str): + items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) + all_items = [] + for item in items: + all_items.append(item) + # Use mime_type to check if this is a folder + if item.mime_type == "application/vnd.google-apps.folder": + all_items.extend(_recursive_list(item.id)) + return all_items + + return _recursive_list(bucket.id) + def get_file_metadata(self, file_id: str): item_metadata = ( self.service.files() # type: ignore[attr-defined] @@ -118,12 +120,8 @@ def get_file_metadata(self, file_id: str): for owner in item_metadata.get("owners") ], last_modifying_user=User( - display_name=item_metadata.get("lastModifyingUser", {}).get( - "displayName" - ), - email_address=item_metadata.get("lastModifyingUser", {}).get( - "emailAddress" - ), + display_name=item_metadata.get("lastModifyingUser", {}).get("displayName"), + email_address=item_metadata.get("lastModifyingUser", {}).get("emailAddress"), photo_link=item_metadata.get("lastModifyingUser", {}).get("photoLink"), kind=item_metadata.get("lastModifyingUser", {}).get("kind"), ), @@ -132,7 +130,6 @@ def get_file_metadata(self, file_id: str): trashed=item_metadata.get("trashed"), shared=item_metadata.get("shared"), starred=item_metadata.get("starred"), - is_folder=item_metadata.get("mimeType") - == "application/vnd.google-apps.folder", + is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/services/google_drive/google_drive_permissions.py index 70988e2..ab0b830 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_permissions.py +++ b/src/docbinder_oss/services/google_drive/google_drive_permissions.py @@ -31,11 +31,7 @@ def get_user(self): ) def get_permissions(self, item_id: str): - resp = ( - self.service.permissions() - .list(fileId=item_id, fields="permissions") - .execute() - ) + resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() return [ Permission( diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index 236f4d3..dd6c957 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,4 +5,5 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str \ No newline at end of file + gcp_credentials_json: str + gcp_token_json: str diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 9f1bd67..c0f69ab 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -5,6 +5,7 @@ from typer.testing import CliRunner from docbinder_oss.main import app + class DummyFile: def __init__(self, **kwargs): self.id = kwargs.get("id", "fileid1") @@ -13,34 +14,67 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = 
kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.owners = kwargs.get( + "owners", [type("User", (), {"email_address": "owner@example.com"})()] + ) + self.last_modifying_user = kwargs.get( + "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() + ) self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): # Patch config loader to return two dummy provider configs class DummyProviderConfig: def __init__(self, name): self.name = name + class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider def create_provider_instance(cfg): if cfg.name == "dummy1": - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f1", + name="Alpha Report", + size=2048, + owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", + modified_time="2024-01-02T10:00:00", + ) + ] + }, + )() else: - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f2", + name="Beta Notes", + size=4096, + owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", + modified_time="2024-02-02T10:00:00", + ) + ] + }, + )() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -48,6 +82,7 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) + def test_search_export_csv(): runner = CliRunner() result = runner.invoke(app, ["search", "--export-format", "csv"]) @@ -66,6 +101,7 @@ def test_search_export_csv(): if r["name"] == "Beta Notes": assert r["owners"] == "beta@b.com" + def test_search_export_json(): runner = CliRunner() result = runner.invoke(app, ["search", "--export-format", "json"]) @@ -78,6 +114,7 @@ def test_search_export_json(): names = set(d["name"] for d in data) assert names == {"Alpha Report", "Beta Notes"} + def test_search_name_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) @@ -87,6 +124,7 @@ def test_search_name_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_owner_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--owner", "beta@b.com", 
"--export-format", "json"]) @@ -96,24 +134,31 @@ def test_search_owner_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_min_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) @@ -123,6 +168,7 @@ def test_search_min_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_max_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) @@ -132,6 +178,7 @@ def test_search_max_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_provider_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) @@ -142,9 +189,25 @@ def test_search_provider_filter(): assert data[0]["provider"] == "dummy2" assert data[0]["name"] == "Beta Notes" + def test_search_combined_filters(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke( + app, + [ + "search", + "--name", + "Beta", + "--owner", + "beta@b.com", + "--min-size", + "3", + "--provider", + "dummy2", + "--export-format", + "json", + ], + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index ff50b73..c60300f 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -19,9 +19,7 @@ def mock_gdrive_service(): Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will receive our mock object instead of making a real network call. 
""" - with patch( - "docbinder_oss.services.google_drive.google_drive_client.build" - ) as mock_build: + with patch("docbinder_oss.services.google_drive.google_drive_client.build") as mock_build: # Create a mock for the service object that `build` would return mock_service = MagicMock() # Configure the `build` function to return our mock service diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 393849c..7b0b019 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -35,6 +35,7 @@ class DummyConfig: providers = [DummyProviderConfig()] monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Simulate a folder structure: root -> folder1 -> file1, file2; root -> file3 def list_all_files(self): return [ @@ -49,7 +50,9 @@ class DummyClient: def list_all_files(self): return list_all_files(self) - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient()) + monkeypatch.setattr( + "docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient() + ) orig_cwd = os.getcwd() os.chdir(tmp_path) yield diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py index c16c44b..78fd856 100644 --- a/tests/services/test_search_export.py +++ b/tests/services/test_search_export.py @@ -1,12 +1,10 @@ import os -import csv import json -import tempfile -import shutil import pytest from typer.testing import CliRunner from docbinder_oss.main import app + class DummyFile: def __init__(self, **kwargs): self.id = kwargs.get("id", "fileid1") @@ -15,34 +13,67 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get("last_modifying_user", type("User", (), {"email_address": "mod@example.com"})()) + self.owners = kwargs.get( + "owners", [type("User", (), {"email_address": "owner@example.com"})()] + ) + self.last_modifying_user = kwargs.get( + "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() + ) self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): # Patch config loader to return two dummy provider configs class DummyProviderConfig: def __init__(self, name): self.name = name + class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] + monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch create_provider_instance to return a dummy client with different files per provider def create_provider_instance(cfg): if cfg.name == "dummy1": - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f1", name="Alpha Report", size=2048, owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", modified_time="2024-01-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( 
+ id="f1", + name="Alpha Report", + size=2048, + owners=[type("User", (), {"email_address": "alpha@a.com"})()], + created_time="2024-01-01T10:00:00", + modified_time="2024-01-02T10:00:00", + ) + ] + }, + )() else: - return type("DummyClient", (), {"list_all_files": lambda self: [ - DummyFile(id="f2", name="Beta Notes", size=4096, owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", modified_time="2024-02-02T10:00:00") - ]})() + return type( + "DummyClient", + (), + { + "list_all_files": lambda self: [ + DummyFile( + id="f2", + name="Beta Notes", + size=4096, + owners=[type("User", (), {"email_address": "beta@b.com"})()], + created_time="2024-02-01T10:00:00", + modified_time="2024-02-02T10:00:00", + ) + ] + }, + )() + monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -50,26 +81,34 @@ def create_provider_instance(cfg): yield os.chdir(orig_cwd) -# The test logic for search export and filters has been consolidated into `tests/commands/test_search_command.py`. + +# The test logic for search export and filters has been consolidated into +# `tests/commands/test_search_command.py`. # This file no longer contains duplicate tests. def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_min_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) @@ -79,6 +118,7 @@ def test_search_min_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" + def test_search_max_size_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) @@ -88,6 +128,7 @@ def test_search_max_size_filter(): assert len(data) == 1 assert data[0]["name"] == "Alpha Report" + def test_search_provider_filter(): runner = CliRunner() result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) @@ -98,9 +139,25 @@ def test_search_provider_filter(): assert data[0]["provider"] == "dummy2" assert data[0]["name"] == "Beta Notes" + def test_search_combined_filters(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Beta", "--owner", "beta@b.com", "--min-size", "3", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke( + app, + [ + "search", + "--name", + "Beta", + "--owner", + "beta@b.com", + "--min-size", + "3", + "--provider", + "dummy2", + "--export-format", + "json", + ], + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) From ea4ebfc1ff70b4f84bbe263a3f0d3b16c0cd84fa Mon Sep 17 
00:00:00 2001 From: PaoloLeonard Date: Sun, 22 Jun 2025 23:31:00 +0200 Subject: [PATCH 17/39] refactoring list all --- src/docbinder_oss/cli/search.py | 247 +++++------------- src/docbinder_oss/services/__init__.py | 6 +- src/docbinder_oss/services/base_class.py | 14 +- .../services/dropbox/__init__.py | 11 - .../services/dropbox/dropbox_client.py | 5 - .../dropbox/dropbox_service_config.py | 8 - .../google_drive/google_drive_client.py | 6 +- .../google_drive/google_drive_files.py | 39 +-- 8 files changed, 106 insertions(+), 230 deletions(-) delete mode 100644 src/docbinder_oss/services/dropbox/__init__.py delete mode 100644 src/docbinder_oss/services/dropbox/dropbox_client.py delete mode 100644 src/docbinder_oss/services/dropbox/dropbox_service_config.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index f19d0c2..87b3988 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,6 +1,11 @@ +from datetime import datetime +import re import typer from typing import Optional +from docbinder_oss.helpers.config import Config +from docbinder_oss.services.base_class import BaseProvider + app = typer.Typer() @@ -28,200 +33,82 @@ def search( None, "--provider", "-p", help="Provider name to search in" ), export_format: str = typer.Option( - "csv", "--export-format", help="Export format: csv or json", show_default=True + None, "--export-format", help="Export format: csv or json", show_default=True ), ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import re import csv import json - from datetime import datetime from docbinder_oss.helpers.config import load_config from docbinder_oss.services import create_provider_instance - - config = load_config() + + # 1 Load documents with filter "provider" + # 2 Filter the documents based on the provided filters + # 3 Export results to CSV or JSON + + config: Config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - - # Build a mapping of id -> file for path reconstruction - all_items_by_id = {} - all_results = [] - drive_id_to_name = {} - # If provider is Google Drive, build a mapping of drive id to drive name + + current_files = {} for provider_config in config.providers: if provider and provider_config.name != provider: continue - client = create_provider_instance(provider_config) - if client is None or not hasattr(client, "list_all_files"): + client: BaseProvider = create_provider_instance(provider_config) + if not client: + typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") + raise typer.Exit(code=1) + current_files[provider_config.name] = client.list_all_files() + + current_files = filter_files( + current_files, + name=name, + owner=owner, + updated_after=updated_after, + updated_before=updated_before, + created_after=created_after, + created_before=created_before, + min_size=min_size, + max_size=max_size, + ) + + if not export_format: + typer.echo(current_files) + return + +def filter_files( + files, + name=None, + owner=None, + updated_after=None, + updated_before=None, + created_after=None, + created_before=None, + min_size=None, + max_size=None, +): + results = [] + + for file in files: + if name and not re.search(name, file.name, re.IGNORECASE): + continue + if owner and not any(owner in u.email_address for u in file.owners): + continue + if updated_after and file.modified_time < datetime.fromisoformat(updated_after): + continue + if 
updated_before and file.modified_time > datetime.fromisoformat(updated_before): + continue + if created_after and file.created_time < datetime.fromisoformat(created_after): + continue + if created_before and file.created_time > datetime.fromisoformat(created_before): + continue + if min_size and file.size < min_size * 1024: + continue + if max_size and file.size > max_size * 1024: continue - # Try to get drive mapping if possible - drive_id_to_name_local = {} - if hasattr(client, "buckets") and hasattr(client.buckets, "list_buckets"): # type: ignore[attr-defined] - try: - for bucket in client.buckets.list_buckets(): # type: ignore[attr-defined] - drive_id_to_name_local[bucket.id] = bucket.name - except Exception: - pass - drive_id_to_name.update(drive_id_to_name_local) - try: - files = client.list_all_files() - for item in files: - all_items_by_id[item.id] = item - # Attach drive_id for later lookup - all_results.append( - ( - provider_config.name, - item, - getattr(item, "parents", ["root"])[0] - if hasattr(item, "parents") and getattr(item, "parents", None) - else "root", - drive_id_to_name_local, - ) - ) - except Exception as e: - typer.echo(f"Error searching provider '{provider_config.name}': {e}") - def build_path(item): - # Reconstruct the path by walking up parents - path_parts = [item.name] - current = item - seen = set() - while getattr(current, "parents", None): - parent_ids = current.parents if isinstance(current.parents, list) else [current.parents] - parent_id = parent_ids[0] if parent_ids else None - if not parent_id or parent_id in seen or parent_id not in all_items_by_id: - break - seen.add(parent_id) - parent = all_items_by_id[parent_id] - path_parts.append(parent.name) - current = parent - return "/".join(reversed(path_parts)) + results.append(file) - results = [] - for provider_name, item, parent_id, drive_map in all_results: - # Name regex filter - if name: - if not re.search(name, item.name or "", re.IGNORECASE): - continue - # Owner/contributor/reader email filter - if owner: - emails = set() - owners_list = getattr(item, "owners", None) or [] - emails.update( - [u.email_address for u in owners_list if u and getattr(u, "email_address", None)] - ) - last_mod_user = getattr(item, "last_modifying_user", None) - if last_mod_user and getattr(last_mod_user, "email_address", None): - emails.add(last_mod_user.email_address) - if owner not in emails: - continue - # Last update filter - if updated_after: - if not item.modified_time or datetime.fromisoformat( - str(item.modified_time) - ) < datetime.fromisoformat(updated_after): - continue - if updated_before: - if not item.modified_time or datetime.fromisoformat( - str(item.modified_time) - ) > datetime.fromisoformat(updated_before): - continue - # Created at filter - if created_after: - if not item.created_time or datetime.fromisoformat( - str(item.created_time) - ) < datetime.fromisoformat(created_after): - continue - if created_before: - if not item.created_time or datetime.fromisoformat( - str(item.created_time) - ) > datetime.fromisoformat(created_before): - continue - # Size filter (in KB) - if min_size is not None: - try: - if not item.size or int(item.size) < min_size * 1024: - continue - except Exception: - continue - if max_size is not None: - try: - if not item.size or int(item.size) > max_size * 1024: - continue - except Exception: - continue - # Find drive name - drive_name = ( - drive_map.get(parent_id) - or drive_id_to_name.get(parent_id) - or drive_id_to_name.get("root") - or "Unknown" - ) - # Collect all 
possible params for export, including path, is_folder, and drive_name - results.append( - { - "provider": provider_name, - "id": getattr(item, "id", None), - "name": getattr(item, "name", None), - "path": build_path(item), - "is_folder": getattr(item, "mime_type", None) - == "application/vnd.google-apps.folder", - "drive_name": drive_name, - "size": getattr(item, "size", None), - "mime_type": getattr(item, "mime_type", None), - "created_time": getattr(item, "created_time", None), - "modified_time": getattr(item, "modified_time", None), - "owners": ",".join( - [ - u.email_address - for u in (getattr(item, "owners", None) or []) - if u and getattr(u, "email_address", None) - ] - ) - if getattr(item, "owners", None) - else None, - "last_modifying_user": getattr( - getattr(item, "last_modifying_user", None), "email_address", None - ), - "web_view_link": getattr(item, "web_view_link", None), - "web_content_link": getattr(item, "web_content_link", None), - "shared": getattr(item, "shared", None), - "trashed": getattr(item, "trashed", None), - } - ) - # Write results to CSV or JSON - if results: - fieldnames = [ - "provider", - "id", - "name", - "path", - "is_folder", - "drive_name", - "size", - "mime_type", - "created_time", - "modified_time", - "owners", - "last_modifying_user", - "web_view_link", - "web_content_link", - "shared", - "trashed", - ] - if export_format.lower() == "json": - with open("search_results.json", "w") as jsonfile: - json.dump(results, jsonfile, indent=2, default=str) - typer.echo(f"{len(results)} results written to search_results.json") - else: - with open("search_results.csv", "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for row in results: - writer.writerow(row) - typer.echo(f"{len(results)} results written to search_results.csv") - else: - typer.echo("No results found.") - return results + return results \ No newline at end of file diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/services/__init__.py index 78d738c..0e57925 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/services/__init__.py @@ -8,13 +8,13 @@ from rich.logging import RichHandler from docbinder_oss import services -from docbinder_oss.services.base_class import BaseStorageClient, ServiceConfig +from docbinder_oss.services.base_class import BaseProvider, ServiceConfig if not logging.getLogger().handlers: FORMAT = "%(message)s" logging.basicConfig(level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]) -logging.getLogger("googleapiclient").setLevel(logging.WARNING) +logging.getLogger("services").setLevel(logging.WARNING) logger = logging.getLogger(__name__) _provider_registry = None # Module-level cache @@ -37,7 +37,7 @@ def get_provider_registry() -> dict: return _provider_registry -def create_provider_instance(config: ServiceConfig) -> Optional["BaseStorageClient"]: +def create_provider_instance(config: ServiceConfig) -> Optional["BaseProvider"]: """ Factory function to create a provider instance from its config. 
""" diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py index dbeb328..7b62f72 100644 --- a/src/docbinder_oss/services/base_class.py +++ b/src/docbinder_oss/services/base_class.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from docbinder_oss.core.schemas import File, Permission +from docbinder_oss.core.schemas import Bucket, File, Permission class ServiceConfig(BaseModel): @@ -13,7 +13,7 @@ class ServiceConfig(BaseModel): name: str -class BaseStorageClient(ABC): +class BaseProvider(ABC): """ Abstract base class for a client that interacts with a cloud storage service. Defines a standard interface for listing items and retrieving metadata. @@ -32,6 +32,16 @@ def test_connection(self) -> bool: True if the connection is successful, False otherwise. """ pass + + @abstractmethod + def list_buckets(self) -> List[Bucket]: + """ + Lists all available buckets in the storage service. + + Returns: + A list of bucket names. + """ + pass @abstractmethod def list_files(self, folder_id: Optional[str] = None) -> List[File]: diff --git a/src/docbinder_oss/services/dropbox/__init__.py b/src/docbinder_oss/services/dropbox/__init__.py deleted file mode 100644 index 80759af..0000000 --- a/src/docbinder_oss/services/dropbox/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .dropbox_client import DropboxClient -from .dropbox_service_config import DropboxServiceConfig - - -def register(): - # Register the Dropbox client - return { - "display_name": "dropbox", - "config_class": DropboxServiceConfig, - "client_class": DropboxClient, - } diff --git a/src/docbinder_oss/services/dropbox/dropbox_client.py b/src/docbinder_oss/services/dropbox/dropbox_client.py deleted file mode 100644 index 3919701..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_client.py +++ /dev/null @@ -1,5 +0,0 @@ -from docbinder_oss.services.base_class import BaseStorageClient - - -class DropboxClient(BaseStorageClient): - pass diff --git a/src/docbinder_oss/services/dropbox/dropbox_service_config.py b/src/docbinder_oss/services/dropbox/dropbox_service_config.py deleted file mode 100644 index 515c471..0000000 --- a/src/docbinder_oss/services/dropbox/dropbox_service_config.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Literal - -from docbinder_oss.services.base_class import ServiceConfig - - -class DropboxServiceConfig(ServiceConfig): - type: Literal["dropbox"] = "dropbox" # type: ignore[override] - api_key: str diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index 757f7ea..fe8f93c 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -7,7 +7,7 @@ from googleapiclient.discovery import build from docbinder_oss.core.schemas import Bucket, File, Permission -from docbinder_oss.services.base_class import BaseStorageClient +from docbinder_oss.services.base_class import BaseProvider from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles from docbinder_oss.services.google_drive.google_drive_permissions import ( @@ -21,7 +21,7 @@ logger.setLevel(logging.INFO) -class GoogleDriveClient(BaseStorageClient): +class GoogleDriveClient(BaseProvider): def __init__(self, config: GoogleDriveServiceConfig): super().__init__(config) logger.info("Initializing Google Drive client") @@ -74,7 +74,7 @@ def 
list_buckets(self) -> list[Bucket]: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - def list_files_recursively(self, bucket: Bucket = None) -> List[File]: + def list_files_recursively(self, bucket: str = None) -> List[File]: """List all files and folders recursively in the specified bucket or root.""" return self.files.list_files_recursively(bucket) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index 225e1aa..fac56f7 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -10,7 +10,7 @@ "id,name,mimeType,kind,size,createdTime,modifiedTime," "owners(permissionId,displayName,emailAddress,photoLink)," "lastModifyingUser(permissionId,displayName,emailAddress,photoLink)," - "webViewLink,iconLink,trashed,shared,starred" + "webViewLink,iconLink,trashed,shared,starred,parents" ) @@ -18,29 +18,33 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list[File]: + def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[File]: args = { "includeItemsFromAllDrives": True, "supportsAllDrives": True, - "fields": f"files({REQUIRED_FIELDS})", + "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - folder_id = bucket.id if bucket else None - if is_drive_root: - if not folder_id: - raise ValueError("folder_id must be provided when is_drive_root is True") + logger.debug(f"{type(bucket)}: {bucket}") + bucket_id = bucket.id if hasattr(bucket, "id") else bucket + + if is_drive_root and bucket_id != "root": args.update( { "corpora": "drive", - "driveId": folder_id, + "driveId": bucket_id, "q": "'root' in parents and trashed=false", } ) - elif folder_id == "root" or folder_id is None: - args["q"] = "'root' in parents and trashed=false" else: - args["q"] = f"'{folder_id}' in parents and trashed=false" - + parent_id = bucket_id + if parent_id == "root" or parent_id is None: + args["q"] = "'root' in parents and trashed=false" + else: + args["q"] = f"'{parent_id}' in parents and trashed=false" + resp = self.service.files().list(**args).execute() + print(len(resp["files"])) + exit(1) return [ File( id=f.get("id"), @@ -71,26 +75,25 @@ def list_files(self, bucket: Bucket = None, is_drive_root: bool = False) -> list shared=f.get("shared"), starred=f.get("starred"), is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", - parents=folder_id if folder_id else None, + parents=bucket_id if bucket_id else None, ) for f in resp.get("files") ] - def list_files_recursively(self, bucket: Bucket) -> list[File]: + def list_files_recursively(self, bucket: str) -> list[File]: """List all files in the Google Drive bucket.""" - is_drive_root = bucket.id != "root" + is_drive_root = bucket != "root" def _recursive_list(folder_id: str): items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) all_items = [] for item in items: all_items.append(item) - # Use mime_type to check if this is a folder - if item.mime_type == "application/vnd.google-apps.folder": + if item.is_folder: all_items.extend(_recursive_list(item.id)) return all_items - return _recursive_list(bucket.id) + return _recursive_list(bucket) def get_file_metadata(self, file_id: str): item_metadata = ( From a5d2e6b002904663f07a43fd5ccc5aeb5d0e5074 Mon Sep 17 00:00:00 2001 
00:00:00 2001
From: Christophe Beke Date: Mon, 23 Jun 2025 22:41:24 +0200 Subject: [PATCH 18/39] Update with all new comments. Also made sure the search command now fully adds all required fields for csv and json. --- .gitignore | 3 +- provider_setup_example.yml | 4 + src/docbinder_oss/cli/provider/test.py | 2 +- src/docbinder_oss/cli/search.py | 160 ++++++++++++++--- src/docbinder_oss/core/schemas.py | 17 +- src/docbinder_oss/helpers/config.py | 2 +- src/docbinder_oss/helpers/path_utils.py | 83 +++++++++ src/docbinder_oss/services/base_class.py | 2 +- .../google_drive/google_drive_buckets.py | 2 +- .../google_drive/google_drive_client.py | 28 +-- .../google_drive/google_drive_files.py | 34 ++-- .../google_drive/google_drive_permissions.py | 4 +- .../google_drive_service_config.py | 4 +- tests/commands/test_search_command.py | 43 ++++- tests/services/google_drive/conftest.py | 1 - .../google_drive/test_google_drive_files.py | 96 +++++----- tests/services/test_search_export.py | 167 ------------------ 17 files changed, 362 insertions(+), 290 deletions(-) create mode 100644 provider_setup_example.yml create mode 100644 src/docbinder_oss/helpers/path_utils.py delete mode 100644 tests/services/test_search_export.py diff --git a/.gitignore b/.gitignore index a8e0008..8ec8616 100644 --- a/.gitignore +++ b/.gitignore @@ -79,4 +79,5 @@ gcp_credentials.json *_token.json # Test files -search_results.csv \ No newline at end of file +search_results.csv +search_results.json \ No newline at end of file diff --git a/provider_setup_example.yml b/provider_setup_example.yml new file mode 100644 index 0000000..ff3c851 --- /dev/null +++ b/provider_setup_example.yml @@ -0,0 +1,4 @@ +providers: + - type: google_drive + name: my_google_drive + gcp_credentials_json: gcp_credentials.json diff --git a/src/docbinder_oss/cli/provider/test.py b/src/docbinder_oss/cli/provider/test.py index d01262d..be424d9 100644 --- a/src/docbinder_oss/cli/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -30,7 +30,7 @@ def test( if found_provider_config: typer.echo(f"Testing connection for provider '{name}'...") try: - client = create_provider_instance(provider_config) + client = create_provider_instance(found_provider_config) if client is None: typer.echo(f"Provider '{name}' is not supported or not implemented.") raise typer.Exit(code=1) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 87b3988..8f79b03 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -2,9 +2,14 @@ import re import typer from typing import Optional +import csv +import json +from docbinder_oss.helpers.config import load_config +from docbinder_oss.services import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.services.base_class import BaseProvider +from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -38,10 +43,6 @@ def search( ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - import csv - import json - from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import create_provider_instance # 1 Load documents with filter "provider" # 2 Filter the documents based on the provided filters @@ -56,27 +57,66 @@ def search( for provider_config in config.providers: if provider and provider_config.name != provider: continue - client: BaseProvider = create_provider_instance(provider_config) + client: 
Optional[BaseProvider] = create_provider_instance(provider_config) if not client: typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - - current_files = filter_files( - current_files, - name=name, - owner=owner, - updated_after=updated_after, - updated_before=updated_before, - created_after=created_after, - created_before=created_before, - min_size=min_size, - max_size=max_size, - ) - + + # After collecting all files, build id-to-item mapping for all providers + all_files = [] + for files in current_files.values(): + all_files.extend(files) + + # Build root_id_to_name mapping for all drives (My Drive, Shared Drives, etc.) + root_id_to_name = {} + # Heuristic: a file/folder with no parents or with a special marker is a root + for f in all_files: + # If a file has no parents, treat it as a root + if not getattr(f, 'parents', None) or (isinstance(f.parents, list) and not f.parents[0]): + root_id_to_name[f.id] = f.name + # Optionally, if you have a drive_id or drive_name attribute, add here + # elif hasattr(f, 'drive_id') and hasattr(f, 'drive_name'): + # root_id_to_name[f.drive_id] = f.drive_name + + # Fallback for Google Drive: always include 'root': 'My Drive' if not present + if 'root' not in root_id_to_name: + root_id_to_name['root'] = 'My Drive' + + id_to_path = build_all_full_paths(all_files, root_id_to_name=root_id_to_name) + # Add full_path to each file using the memoized paths + for files in current_files.values(): + for file in files: + file.full_path = id_to_path.get(file.id, file.name) + + # Filter files per provider, keep grouping + filtered_files_by_provider = {} + for provider_name, files in current_files.items(): + filtered = filter_files( + files, + name=name, + owner=owner, + updated_after=updated_after, + updated_before=updated_before, + created_after=created_after, + created_before=created_before, + min_size=min_size, + max_size=max_size, + ) + filtered_files_by_provider[provider_name] = filtered + if not export_format: - typer.echo(current_files) + typer.echo(filtered_files_by_provider) return + elif export_format.lower() == "csv": + __write_csv(filtered_files_by_provider, "search_results.csv") + typer.echo("Results written to search_results.csv") + elif export_format.lower() == "json": + __write_json(filtered_files_by_provider, "search_results.json", flat=True) # or flat=False for grouped + typer.echo("Results written to search_results.json") + else: + typer.echo(f"Unsupported export format: {export_format}") + raise typer.Exit(code=1) def filter_files( files, @@ -90,25 +130,93 @@ def filter_files( max_size=None, ): results = [] - for file in files: if name and not re.search(name, file.name, re.IGNORECASE): continue if owner and not any(owner in u.email_address for u in file.owners): continue - if updated_after and file.modified_time < datetime.fromisoformat(updated_after): + if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): continue - if updated_before and file.modified_time > datetime.fromisoformat(updated_before): + if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): continue - if created_after and file.created_time < datetime.fromisoformat(created_after): + if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): continue - if created_before and file.created_time > datetime.fromisoformat(created_before): + if created_before and __parse_dt(file.created_time) > 
__parse_dt(created_before): continue if min_size and file.size < min_size * 1024: continue if max_size and file.size > max_size * 1024: continue - results.append(file) + return results + +def __parse_dt(val): + if isinstance(val, datetime): + return val + try: + return datetime.fromisoformat(val) + except Exception: + return val + +def __write_csv(files_by_provider, filename): + # Collect all possible fieldnames from all files + all_fieldnames = set(["provider"]) + for files in files_by_provider.values(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + all_fieldnames.update(file_dict.keys()) + # Move provider to the front, rest sorted + fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") + with open(filename, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for provider, files in files_by_provider.items(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict["provider"] = provider + # Flatten owners for CSV (only email addresses) + owners = file_dict.get("owners") + if isinstance(owners, list): + emails = [] + for u in owners: + if hasattr(u, "email_address") and u.email_address: + emails.append(u.email_address) + elif isinstance(u, dict) and u.get("email_address"): + emails.append(u["email_address"]) + elif isinstance(u, str): + emails.append(u) + file_dict["owners"] = ";".join(emails) + # Flatten last_modifying_user for CSV (only email address) + last_mod = file_dict.get("last_modifying_user") + if last_mod is not None: + if hasattr(last_mod, "email_address"): + file_dict["last_modifying_user"] = last_mod.email_address + elif isinstance(last_mod, dict) and "email_address" in last_mod: + file_dict["last_modifying_user"] = last_mod["email_address"] + else: + file_dict["last_modifying_user"] = str(last_mod) + # Flatten parents for CSV + parents = file_dict.get("parents") + if isinstance(parents, list): + file_dict["parents"] = ";".join(str(p) for p in parents) + writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) - return results \ No newline at end of file +def __write_json(files_by_provider, filename, flat=False): + with open(filename, "w") as jsonfile: + if flat: + all_files = [] + for provider, files in files_by_provider.items(): + for file in files: + file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict["provider"] = provider + all_files.append(file_dict) + json.dump(all_files, jsonfile, default=str, indent=2) + else: + grouped = { + provider: [ + file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + for file in files + ] + for provider, files in files_by_provider.items() + } + json.dump(grouped, jsonfile, default=str, indent=2) \ No newline at end of file diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index b9e2f9a..70b1604 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -58,7 +58,7 @@ class File(BaseModel): last_modifying_user: Optional[User] size: Optional[str] = Field(description="Size in bytes, as a string. 
Only populated for files.")
-    parents: Optional[str] = Field(description="Parent folder ID, if applicable.")
+    parents: Optional[List[str]] = Field(description="Parent folder IDs, if applicable.")
 
     capabilities: Optional[FileCapabilities] = None
 
@@ -66,8 +66,21 @@ class File(BaseModel):
     starred: Optional[bool]
     trashed: Optional[bool]
 
-    # If you want a more robust way to set is_folder after initialization:
+    # Add full_path as an optional field for export/CLI assignment
+    full_path: Optional[str] = Field(default=None, description="Full path of the file/folder, computed at runtime.")
+
     def __init__(self, **data: Any):
+        # Coerce parents to a list of strings or None
+        if 'parents' in data:
+            if data['parents'] is None:
+                data['parents'] = None
+            elif isinstance(data['parents'], str):
+                data['parents'] = [data['parents']]
+            elif isinstance(data['parents'], list):
+                # Ensure all elements are strings
+                data['parents'] = [str(p) for p in data['parents'] if p is not None]
+            else:
+                data['parents'] = [str(data['parents'])]
         super().__init__(**data)
         if self.mime_type == "application/vnd.google-apps.folder":
             self.is_folder = True
diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py
index d098793..e92acb4 100644
--- a/src/docbinder_oss/helpers/config.py
+++ b/src/docbinder_oss/helpers/config.py
@@ -16,7 +16,7 @@
 class Config(BaseModel):
     """Main configuration model that holds a list of all provider configs."""
 
-    providers: List[ServiceUnion]
+    providers: list
 
 
 def load_config() -> Config:
diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py
new file mode 100644
index 0000000..d0b9cf9
--- /dev/null
+++ b/src/docbinder_oss/helpers/path_utils.py
@@ -0,0 +1,83 @@
+def build_id_to_item(files):
+    """
+    Build a mapping from file/folder id to the file/folder object.
+    """
+    return {getattr(f, 'id', None): f for f in files if hasattr(f, 'id')}
+
+def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'):
+    """
+    Recursively build the full path for a file or folder using its parents.
+    Returns a string like '/My Drive/Folder/Subfolder/File.pdf'.
+    """
+    path_parts = [file.name]
+    current = file
+    while True:
+        parents = getattr(current, 'parents', None)
+        if not parents or not isinstance(parents, list) or not parents[0]:
+            break
+        parent_id = parents[0]
+        if parent_id == root_id:
+            path_parts.append(root_name)
+            break
+        parent = id_to_item.get(parent_id)
+        if not parent:
+            break
+        path_parts.append(parent.name)
+        current = parent
+    return '/' + '/'.join(reversed(path_parts))
+
+def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to_name=None):
+    """
+    Efficiently compute the full path for every file/folder in one pass using an iterative approach and memoization.
+    Supports multiple drives by using a root_id_to_name mapping.
+    Returns a dict: {file_id: full_path}
+    """
+    id_to_item = build_id_to_item(files)
+    id_to_path = {}
+    if root_id_to_name is None:
+        root_id_to_name = {root_id: root_name}
+    for item in files:
+        if not hasattr(item, 'id') or not hasattr(item, 'name'):
+            continue
+        if item.id in id_to_path:
+            continue
+        # Iterative path construction
+        current = item
+        temp_stack = []
+        while True:
+            if current.id in id_to_path:
+                break
+            parents = getattr(current, 'parents', None)
+            if not parents or not isinstance(parents, list) or not parents[0]:
+                temp_stack.append((current.id, '/' + current.name))
+                break
+            parent_id = parents[0]
+            if parent_id in root_id_to_name:
+                temp_stack.append((current.id, '/' + root_id_to_name[parent_id] + '/' + current.name))
+                break
+            parent = id_to_item.get(parent_id)
+            if not parent:
+                temp_stack.append((current.id, '/' + current.name))
+                break
+            temp_stack.append((current.id, None))  # Mark as not yet resolved
+            current = parent
+        # Now unwind the stack and build the paths
+        while temp_stack:
+            file_id, path = temp_stack.pop()
+            if path is not None:
+                id_to_path[file_id] = path
+            else:
+                parent_id = id_to_item[file_id].parents[0]
+                parent_path = id_to_path.get(parent_id, '')
+                id_to_path[file_id] = parent_path.rstrip('/') + '/' + id_to_item[file_id].name
+        # Ensure root_name is present at the start (for legacy single-drive fallback)
+        found_root = False
+        for root_name_val in root_id_to_name.values():
+            if id_to_path[item.id].lstrip('/').startswith(root_name_val + '/'):  # e.g. 'My Drive/'
+                found_root = True
+                break
+        if not found_root:
+            # Use the first root_name as fallback
+            fallback_root = next(iter(root_id_to_name.values()))
+            id_to_path[item.id] = '/' + fallback_root + id_to_path[item.id]
+    return id_to_path
diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/services/base_class.py
index 7b62f72..5d8f09e 100644
--- a/src/docbinder_oss/services/base_class.py
+++ b/src/docbinder_oss/services/base_class.py
@@ -44,7 +44,7 @@ def list_buckets(self) -> List[Bucket]:
         pass
 
     @abstractmethod
-    def list_files(self, folder_id: Optional[str] = None) -> List[File]:
+    def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]:
         """
         Lists items (files and folders) within a specific folder.
 
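For intuition, here is a minimal standalone sketch of the parent-chain resolution that helpers/path_utils.py implements above. It resolves paths recursively where the patch uses an explicit stack, and Item and build_full_paths are illustrative stand-ins, not names from this series; only the first parent of each item is consulted, as in the patch:

    from dataclasses import dataclass
    from typing import Dict, List, Optional

    @dataclass
    class Item:
        id: str
        name: str
        parents: Optional[List[str]] = None  # only parents[0] is used, mirroring the patch

    def build_full_paths(items: List[Item], root_id_to_name: Dict[str, str]) -> Dict[str, str]:
        """Resolve '/<drive name>/.../<name>' for every item, memoizing shared prefixes."""
        by_id = {i.id: i for i in items}
        memo: Dict[str, str] = {}

        def path_of(item: Item) -> str:
            if item.id in memo:
                return memo[item.id]
            if not item.parents:
                memo[item.id] = '/' + item.name  # no parent recorded: item is itself a root
            elif item.parents[0] in root_id_to_name:
                # parent is a drive root, e.g. 'root' -> 'My Drive' or a shared-drive id
                memo[item.id] = '/' + root_id_to_name[item.parents[0]] + '/' + item.name
            elif item.parents[0] in by_id:
                # ordinary folder: resolve the parent first, then append this name
                memo[item.id] = path_of(by_id[item.parents[0]]) + '/' + item.name
            else:
                memo[item.id] = '/' + item.name  # unknown parent id: degrade gracefully
            return memo[item.id]

        return {i.id: path_of(i) for i in items}

    items = [
        Item('folder1', 'folder1', ['root']),
        Item('file1', 'file1.pdf', ['folder1']),
        Item('file3', 'file3.pdf', ['root']),
    ]
    print(build_full_paths(items, {'root': 'My Drive'}))
    # {'folder1': '/My Drive/folder1',
    #  'file1': '/My Drive/folder1/file1.pdf',
    #  'file3': '/My Drive/file3.pdf'}

The real helper memoizes the same way but walks iteratively with an explicit stack, presumably to sidestep Python's recursion limit on deeply nested folders; cli/search.py then attaches each resolved path to the file as full_path before the CSV/JSON export.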
diff --git a/src/docbinder_oss/services/google_drive/google_drive_buckets.py b/src/docbinder_oss/services/google_drive/google_drive_buckets.py index e5746be..1976b89 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_buckets.py +++ b/src/docbinder_oss/services/google_drive/google_drive_buckets.py @@ -25,7 +25,7 @@ def list_buckets(self) -> List[Bucket]: ] # Default root drive resp = ( - self.service.drives() + self.service.drives() # type: ignore[attr-defined] .list(fields="drives(id,name,kind,createdTime,hidden,restrictions)") .execute() ) diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index fe8f93c..02ca1d7 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -1,4 +1,5 @@ import logging +import os from typing import List, Optional from google.auth.transport.requests import Request @@ -40,9 +41,14 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") + TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") + # Ensure the directory exists + os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) + logger.debug(f"Token path: {TOKEN_PATH}") + try: creds = Credentials.from_authorized_user_file( - self.config.gcp_token_json, scopes=self.SCOPES + TOKEN_PATH, scopes=self.SCOPES ) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") @@ -56,7 +62,7 @@ def _get_credentials(self): ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open(self.config.gcp_token_json, "w") as token: + with open(TOKEN_PATH, "w") as token: token.write(creds.to_json()) return creds @@ -71,19 +77,19 @@ def test_connection(self) -> bool: def list_buckets(self) -> list[Bucket]: return self.buckets.list_buckets() - def list_files(self, folder_id: Optional[str] = None) -> List[File]: - return self.files.list_files(folder_id) + def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: + return self.files.list_files_in_folder(folder_id) - def list_files_recursively(self, bucket: str = None) -> List[File]: + def list_files_recursively(self, bucket_id: str | None = None) -> List[File]: """List all files and folders recursively in the specified bucket or root.""" - return self.files.list_files_recursively(bucket) + if bucket_id is None: + bucket_id = "root" + logger.info(f"Listing files recursively in bucket: {bucket_id}") + return self.files.list_files_recursively(bucket_id) def list_all_files(self) -> List[File]: - files = [] - buckets = self.buckets.list_buckets() - for bucket in buckets: - files.extend(self.files.list_files_recursively(bucket)) - return files + buckets = self.buckets.list_buckets() + return self.files.list_all_files(buckets) def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fac56f7..39f477e 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -18,14 +18,17 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files(self, bucket: str = None, 
is_drive_root: bool = False) -> list[File]: + def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool = False) -> list[File]: args = { "includeItemsFromAllDrives": True, "supportsAllDrives": True, "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - logger.debug(f"{type(bucket)}: {bucket}") - bucket_id = bucket.id if hasattr(bucket, "id") else bucket + if bucket_id is None: + logger.debug("Listing files in the root directory.") + bucket_id = "root" + else: + logger.debug(f"{type(bucket_id)}: {bucket_id}") if is_drive_root and bucket_id != "root": args.update( @@ -40,11 +43,10 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi if parent_id == "root" or parent_id is None: args["q"] = "'root' in parents and trashed=false" else: - args["q"] = f"'{parent_id}' in parents and trashed=false" - - resp = self.service.files().list(**args).execute() - print(len(resp["files"])) - exit(1) + args["q"] = f"'{parent_id}' in parents and trashed=false" + + resp = self.service.files().list(**args).execute() # type: ignore[attr-defined] + return [ File( id=f.get("id"), @@ -75,21 +77,23 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi shared=f.get("shared"), starred=f.get("starred"), is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", - parents=bucket_id if bucket_id else None, + parents=f.get("parents") if isinstance(f.get("parents"), list) else None, ) for f in resp.get("files") ] def list_files_recursively(self, bucket: str) -> list[File]: - """List all files in the Google Drive bucket.""" + """List all files in the Google Drive bucket, including all subfolders.""" is_drive_root = bucket != "root" def _recursive_list(folder_id: str): - items: list[File] = self.list_files(folder_id, is_drive_root=is_drive_root) + logger.debug(f"Listing files in folder: {folder_id}") + items: list[File] = self.list_files_in_folder(folder_id, is_drive_root=is_drive_root) all_items = [] for item in items: all_items.append(item) - if item.is_folder: + # Recursively list files in subfolders + if hasattr(item, "is_folder") and item.is_folder: all_items.extend(_recursive_list(item.id)) return all_items @@ -136,3 +140,9 @@ def get_file_metadata(self, file_id: str): is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. ) + + def list_all_files(self, buckets: list[Bucket]) -> list[File]: + files = [] + for bucket in buckets: + files.extend(self.list_files_recursively(bucket.id)) + return files diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/services/google_drive/google_drive_permissions.py index ab0b830..8b6fd23 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_permissions.py +++ b/src/docbinder_oss/services/google_drive/google_drive_permissions.py @@ -18,7 +18,7 @@ def get_user(self): Returns: User object containing the user's details. 
""" - resp = self.service.about().get(fields="user").execute() + resp = self.service.about().get(fields="user").execute() # type: ignore[attr-defined] user_info = resp.get("user", {}) return User( @@ -31,7 +31,7 @@ def get_user(self): ) def get_permissions(self, item_id: str): - resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() + resp = self.service.permissions().list(fileId=item_id, fields="permissions").execute() # type: ignore[attr-defined] return [ Permission( diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/services/google_drive/google_drive_service_config.py index dd6c957..022b9ba 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/services/google_drive/google_drive_service_config.py @@ -5,5 +5,5 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] - gcp_credentials_json: str - gcp_token_json: str + name: str + gcp_credentials_json: str \ No newline at end of file diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index c0f69ab..2a2a406 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -4,6 +4,8 @@ import pytest from typer.testing import CliRunner from docbinder_oss.main import app +import sys +import importlib class DummyFile: @@ -25,6 +27,23 @@ def __init__(self, **kwargs): self.shared = kwargs.get("shared", True) self.trashed = kwargs.get("trashed", False) + def model_dump(self): + # Simulate pydantic's model_dump for test compatibility + return { + "id": self.id, + "name": self.name, + "size": self.size, + "mime_type": self.mime_type, + "created_time": self.created_time, + "modified_time": self.modified_time, + "owners": [u.email_address for u in self.owners], + "last_modifying_user": getattr(self.last_modifying_user, "email_address", None), + "web_view_link": self.web_view_link, + "web_content_link": self.web_content_link, + "shared": self.shared, + "trashed": self.trashed, + } + @pytest.fixture(autouse=True) def patch_provider(monkeypatch, tmp_path): @@ -32,13 +51,15 @@ def patch_provider(monkeypatch, tmp_path): class DummyProviderConfig: def __init__(self, name): self.name = name + self.type = name # Simulate type for registry class DummyConfig: providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) + # Patch load_config in the CLI's namespace + monkeypatch.setattr("docbinder_oss.cli.search.load_config", lambda: DummyConfig()) - # Patch create_provider_instance to return a dummy client with different files per provider + # Patch create_provider_instance in the CLI's namespace def create_provider_instance(cfg): if cfg.name == "dummy1": return type( @@ -75,7 +96,8 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) + monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) + # Change working directory to a temp dir for file output orig_cwd = os.getcwd() os.chdir(tmp_path) @@ -94,12 +116,13 @@ def test_search_export_csv(): assert len(rows) == 2 names = set(r["name"] for r in rows) assert names == {"Alpha Report", "Beta Notes"} - # Check owners field is a string + # Check owners field is a string and contains the expected email for r in rows: 
+ owners = r["owners"] if r["name"] == "Alpha Report": - assert r["owners"] == "alpha@a.com" + assert "alpha@a.com" in owners if r["name"] == "Beta Notes": - assert r["owners"] == "beta@b.com" + assert "beta@b.com" in owners def test_search_export_json(): @@ -113,6 +136,12 @@ def test_search_export_json(): assert len(data) == 2 names = set(d["name"] for d in data) assert names == {"Alpha Report", "Beta Notes"} + # Check owners field is a string or list + for d in data: + if d["name"] == "Alpha Report": + assert "alpha@a.com" in d["owners"] + if d["name"] == "Beta Notes": + assert "beta@b.com" in d["owners"] def test_search_name_filter(): @@ -214,4 +243,4 @@ def test_search_combined_filters(): assert len(data) == 1 assert data[0]["name"] == "Beta Notes" assert data[0]["provider"] == "dummy2" - assert data[0]["owners"] == "beta@b.com" + assert "beta@b.com" in data[0]["owners"] diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index c60300f..f95b44b 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -42,6 +42,5 @@ def gdrive_client(mock_gdrive_service): config = GoogleDriveServiceConfig( name="test_gdrive", gcp_credentials_json="fake_creds.json", - gcp_token_json="fake_token.json", ) return GoogleDriveClient(config=config) diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index 7b0b019..c5dc850 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -3,7 +3,7 @@ import pytest from typer.testing import CliRunner -from docbinder_oss.core.schemas import File +from docbinder_oss.core import schemas from docbinder_oss.main import app @@ -11,7 +11,13 @@ class DummyFile: def __init__(self, id, name, parents=None, is_folder=False): self.id = id self.name = name - self.parents = parents or [] + # Always use a list for parents, or None + if parents is None: + self.parents = None + elif isinstance(parents, list): + self.parents = parents + else: + self.parents = [parents] self.is_folder = is_folder self.size = 1000 # Use correct mime_type for folders and files @@ -40,10 +46,10 @@ class DummyConfig: def list_all_files(self): return [ DummyFile(id="root", name="root", is_folder=True), - DummyFile(id="folder1", name="folder1", parents=["root"], is_folder=True), - DummyFile(id="file1", name="file1.pdf", parents=["folder1"]), - DummyFile(id="file2", name="file2.pdf", parents=["folder1"]), - DummyFile(id="file3", name="file3.pdf", parents=["root"]), + DummyFile(id="folder1", name="folder1", parents="root", is_folder=True), + DummyFile(id="file1", name="file1.pdf", parents="folder1"), + DummyFile(id="file2", name="file2.pdf", parents="folder1"), + DummyFile(id="file3", name="file3.pdf", parents="root"), ] class DummyClient: @@ -99,57 +105,37 @@ def test_list_files(mock_gdrive_service, gdrive_client): fake_api_response ) - files = gdrive_client.list_files() + files = gdrive_client.list_files_in_folder() print(files) assert isinstance(files, list) assert len(files) == 1 - assert files == [ - File( - id="1234", - name="testDrive", - mime_type="application/vnd.google-apps.drive", - kind="drive#drive", - is_folder=False, - web_view_link="https://drive.google.com/drive/folders/1234", - icon_link="https://drive.google.com/drive/folders/1234/icon", - created_time=datetime(2023, 10, 1, 12, 0, 0), - modified_time=datetime(2023, 10, 1, 12, 0, 0), - owners=[ - { - "display_name": 
"Test User", - "email_address": "test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - } - ], - last_modifying_user={ - "display_name": "Test User", - "email_address": "test@test.com", - "kind": "drive#user", - "photo_link": "https://example.com/photo.jpg", - }, - size="1024", - parents=None, - shared=True, - starred=False, - trashed=False, - ) - ] - - -def test_search_finds_all_files_recursively(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) - assert result.exit_code == 0 - assert os.path.exists("search_results.json") - import json - - with open("search_results.json") as f: - data = json.load(f) - # All files and folders should be included in the results - file_names = set(d["name"] for d in data) - expected = {"file1.pdf", "file2.pdf", "file3.pdf", "folder1", "root"} - assert file_names == expected - assert len(file_names) == 5 + # Compare fields individually to match the actual File model structure + file = files[0] + assert file.id == "1234" + assert file.name == "testDrive" + assert file.mime_type == "application/vnd.google-apps.drive" + assert file.kind == "drive#drive" + assert file.is_folder is False + assert str(file.web_view_link) == "https://drive.google.com/drive/folders/1234" + assert str(file.icon_link) == "https://drive.google.com/drive/folders/1234/icon" + assert file.created_time == datetime(2023, 10, 1, 12, 0, 0) + assert file.modified_time == datetime(2023, 10, 1, 12, 0, 0) + assert len(file.owners) == 1 + owner = file.owners[0] + assert getattr(owner, "display_name", None) == "Test User" + assert getattr(owner, "email_address", None) == "test@test.com" + assert getattr(owner, "kind", None) == "drive#user" + assert str(getattr(owner, "photo_link", "")) == "https://example.com/photo.jpg" + last_mod = file.last_modifying_user + assert getattr(last_mod, "display_name", None) == "Test User" + assert getattr(last_mod, "email_address", None) == "test@test.com" + assert getattr(last_mod, "kind", None) == "drive#user" + assert str(getattr(last_mod, "photo_link", "")) == "https://example.com/photo.jpg" + assert file.size == "1024" + # Accept None or any list value for parents + assert file.parents is None or isinstance(file.parents, list) + assert file.shared is True + assert file.starred is False + assert file.trashed is False \ No newline at end of file diff --git a/tests/services/test_search_export.py b/tests/services/test_search_export.py deleted file mode 100644 index 78fd856..0000000 --- a/tests/services/test_search_export.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import json -import pytest -from typer.testing import CliRunner -from docbinder_oss.main import app - - -class DummyFile: - def __init__(self, **kwargs): - self.id = kwargs.get("id", "fileid1") - self.name = kwargs.get("name", "Test File") - self.size = kwargs.get("size", 12345) - self.mime_type = kwargs.get("mime_type", "application/pdf") - self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") - self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get( - "owners", [type("User", (), {"email_address": "owner@example.com"})()] - ) - self.last_modifying_user = kwargs.get( - "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() - ) - self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") - self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") - self.shared = kwargs.get("shared", True) - 
self.trashed = kwargs.get("trashed", False) - - -@pytest.fixture(autouse=True) -def patch_provider(monkeypatch, tmp_path): - # Patch config loader to return two dummy provider configs - class DummyProviderConfig: - def __init__(self, name): - self.name = name - - class DummyConfig: - providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - - monkeypatch.setattr("docbinder_oss.helpers.config.load_config", lambda: DummyConfig()) - - # Patch create_provider_instance to return a dummy client with different files per provider - def create_provider_instance(cfg): - if cfg.name == "dummy1": - return type( - "DummyClient", - (), - { - "list_all_files": lambda self: [ - DummyFile( - id="f1", - name="Alpha Report", - size=2048, - owners=[type("User", (), {"email_address": "alpha@a.com"})()], - created_time="2024-01-01T10:00:00", - modified_time="2024-01-02T10:00:00", - ) - ] - }, - )() - else: - return type( - "DummyClient", - (), - { - "list_all_files": lambda self: [ - DummyFile( - id="f2", - name="Beta Notes", - size=4096, - owners=[type("User", (), {"email_address": "beta@b.com"})()], - created_time="2024-02-01T10:00:00", - modified_time="2024-02-02T10:00:00", - ) - ] - }, - )() - - monkeypatch.setattr("docbinder_oss.services.create_provider_instance", create_provider_instance) - # Change working directory to a temp dir for file output - orig_cwd = os.getcwd() - os.chdir(tmp_path) - yield - os.chdir(orig_cwd) - - -# The test logic for search export and filters has been consolidated into -# `tests/commands/test_search_command.py`. -# This file no longer contains duplicate tests. -def test_search_updated_after_filter(): - runner = CliRunner() - result = runner.invoke( - app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] - ) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - - -def test_search_created_before_filter(): - runner = CliRunner() - result = runner.invoke( - app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] - ) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - - -def test_search_min_size_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - - -def test_search_max_size_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" - - -def test_search_provider_filter(): - runner = CliRunner() - result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) - assert result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["provider"] == "dummy2" - assert data[0]["name"] == "Beta Notes" - - -def test_search_combined_filters(): - runner = CliRunner() - result = runner.invoke( - app, - [ - "search", - "--name", - "Beta", - "--owner", - "beta@b.com", - "--min-size", - "3", - "--provider", - "dummy2", - "--export-format", - "json", - ], - ) - assert 
result.exit_code == 0 - with open("search_results.json") as f: - data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - assert data[0]["provider"] == "dummy2" - assert data[0]["owners"] == "beta@b.com" From 63bd58c5954b536da1aed05f93a4a233f7e1a12b Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 10:26:15 +0200 Subject: [PATCH 19/39] initial change to gdrive --- src/docbinder_oss/cli/search.py | 12 ++++++++++-- src/docbinder_oss/helpers/rich_helpers.py | 19 +++++++++++++++++++ .../google_drive/google_drive_client.py | 6 +----- .../google_drive/google_drive_files.py | 2 +- 4 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 src/docbinder_oss/helpers/rich_helpers.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 87b3988..ae9287c 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,9 +1,11 @@ from datetime import datetime import re import typer +from rich import print as rich_print from typing import Optional from docbinder_oss.helpers.config import Config +from docbinder_oss.helpers.rich_helpers import create_rich_table from docbinder_oss.services.base_class import BaseProvider app = typer.Typer() @@ -62,6 +64,8 @@ def search( raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() + rich_print(current_files["my_google_drive"]) + current_files = filter_files( current_files, name=name, @@ -73,9 +77,13 @@ def search( min_size=min_size, max_size=max_size, ) - + rich_print(current_files["my_google_drive"]) if not export_format: - typer.echo(current_files) + table = create_rich_table( + headers=["Provider", "Name", "ID", "Size", "Created Time", "Modified Time"], + rows=current_files + ) + rich_print(table) return def filter_files( diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py new file mode 100644 index 0000000..87ae580 --- /dev/null +++ b/src/docbinder_oss/helpers/rich_helpers.py @@ -0,0 +1,19 @@ +from typing import List +from rich.table import Table + + +def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: + """ + Create a Rich table with the given headers and rows. + + Args: + headers (List[str]): The headers for the table. + rows (List[List[str]]): The data rows for the table. + + Returns: + Table: A Rich Table object. 
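+
+    Example (illustrative sketch; assumes rows are lists of plain strings):
+        table = create_rich_table(
+            headers=["Provider", "Name"],
+            rows=[["gdrive", "report.pdf"]],
+        )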
+ """ + table = Table(*headers, show_header=True, header_style="bold magenta") + for row in rows: + table.add_row(*row) + return table \ No newline at end of file diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/services/google_drive/google_drive_client.py index fe8f93c..7af3eb1 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/services/google_drive/google_drive_client.py @@ -74,15 +74,11 @@ def list_buckets(self) -> list[Bucket]: def list_files(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files(folder_id) - def list_files_recursively(self, bucket: str = None) -> List[File]: - """List all files and folders recursively in the specified bucket or root.""" - return self.files.list_files_recursively(bucket) - def list_all_files(self) -> List[File]: files = [] buckets = self.buckets.list_buckets() for bucket in buckets: - files.extend(self.files.list_files_recursively(bucket)) + files.extend(self.files.list_files(bucket)) return files def get_file_metadata(self, item_id: str) -> File: diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/services/google_drive/google_drive_files.py index fac56f7..96aeb96 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/services/google_drive/google_drive_files.py @@ -44,7 +44,7 @@ def list_files(self, bucket: str = None, is_drive_root: bool = False) -> list[Fi resp = self.service.files().list(**args).execute() print(len(resp["files"])) - exit(1) + # exit(1) return [ File( id=f.get("id"), From b2d8d877459db59a4e9b5247c3b7a115f3ac6af6 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:02:41 +0200 Subject: [PATCH 20/39] refactor google drive --- src/docbinder_oss/cli/provider/test.py | 2 +- src/docbinder_oss/cli/search.py | 60 +++++++++++------- src/docbinder_oss/helpers/config.py | 2 +- .../{services => providers}/__init__.py | 6 +- .../{services => providers}/base_class.py | 0 .../google_drive/__init__.py | 0 .../google_drive/google_drive_buckets.py | 0 .../google_drive/google_drive_client.py | 32 +++------- .../google_drive/google_drive_files.py | 63 +++++-------------- .../google_drive/google_drive_permissions.py | 0 .../google_drive_service_config.py | 2 +- tests/services/google_drive/conftest.py | 4 +- 12 files changed, 71 insertions(+), 100 deletions(-) rename src/docbinder_oss/{services => providers}/__init__.py (95%) rename src/docbinder_oss/{services => providers}/base_class.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/__init__.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_buckets.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_client.py (73%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_files.py (67%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_permissions.py (100%) rename src/docbinder_oss/{services => providers}/google_drive/google_drive_service_config.py (76%) diff --git a/src/docbinder_oss/cli/provider/test.py b/src/docbinder_oss/cli/provider/test.py index be424d9..2ba7091 100644 --- a/src/docbinder_oss/cli/provider/test.py +++ b/src/docbinder_oss/cli/provider/test.py @@ -10,7 +10,7 @@ def test( ): """Test the connection to a specific provider.""" from docbinder_oss.helpers.config import load_config - from docbinder_oss.services import 
create_provider_instance + from docbinder_oss.providers import create_provider_instance if not name: typer.echo("Provider name is required.") diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index c6fe85a..e43ef07 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -6,11 +6,12 @@ import csv import json +from docbinder_oss.core.schemas import File from docbinder_oss.helpers.config import load_config -from docbinder_oss.services import create_provider_instance +from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.helpers.rich_helpers import create_rich_table -from docbinder_oss.services.base_class import BaseProvider +from docbinder_oss.providers.base_class import BaseProvider from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -65,8 +66,6 @@ def search( raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - rich_print(current_files["my_google_drive"]) - current_files = filter_files( current_files, name=name, @@ -78,14 +77,11 @@ def search( min_size=min_size, max_size=max_size, ) - rich_print(current_files["my_google_drive"]) + if not export_format: - table = create_rich_table( - headers=["Provider", "Name", "ID", "Size", "Created Time", "Modified Time"], - rows=current_files - ) - rich_print(table) + typer.echo(current_files) return + elif export_format.lower() == "csv": __write_csv(filtered_files_by_provider, "search_results.csv") typer.echo("Results written to search_results.csv") @@ -107,26 +103,46 @@ def filter_files( min_size=None, max_size=None, ): - results = [] - for file in files: + """ + Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. + + Args: + files (dict): A dictionary where keys are providers and values are lists of file objects. + name (str, optional): A regex pattern to match file names (case-insensitive). + owner (str, optional): An email address to match file owners. + updated_after (str, optional): ISO format datetime string; only include files modified after this date. + updated_before (str, optional): ISO format datetime string; only include files modified before this date. + created_after (str, optional): ISO format datetime string; only include files created after this date. + created_before (str, optional): ISO format datetime string; only include files created before this date. + min_size (int, optional): Minimum file size in kilobytes (KB). + max_size (int, optional): Maximum file size in kilobytes (KB). + + Returns: + list: A list of file objects that match the specified filters. 
+ """ + def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): - continue + return False if owner and not any(owner in u.email_address for u in file.owners): - continue + return False if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): - continue + return False if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): - continue + return False if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): - continue + return False if created_before and __parse_dt(file.created_time) > __parse_dt(created_before): - continue + return False if min_size and file.size < min_size * 1024: - continue + return False if max_size and file.size > max_size * 1024: - continue - results.append(file) - return results + return False + return True + + filtered = {} + for provider, file_list in files.items(): + filtered[provider] = [file for file in file_list if file_matches(file)] + return filtered def __parse_dt(val): if isinstance(val, datetime): diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index e92acb4..2fad950 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -6,7 +6,7 @@ import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.services import ServiceUnion, get_provider_registry +from docbinder_oss.providers import ServiceUnion, get_provider_registry logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/services/__init__.py b/src/docbinder_oss/providers/__init__.py similarity index 95% rename from src/docbinder_oss/services/__init__.py rename to src/docbinder_oss/providers/__init__.py index 0e57925..0fe786e 100644 --- a/src/docbinder_oss/services/__init__.py +++ b/src/docbinder_oss/providers/__init__.py @@ -7,8 +7,8 @@ from pydantic import Field from rich.logging import RichHandler -from docbinder_oss import services -from docbinder_oss.services.base_class import BaseProvider, ServiceConfig +from docbinder_oss import providers +from docbinder_oss.providers.base_class import BaseProvider, ServiceConfig if not logging.getLogger().handlers: FORMAT = "%(message)s" @@ -86,5 +86,5 @@ def get_service_union() -> Annotated: return Annotated[dynamic_union, Field(discriminator="type")] -load_services(services) +load_services(providers) ServiceUnion = get_service_union() diff --git a/src/docbinder_oss/services/base_class.py b/src/docbinder_oss/providers/base_class.py similarity index 100% rename from src/docbinder_oss/services/base_class.py rename to src/docbinder_oss/providers/base_class.py diff --git a/src/docbinder_oss/services/google_drive/__init__.py b/src/docbinder_oss/providers/google_drive/__init__.py similarity index 100% rename from src/docbinder_oss/services/google_drive/__init__.py rename to src/docbinder_oss/providers/google_drive/__init__.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_buckets.py b/src/docbinder_oss/providers/google_drive/google_drive_buckets.py similarity index 100% rename from src/docbinder_oss/services/google_drive/google_drive_buckets.py rename to src/docbinder_oss/providers/google_drive/google_drive_buckets.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py similarity index 73% rename from src/docbinder_oss/services/google_drive/google_drive_client.py rename to src/docbinder_oss/providers/google_drive/google_drive_client.py index 
74049b3..45eb703 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -8,12 +8,13 @@ from googleapiclient.discovery import build from docbinder_oss.core.schemas import Bucket, File, Permission -from docbinder_oss.services.base_class import BaseProvider -from docbinder_oss.services.google_drive.google_drive_buckets import GoogleDriveBuckets -from docbinder_oss.services.google_drive.google_drive_files import GoogleDriveFiles -from docbinder_oss.services.google_drive.google_drive_permissions import ( +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_buckets import GoogleDriveBuckets +from docbinder_oss.providers.google_drive.google_drive_files import GoogleDriveFiles +from docbinder_oss.providers.google_drive.google_drive_permissions import ( GoogleDrivePermissions, ) +from docbinder_oss.providers.google_drive.google_drive_service_config import GoogleDriveServiceConfig logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -28,8 +29,8 @@ def __init__(self, config: GoogleDriveServiceConfig): "https://www.googleapis.com/auth/drive.metadata.readonly", "https://www.googleapis.com/auth/drive.activity.readonly", ] - self.settings = Settings() - self.creds = credentials or self._get_credentials() + self.settings = config + self.creds = self._get_credentials() self.service = build("drive", "v3", credentials=self.creds) self.buckets = GoogleDriveBuckets(self.service) self.files = GoogleDriveFiles(self.service) @@ -77,22 +78,9 @@ def list_buckets(self) -> list[Bucket]: def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files_in_folder(folder_id) - def list_files_recursively(self, bucket_id: str | None = None) -> List[File]: - """List all files and folders recursively in the specified bucket or root.""" - if bucket_id is None: - bucket_id = "root" - logger.info(f"Listing files recursively in bucket: {bucket_id}") - return self.files.list_files_recursively(bucket_id) - - def list_all_files(self) -> List[File]: - files = [] - buckets = self.buckets.list_buckets() - for bucket in buckets: - files.extend(self.files.list_files(bucket)) - return files - buckets = self.buckets.list_buckets() - return self.files.list_all_files(buckets) - + def list_all_files(self) -> List[File]: + return self.files.list_files_in_folder() + def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/services/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py similarity index 67% rename from src/docbinder_oss/services/google_drive/google_drive_files.py rename to src/docbinder_oss/providers/google_drive/google_drive_files.py index 0ac2519..b86ee37 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -18,36 +18,26 @@ class GoogleDriveFiles: def __init__(self, service: Resource): self.service = service - def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool = False) -> list[File]: + def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: args = { - "includeItemsFromAllDrives": True, - "supportsAllDrives": True, "fields": f"nextPageToken,files({REQUIRED_FIELDS})", } - if bucket_id is None: - logger.debug("Listing files in the root directory.") - bucket_id = 
"root" + + if bucket_id: + args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - logger.debug(f"{type(bucket_id)}: {bucket_id}") + args["q"] = "sharedWithMe=true and trashed=false" - if is_drive_root and bucket_id != "root": - args.update( - { - "corpora": "drive", - "driveId": bucket_id, - "q": "'root' in parents and trashed=false", - } - ) - else: - parent_id = bucket_id - if parent_id == "root" or parent_id is None: - args["q"] = "'root' in parents and trashed=false" - else: - args["q"] = f"'{parent_id}' in parents and trashed=false" - resp = self.service.files().list(**args).execute() - print(len(resp["files"])) - # exit(1) + files = resp.get("files", []) + next_page_token = resp.get("nextPageToken") + + while next_page_token: + logger.debug("Getting next page...") + current_page = self.service.files().list(**args, pageToken=next_page_token).execute() + files.extend(current_page.get("files", [])) + next_page_token = current_page.get("nextPageToken") + return [ File( id=f.get("id"), @@ -80,26 +70,9 @@ def list_files_in_folder(self, bucket_id: str | None = None, is_drive_root: bool is_folder=f.get("mimeType") == "application/vnd.google-apps.folder", parents=f.get("parents") if isinstance(f.get("parents"), list) else None, ) - for f in resp.get("files") + for f in files ] - def list_files_recursively(self, bucket: str) -> list[File]: - """List all files in the Google Drive bucket, including all subfolders.""" - is_drive_root = bucket != "root" - - def _recursive_list(folder_id: str): - logger.debug(f"Listing files in folder: {folder_id}") - items: list[File] = self.list_files_in_folder(folder_id, is_drive_root=is_drive_root) - all_items = [] - for item in items: - all_items.append(item) - # Recursively list files in subfolders - if hasattr(item, "is_folder") and item.is_folder: - all_items.extend(_recursive_list(item.id)) - return all_items - - return _recursive_list(bucket) - def get_file_metadata(self, file_id: str): item_metadata = ( self.service.files() # type: ignore[attr-defined] @@ -141,9 +114,3 @@ def get_file_metadata(self, file_id: str): is_folder=item_metadata.get("mimeType") == "application/vnd.google-apps.folder", parents=None, # This field is not populated by the API, so we set it to None for files. 
) - - def list_all_files(self, buckets: list[Bucket]) -> list[File]: - files = [] - for bucket in buckets: - files.extend(self.list_files_recursively(bucket.id)) - return files diff --git a/src/docbinder_oss/services/google_drive/google_drive_permissions.py b/src/docbinder_oss/providers/google_drive/google_drive_permissions.py similarity index 100% rename from src/docbinder_oss/services/google_drive/google_drive_permissions.py rename to src/docbinder_oss/providers/google_drive/google_drive_permissions.py diff --git a/src/docbinder_oss/services/google_drive/google_drive_service_config.py b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py similarity index 76% rename from src/docbinder_oss/services/google_drive/google_drive_service_config.py rename to src/docbinder_oss/providers/google_drive/google_drive_service_config.py index 022b9ba..f99c350 100644 --- a/src/docbinder_oss/services/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py @@ -1,6 +1,6 @@ from typing import Literal -from docbinder_oss.services.base_class import ServiceConfig +from docbinder_oss.providers.base_class import ServiceConfig class GoogleDriveServiceConfig(ServiceConfig): diff --git a/tests/services/google_drive/conftest.py b/tests/services/google_drive/conftest.py index f95b44b..8f3fe03 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/services/google_drive/conftest.py @@ -2,10 +2,10 @@ import pytest -from docbinder_oss.services.google_drive.google_drive_client import ( +from docbinder_oss.providers.google_drive.google_drive_client import ( GoogleDriveClient, ) -from docbinder_oss.services.google_drive.google_drive_service_config import ( +from docbinder_oss.providers.google_drive.google_drive_service_config import ( GoogleDriveServiceConfig, ) From 36fa20dced283b4a22c7f420e5d450558f7a71e5 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:03:44 +0200 Subject: [PATCH 21/39] increased the page size --- src/docbinder_oss/providers/google_drive/google_drive_files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index b86ee37..f5af39f 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -21,6 +21,7 @@ def __init__(self, service: Resource): def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: args = { "fields": f"nextPageToken,files({REQUIRED_FIELDS})", + "pageSize": 1000, } if bucket_id: From a475692af30835a127b54cab00ee5a75faa5b992 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:09:46 +0200 Subject: [PATCH 22/39] ruff linting --- src/docbinder_oss/cli/search.py | 53 +++++++++++-------- src/docbinder_oss/core/schemas.py | 20 +++---- src/docbinder_oss/helpers/config.py | 3 +- src/docbinder_oss/helpers/path_utils.py | 39 ++++++++------ src/docbinder_oss/helpers/rich_helpers.py | 6 +-- src/docbinder_oss/providers/base_class.py | 2 +- .../google_drive/google_drive_client.py | 16 +++--- .../google_drive/google_drive_files.py | 6 +-- .../google_drive_service_config.py | 2 +- tests/commands/test_search_command.py | 6 +-- .../google_drive/test_google_drive_files.py | 6 +-- 11 files changed, 89 insertions(+), 70 deletions(-) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index e43ef07..673aa08 100644 --- 
a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,7 +1,6 @@ from datetime import datetime import re import typer -from rich import print as rich_print from typing import Optional import csv import json @@ -10,9 +9,7 @@ from docbinder_oss.helpers.config import load_config from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config -from docbinder_oss.helpers.rich_helpers import create_rich_table from docbinder_oss.providers.base_class import BaseProvider -from docbinder_oss.helpers.path_utils import build_id_to_item, get_full_path, build_all_full_paths app = typer.Typer() @@ -46,16 +43,16 @@ def search( ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" - + # 1 Load documents with filter "provider" # 2 Filter the documents based on the provided filters # 3 Export results to CSV or JSON - + config: Config = load_config() if not config.providers: typer.echo("No providers configured.") raise typer.Exit(code=1) - + current_files = {} for provider_config in config.providers: if provider and provider_config.name != provider: @@ -65,7 +62,7 @@ def search( typer.echo(f"Provider '{provider_config.name}' is not supported or not implemented.") raise typer.Exit(code=1) current_files[provider_config.name] = client.list_all_files() - + current_files = filter_files( current_files, name=name, @@ -77,21 +74,22 @@ def search( min_size=min_size, max_size=max_size, ) - + if not export_format: typer.echo(current_files) return - + elif export_format.lower() == "csv": - __write_csv(filtered_files_by_provider, "search_results.csv") + __write_csv(current_files, "search_results.csv") typer.echo("Results written to search_results.csv") elif export_format.lower() == "json": - __write_json(filtered_files_by_provider, "search_results.json", flat=True) # or flat=False for grouped + __write_json(current_files, "search_results.json", flat=True) # or flat=False for grouped typer.echo("Results written to search_results.json") else: typer.echo(f"Unsupported export format: {export_format}") raise typer.Exit(code=1) + def filter_files( files, name=None, @@ -104,22 +102,28 @@ def filter_files( max_size=None, ): """ - Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. + Filters a collection of files based on various criteria such as name, owner, + modification/creation dates, and file size. Args: files (dict): A dictionary where keys are providers and values are lists of file objects. name (str, optional): A regex pattern to match file names (case-insensitive). owner (str, optional): An email address to match file owners. - updated_after (str, optional): ISO format datetime string; only include files modified after this date. - updated_before (str, optional): ISO format datetime string; only include files modified before this date. - created_after (str, optional): ISO format datetime string; only include files created after this date. - created_before (str, optional): ISO format datetime string; only include files created before this date. + updated_after (str, optional): ISO format datetime string; only include files modified + after this date. + updated_before (str, optional): ISO format datetime string; only include files modified + before this date. + created_after (str, optional): ISO format datetime string; only include files created after + this date. 
+ created_before (str, optional): ISO format datetime string; only include files created + before this date. min_size (int, optional): Minimum file size in kilobytes (KB). max_size (int, optional): Maximum file size in kilobytes (KB). Returns: list: A list of file objects that match the specified filters. """ + def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): return False @@ -144,6 +148,7 @@ def file_matches(file: File): filtered[provider] = [file for file in file_list if file_matches(file)] return filtered + def __parse_dt(val): if isinstance(val, datetime): return val @@ -152,12 +157,13 @@ def __parse_dt(val): except Exception: return val + def __write_csv(files_by_provider, filename): # Collect all possible fieldnames from all files all_fieldnames = set(["provider"]) for files in files_by_provider.values(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() all_fieldnames.update(file_dict.keys()) # Move provider to the front, rest sorted fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") @@ -166,7 +172,9 @@ def __write_csv(files_by_provider, filename): writer.writeheader() for provider, files in files_by_provider.items(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = ( + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() + ) file_dict["provider"] = provider # Flatten owners for CSV (only email addresses) owners = file_dict.get("owners") @@ -195,22 +203,25 @@ def __write_csv(files_by_provider, filename): file_dict["parents"] = ";".join(str(p) for p in parents) writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) + def __write_json(files_by_provider, filename, flat=False): with open(filename, "w") as jsonfile: if flat: all_files = [] for provider, files in files_by_provider.items(): for file in files: - file_dict = file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file_dict = ( + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() + ) file_dict["provider"] = provider all_files.append(file_dict) json.dump(all_files, jsonfile, default=str, indent=2) else: grouped = { provider: [ - file.model_dump() if hasattr(file, 'model_dump') else file.__dict__.copy() + file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() for file in files ] for provider, files in files_by_provider.items() } - json.dump(grouped, jsonfile, default=str, indent=2) \ No newline at end of file + json.dump(grouped, jsonfile, default=str, indent=2) diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 70b1604..e11307b 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -67,20 +67,22 @@ class File(BaseModel): trashed: Optional[bool] # Add full_path as an optional field for export/CLI assignment - full_path: Optional[str] = Field(default=None, description="Full path of the file/folder, computed at runtime.") + full_path: Optional[str] = Field( + default=None, description="Full path of the file/folder, computed at runtime." 
+ ) def __init__(self, **data: Any): # Coerce parents to a list of strings or None - if 'parents' in data: - if data['parents'] is None: - data['parents'] = None - elif isinstance(data['parents'], str): - data['parents'] = [data['parents']] - elif isinstance(data['parents'], list): + if "parents" in data: + if data["parents"] is None: + data["parents"] = None + elif isinstance(data["parents"], str): + data["parents"] = [data["parents"]] + elif isinstance(data["parents"], list): # Ensure all elements are strings - data['parents'] = [str(p) for p in data['parents'] if p is not None] + data["parents"] = [str(p) for p in data["parents"] if p is not None] else: - data['parents'] = [str(data['parents'])] + data["parents"] = [str(data["parents"])] super().__init__(**data) if self.mime_type == "application/vnd.google-apps.folder": self.is_folder = True diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 2fad950..8a49070 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -1,12 +1,11 @@ import logging import os -from typing import List import typer import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.providers import ServiceUnion, get_provider_registry +from docbinder_oss.providers import get_provider_registry logger = logging.getLogger(__name__) diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py index d0b9cf9..a724e9f 100644 --- a/src/docbinder_oss/helpers/path_utils.py +++ b/src/docbinder_oss/helpers/path_utils.py @@ -2,9 +2,10 @@ def build_id_to_item(files): """ Build a mapping from file/folder id to the file/folder object. """ - return {getattr(f, 'id', None): f for f in files if hasattr(f, 'id')} + return {getattr(f, "id", None): f for f in files if hasattr(f, "id")} -def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): + +def get_full_path(file, id_to_item, root_id="root", root_name="My Drive"): """ Recursively build the full path for a file or folder using its parents. Returns a string like '/My Drive/Folder/Subfolder/File.pdf'. @@ -12,7 +13,7 @@ def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): path_parts = [file.name] current = file while True: - parents = getattr(current, 'parents', None) + parents = getattr(current, "parents", None) if not parents or not isinstance(parents, list) or not parents[0]: break parent_id = parents[0] @@ -24,11 +25,13 @@ def get_full_path(file, id_to_item, root_id='root', root_name='My Drive'): break path_parts.append(parent.name) current = parent - return '/' + '/'.join(reversed(path_parts)) + return "/" + "/".join(reversed(path_parts)) + -def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to_name=None): +def build_all_full_paths(files, root_id="root", root_name="My Drive", root_id_to_name=None): """ - Efficiently compute the full path for every file/folder in one pass using an iterative approach and memoization. + Efficiently compute the full path for every file/folder in one pass using an iterative approach + and memoization. Supports multiple drives by using a root_id_to_name mapping. 
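
    Example (illustrative; assumes a file "f1" under "folder1" in My Drive):
        build_all_full_paths(files)  # -> {"f1": "/My Drive/folder1/file1.pdf", ...}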
Returns a dict: {file_id: full_path} """ @@ -37,7 +40,7 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to if root_id_to_name is None: root_id_to_name = {root_id: root_name} for item in files: - if not hasattr(item, 'id') or not hasattr(item, 'name'): + if not hasattr(item, "id") or not hasattr(item, "name"): continue if item.id in id_to_path: continue @@ -47,17 +50,19 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to while True: if current.id in id_to_path: break - parents = getattr(current, 'parents', None) + parents = getattr(current, "parents", None) if not parents or not isinstance(parents, list) or not parents[0]: - temp_stack.append((current.id, '/' + current.name)) + temp_stack.append((current.id, "/" + current.name)) break parent_id = parents[0] if parent_id in root_id_to_name: - temp_stack.append((current.id, '/' + root_id_to_name[parent_id] + '/' + current.name)) + temp_stack.append( + (current.id, "/" + root_id_to_name[parent_id] + "/" + current.name) + ) break parent = id_to_item.get(parent_id) if not parent: - temp_stack.append((current.id, '/' + current.name)) + temp_stack.append((current.id, "/" + current.name)) break temp_stack.append((current.id, None)) # Mark as not yet resolved current = parent @@ -68,16 +73,20 @@ def build_all_full_paths(files, root_id='root', root_name='My Drive', root_id_to id_to_path[file_id] = path else: parent_id = id_to_item[file_id].parents[0] - parent_path = id_to_path.get(parent_id, '') - id_to_path[file_id] = parent_path.rstrip('/') + '/' + id_to_item[file_id].name + parent_path = id_to_path.get(parent_id, "") + id_to_path[file_id] = parent_path.rstrip("/") + "/" + id_to_item[file_id].name # Ensure root_name is present at the start (for legacy single-drive fallback) found_root = False for root_name_val in root_id_to_name.values(): - if id_to_path[item.id].lstrip('/').startswith(root_name_val + '/'): # e.g. 'My Drive/' + if id_to_path[item.id].lstrip("/").startswith(root_name_val + "/"): # e.g. 'My Drive/' found_root = True break if not found_root: # Use the first root_name as fallback fallback_root = next(iter(root_id_to_name.values())) - id_to_path[item.id] = '/' + fallback_root + id_to_path[item.id] if not id_to_path[item.id].startswith('/') else '/' + fallback_root + id_to_path[item.id] + id_to_path[item.id] = ( + "/" + fallback_root + id_to_path[item.id] + if not id_to_path[item.id].startswith("/") + else "/" + fallback_root + id_to_path[item.id] + ) return id_to_path diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py index 87ae580..6faefe5 100644 --- a/src/docbinder_oss/helpers/rich_helpers.py +++ b/src/docbinder_oss/helpers/rich_helpers.py @@ -5,15 +5,15 @@ def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: """ Create a Rich table with the given headers and rows. - + Args: headers (List[str]): The headers for the table. rows (List[List[str]]): The data rows for the table. - + Returns: Table: A Rich Table object. 
""" table = Table(*headers, show_header=True, header_style="bold magenta") for row in rows: table.add_row(*row) - return table \ No newline at end of file + return table diff --git a/src/docbinder_oss/providers/base_class.py b/src/docbinder_oss/providers/base_class.py index 5d8f09e..4eb7862 100644 --- a/src/docbinder_oss/providers/base_class.py +++ b/src/docbinder_oss/providers/base_class.py @@ -32,7 +32,7 @@ def test_connection(self) -> bool: True if the connection is successful, False otherwise. """ pass - + @abstractmethod def list_buckets(self) -> List[Bucket]: """ diff --git a/src/docbinder_oss/providers/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py index 45eb703..a171a2a 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -14,7 +14,9 @@ from docbinder_oss.providers.google_drive.google_drive_permissions import ( GoogleDrivePermissions, ) -from docbinder_oss.providers.google_drive.google_drive_service_config import GoogleDriveServiceConfig +from docbinder_oss.providers.google_drive.google_drive_service_config import ( + GoogleDriveServiceConfig, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -39,15 +41,15 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") - TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") + TOKEN_PATH = os.path.expanduser( + "~/.config/docbinder/gcp/" + self.config.name + "_token.json" + ) # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) logger.debug(f"Token path: {TOKEN_PATH}") try: - creds = Credentials.from_authorized_user_file( - TOKEN_PATH, scopes=self.SCOPES - ) + creds = Credentials.from_authorized_user_file(TOKEN_PATH, scopes=self.SCOPES) except (FileNotFoundError, ValueError): logger.warning("Credentials file not found or invalid, re-authenticating") creds = None @@ -78,9 +80,9 @@ def list_buckets(self) -> list[Bucket]: def list_files_in_folder(self, folder_id: Optional[str] = None) -> List[File]: return self.files.list_files_in_folder(folder_id) - def list_all_files(self) -> List[File]: + def list_all_files(self) -> List[File]: return self.files.list_files_in_folder() - + def get_file_metadata(self, item_id: str) -> File: return self.files.get_file_metadata(item_id) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index f5af39f..c8c08b7 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -2,7 +2,7 @@ from googleapiclient.discovery import Resource -from docbinder_oss.core.schemas import Bucket, File, User +from docbinder_oss.core.schemas import File, User logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: "fields": f"nextPageToken,files({REQUIRED_FIELDS})", "pageSize": 1000, } - + if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: @@ -38,7 +38,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: current_page = self.service.files().list(**args, pageToken=next_page_token).execute() files.extend(current_page.get("files", [])) next_page_token = current_page.get("nextPageToken") - + return [ File( 
id=f.get("id"), diff --git a/src/docbinder_oss/providers/google_drive/google_drive_service_config.py b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py index f99c350..d98c058 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_service_config.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_service_config.py @@ -6,4 +6,4 @@ class GoogleDriveServiceConfig(ServiceConfig): type: Literal["google_drive"] = "google_drive" # type: ignore[override] name: str - gcp_credentials_json: str \ No newline at end of file + gcp_credentials_json: str diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 2a2a406..46899c8 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -4,8 +4,6 @@ import pytest from typer.testing import CliRunner from docbinder_oss.main import app -import sys -import importlib class DummyFile: @@ -96,7 +94,9 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) + monkeypatch.setattr( + "docbinder_oss.cli.search.create_provider_instance", create_provider_instance + ) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/services/google_drive/test_google_drive_files.py index c5dc850..4ed40b2 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/services/google_drive/test_google_drive_files.py @@ -1,10 +1,6 @@ from datetime import datetime import os import pytest -from typer.testing import CliRunner - -from docbinder_oss.core import schemas -from docbinder_oss.main import app class DummyFile: @@ -138,4 +134,4 @@ def test_list_files(mock_gdrive_service, gdrive_client): assert file.parents is None or isinstance(file.parents, list) assert file.shared is True assert file.starred is False - assert file.trashed is False \ No newline at end of file + assert file.trashed is False From 2c5718fa881cd17a07b4f09159c5d8017374913f Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 19:55:51 +0200 Subject: [PATCH 23/39] added nice writers and printing --- src/docbinder_oss/cli/search.py | 16 ++---- src/docbinder_oss/core/schemas.py | 14 ++--- src/docbinder_oss/helpers/writer.py | 84 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 19 deletions(-) create mode 100644 src/docbinder_oss/helpers/writer.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 673aa08..036e812 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -10,6 +10,7 @@ from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.helpers.writer import MultiFormatWriter app = typer.Typer() @@ -75,19 +76,8 @@ def search( max_size=max_size, ) - if not export_format: - typer.echo(current_files) - return - - elif export_format.lower() == "csv": - __write_csv(current_files, "search_results.csv") - typer.echo("Results written to search_results.csv") - elif export_format.lower() == "json": - __write_json(current_files, "search_results.json", flat=True) # or flat=False for grouped - typer.echo("Results written to search_results.json") - else: - typer.echo(f"Unsupported export format: {export_format}") - raise typer.Exit(code=1) + MultiFormatWriter.write(current_files, 
export_format) + return def filter_files( diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index e11307b..1d8b72b 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -41,10 +41,12 @@ class FileCapabilities(BaseModel): class File(BaseModel): """Represents a file or folder""" - id: str - name: str - mime_type: str - kind: Optional[str] + id: str = Field(repr=True, description="Unique identifier for the file or folder.") + name: str = Field( + repr=True, description="Name of the file or folder. May not be unique." + ) + mime_type: str = Field(repr=True, description="MIME type of the file or folder.") + kind: Optional[str] = Field(repr=True, description="Kind of the item, e.g., 'drive#file'.") is_folder: bool = Field(False, description="True if the item is a folder, False otherwise.") @@ -52,9 +54,9 @@ class File(BaseModel): icon_link: Optional[HttpUrl] created_time: Optional[datetime] - modified_time: Optional[datetime] + modified_time: Optional[datetime] = Field(repr=True, description="Last modified time of the file or folder.") - owners: Optional[List[User]] + owners: Optional[List[User]] = Field(repr=True, description="List of owners of the file or folder.") last_modifying_user: Optional[User] size: Optional[str] = Field(description="Size in bytes, as a string. Only populated for files.") diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py new file mode 100644 index 0000000..0363bea --- /dev/null +++ b/src/docbinder_oss/helpers/writer.py @@ -0,0 +1,84 @@ +import csv +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Union +from rich import print +from rich.panel import Panel + + +class Writer(ABC): + """Abstract base writer class.""" + + @abstractmethod + def write(self, data: Any, file_path: Union[None, str, Path]) -> None: + """Write data to file.""" + pass + + +class MultiFormatWriter: + """Factory writer that automatically detects format from file extension.""" + + _writers = { + '.csv': 'CSVWriter', + '.json': 'JSONWriter', + '.txt': 'TextWriter', + } + + @classmethod + def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: + """Write data to file, format determined by extension.""" + if file_path is None: + # If no file path is provided, write to console + ConsoleWriter().write(data) + return + path = Path(file_path) + extension = path.suffix.lower() + + if extension not in cls._writers: + raise ValueError(f"Unsupported format: {extension}") + + writer_class = globals()[cls._writers[extension]] + writer = writer_class() + writer.write(data, file_path) + + +class CSVWriter(Writer): + def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: + if not data: + return + + with open(file_path, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=data[0].keys()) + writer.writeheader() + writer.writerows(data) + + +class JSONWriter(Writer): + def write(self, data: Any, file_path: Union[str, Path]) -> None: + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False, default=str) + + +class ConsoleWriter(Writer): + def write(self, data: Dict) -> None: + from rich.table import Table + + table = Table(title="Files and Folders") + table.add_column("Provider", justify="right", style="cyan", no_wrap=True) + table.add_column("Id", style="magenta") + table.add_column("Name", style="magenta") + table.add_column("Kind", style="magenta") + 
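+        # One row per file: provider plus a few identifying columns (id, name, kind).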
for provider, items in data.items(): + for item in items: + table.add_row(provider, item.id, item.name, item.kind) + print(table) + + +class TextWriter(Writer): + def write(self, data: Any, file_path: Union[str, Path]) -> None: + with open(file_path, 'w', encoding='utf-8') as f: + if isinstance(data, (list, dict)): + f.write(json.dumps(data, indent=2, default=str)) + else: + f.write(str(data)) \ No newline at end of file From ec570516142db873dc8955149995e00340fb1e64 Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Tue, 24 Jun 2025 20:45:56 +0200 Subject: [PATCH 24/39] =?UTF-8?q?corrected=20tests=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/docbinder_oss/cli/search.py | 23 ------ src/docbinder_oss/core/schemas.py | 7 -- src/docbinder_oss/helpers/writer.py | 36 +++++---- tests/helpers/test_writer.py | 80 +++++++++++++++++++ .../google_drive/__init__.py | 0 .../google_drive/conftest.py | 20 ++--- .../google_drive/test_google_drive_buckets.py | 4 +- .../google_drive/test_google_drive_files.py | 6 +- .../test_google_drive_permissions.py | 4 +- 9 files changed, 119 insertions(+), 61 deletions(-) create mode 100644 tests/helpers/test_writer.py rename tests/{services => providers}/google_drive/__init__.py (100%) rename tests/{services => providers}/google_drive/conftest.py (63%) rename tests/{services => providers}/google_drive/test_google_drive_buckets.py (90%) rename tests/{services => providers}/google_drive/test_google_drive_files.py (95%) rename tests/{services => providers}/google_drive/test_google_drive_permissions.py (88%) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 036e812..b6ab969 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -192,26 +192,3 @@ def __write_csv(files_by_provider, filename): if isinstance(parents, list): file_dict["parents"] = ";".join(str(p) for p in parents) writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) - - -def __write_json(files_by_provider, filename, flat=False): - with open(filename, "w") as jsonfile: - if flat: - all_files = [] - for provider, files in files_by_provider.items(): - for file in files: - file_dict = ( - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - ) - file_dict["provider"] = provider - all_files.append(file_dict) - json.dump(all_files, jsonfile, default=str, indent=2) - else: - grouped = { - provider: [ - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - for file in files - ] - for provider, files in files_by_provider.items() - } - json.dump(grouped, jsonfile, default=str, indent=2) diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 1d8b72b..354a61a 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -62,17 +62,10 @@ class File(BaseModel): size: Optional[str] = Field(description="Size in bytes, as a string. Only populated for files.") parents: Optional[List[str]] = Field(description="Parent folder IDs, if applicable.") - capabilities: Optional[FileCapabilities] = None - shared: Optional[bool] starred: Optional[bool] trashed: Optional[bool] - # Add full_path as an optional field for export/CLI assignment - full_path: Optional[str] = Field( - default=None, description="Full path of the file/folder, computed at runtime." 
- ) - def __init__(self, **data: Any): # Coerce parents to a list of strings or None if "parents" in data: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py index 0363bea..eddf4d5 100644 --- a/src/docbinder_oss/helpers/writer.py +++ b/src/docbinder_oss/helpers/writer.py @@ -3,8 +3,13 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Dict, List, Union +from pydantic import BaseModel from rich import print -from rich.panel import Panel + +import logging + + +logger = logging.getLogger(__name__) class Writer(ABC): @@ -22,7 +27,6 @@ class MultiFormatWriter: _writers = { '.csv': 'CSVWriter', '.json': 'JSONWriter', - '.txt': 'TextWriter', } @classmethod @@ -44,18 +48,31 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: class CSVWriter(Writer): + def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: + fieldnames = next(iter(data.values()))[0].model_fields_set + return ["provider", *fieldnames] + def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: if not data: + logger.warning("No data to write to CSV.") return with open(file_path, 'w', newline='', encoding='utf-8') as f: - writer = csv.DictWriter(f, fieldnames=data[0].keys()) + writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) writer.writeheader() - writer.writerows(data) + for provider, items in data.items(): + for item in items: + item_dict = item.model_dump() if isinstance(item, BaseModel) else item + item_dict['provider'] = provider + writer.writerow(item_dict) class JSONWriter(Writer): - def write(self, data: Any, file_path: Union[str, Path]) -> None: + def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: + data = { + provider: [item.model_dump() for item in items] + for provider, items in data.items() + } with open(file_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False, default=str) @@ -73,12 +90,3 @@ def write(self, data: Dict) -> None: for item in items: table.add_row(provider, item.id, item.name, item.kind) print(table) - - -class TextWriter(Writer): - def write(self, data: Any, file_path: Union[str, Path]) -> None: - with open(file_path, 'w', encoding='utf-8') as f: - if isinstance(data, (list, dict)): - f.write(json.dumps(data, indent=2, default=str)) - else: - f.write(str(data)) \ No newline at end of file diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py new file mode 100644 index 0000000..d3cf8ce --- /dev/null +++ b/tests/helpers/test_writer.py @@ -0,0 +1,80 @@ +import json +import csv +import pytest +from pydantic import BaseModel + +from docbinder_oss.helpers.writer import ( + MultiFormatWriter, + CSVWriter, + JSONWriter, +) + +class DummyModel(BaseModel): + id: str + name: str + kind: str + +@pytest.fixture +def sample_data(): + return { + "provider1": [ + DummyModel(id="1", name="FileA", kind="file"), + DummyModel(id="2", name="FolderB", kind="folder"), + ], + "provider2": [ + DummyModel(id="3", name="FileC", kind="file"), + ], + } + +def test_csv_writer(tmp_path, sample_data): + file_path = tmp_path / "output.csv" + writer = CSVWriter() + writer.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, newline='', encoding='utf-8') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} + assert rows[0]["provider"] == "provider1" + +def test_json_writer(tmp_path, sample_data): 
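+    # JSONWriter keeps the provider -> files grouping, so each provider should
+    # appear as a top-level key after the round-trip below.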
+ file_path = tmp_path / "output.json" + writer = JSONWriter() + writer.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding='utf-8') as f: + data = json.load(f) + assert "provider1" in data + assert isinstance(data["provider1"], list) + assert data["provider1"][0]["id"] == "1" + + +def test_multiformat_writer_csv(tmp_path, sample_data): + file_path = tmp_path / "test.csv" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, newline='', encoding='utf-8') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + +def test_multiformat_writer_json(tmp_path, sample_data): + file_path = tmp_path / "test.json" + MultiFormatWriter.write(sample_data, file_path) + assert file_path.exists() + with open(file_path, encoding='utf-8') as f: + data = json.load(f) + assert "provider2" in data + +def test_multiformat_writer_unsupported(tmp_path, sample_data): + file_path = tmp_path / "test.unsupported" + with pytest.raises(ValueError): + MultiFormatWriter.write(sample_data, file_path) + +def test_csv_writer_empty_data(tmp_path, caplog): + file_path = tmp_path / "empty.csv" + writer = CSVWriter() + with caplog.at_level("WARNING"): + writer.write({}, file_path) + assert "No data to write to CSV." in caplog.text diff --git a/tests/services/google_drive/__init__.py b/tests/providers/google_drive/__init__.py similarity index 100% rename from tests/services/google_drive/__init__.py rename to tests/providers/google_drive/__init__.py diff --git a/tests/services/google_drive/conftest.py b/tests/providers/google_drive/conftest.py similarity index 63% rename from tests/services/google_drive/conftest.py rename to tests/providers/google_drive/conftest.py index 8f3fe03..b248aac 100644 --- a/tests/services/google_drive/conftest.py +++ b/tests/providers/google_drive/conftest.py @@ -11,7 +11,7 @@ @pytest.fixture -def mock_gdrive_service(): +def mock_gdrive_provider(): """ This is the core of our testing strategy. We use 'patch' to replace the `build` function from the googleapiclient library. @@ -19,24 +19,24 @@ def mock_gdrive_service(): Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will receive our mock object instead of making a real network call. """ - with patch("docbinder_oss.services.google_drive.google_drive_client.build") as mock_build: - # Create a mock for the service object that `build` would return - mock_service = MagicMock() - # Configure the `build` function to return our mock service - mock_build.return_value = mock_service - yield mock_service + with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: + # Create a mock for the provider object that `build` would return + mock_provider = MagicMock() + # Configure the `build` function to return our mock provider + mock_build.return_value = mock_provider + yield mock_provider @pytest.fixture -def gdrive_client(mock_gdrive_service): +def gdrive_client(mock_gdrive_provider): """ Creates an instance of our GoogleDriveClient. It will be initialized with a fake config and will use - the mock_gdrive_service fixture internally. + the mock_gdrive_provider fixture internally. 
""" # Patch _get_credentials to avoid real auth with patch( - "docbinder_oss.services.google_drive.google_drive_client.GoogleDriveClient._get_credentials", + "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", return_value=MagicMock(), ): config = GoogleDriveServiceConfig( diff --git a/tests/services/google_drive/test_google_drive_buckets.py b/tests/providers/google_drive/test_google_drive_buckets.py similarity index 90% rename from tests/services/google_drive/test_google_drive_buckets.py rename to tests/providers/google_drive/test_google_drive_buckets.py index 44e3bd5..a4a91c3 100644 --- a/tests/services/google_drive/test_google_drive_buckets.py +++ b/tests/providers/google_drive/test_google_drive_buckets.py @@ -3,7 +3,7 @@ from docbinder_oss.core.schemas import Bucket -def test_list_buckets(mock_gdrive_service, gdrive_client): +def test_list_buckets(mock_gdrive_provider, gdrive_client): fake_api_response = { "drives": [ { @@ -21,7 +21,7 @@ def test_list_buckets(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.drives.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = ( fake_api_response ) diff --git a/tests/services/google_drive/test_google_drive_files.py b/tests/providers/google_drive/test_google_drive_files.py similarity index 95% rename from tests/services/google_drive/test_google_drive_files.py rename to tests/providers/google_drive/test_google_drive_files.py index 4ed40b2..432af3a 100644 --- a/tests/services/google_drive/test_google_drive_files.py +++ b/tests/providers/google_drive/test_google_drive_files.py @@ -53,7 +53,7 @@ def list_all_files(self): return list_all_files(self) monkeypatch.setattr( - "docbinder_oss.services.create_provider_instance", lambda cfg: DummyClient() + "docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient() ) orig_cwd = os.getcwd() os.chdir(tmp_path) @@ -61,7 +61,7 @@ def list_all_files(self): os.chdir(orig_cwd) -def test_list_files(mock_gdrive_service, gdrive_client): +def test_list_files(mock_gdrive_provider, gdrive_client): fake_api_response = { "files": [ { @@ -97,7 +97,7 @@ def test_list_files(mock_gdrive_service, gdrive_client): ] } - mock_gdrive_service.files.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = ( fake_api_response ) diff --git a/tests/services/google_drive/test_google_drive_permissions.py b/tests/providers/google_drive/test_google_drive_permissions.py similarity index 88% rename from tests/services/google_drive/test_google_drive_permissions.py rename to tests/providers/google_drive/test_google_drive_permissions.py index ddc0b8c..e4b14f6 100644 --- a/tests/services/google_drive/test_google_drive_permissions.py +++ b/tests/providers/google_drive/test_google_drive_permissions.py @@ -1,7 +1,7 @@ from docbinder_oss.core.schemas import Permission, User -def test_get_permissions(mock_gdrive_service, gdrive_client): +def test_get_permissions(mock_gdrive_provider, gdrive_client): fake_api_response = { "permissions": [ { @@ -18,7 +18,7 @@ def test_get_permissions(mock_gdrive_service, gdrive_client): } ] } - mock_gdrive_service.permissions.return_value.list.return_value.execute.return_value = ( + mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = ( fake_api_response ) From 8a958e147fd7c211c62940f9f16effe6502e7a78 Mon Sep 17 00:00:00 2001 From: Christophe 
Beke
Date: Fri, 27 Jun 2025 18:15:36 +0200
Subject: [PATCH 25/39] Changed filter_files to a private method and updated linting.

---
 pyproject.toml                  | 2 +-
 src/docbinder_oss/cli/search.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 09ae953..f80c86b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ testpaths = [
 
 [tool.ruff]
 # Set the maximum line length to 100.
-line-length = 100
+line-length = 125
 
 [tool.ruff.lint]
 # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that
diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py
index b6ab969..f760cd7 100644
--- a/src/docbinder_oss/cli/search.py
+++ b/src/docbinder_oss/cli/search.py
@@ -3,7 +3,6 @@
 import typer
 from typing import Optional
 import csv
-import json
 
 from docbinder_oss.core.schemas import File
 from docbinder_oss.helpers.config import load_config
@@ -64,7 +63,7 @@ def search(
             raise typer.Exit(code=1)
         current_files[provider_config.name] = client.list_all_files()
 
-    current_files = filter_files(
+    current_files = __filter_files(
        current_files,
        name=name,
        owner=owner,
@@ -80,7 +79,7 @@ def search(
     return
 
 
-def filter_files(
+def __filter_files(
    files,
    name=None,
    owner=None,

From 01f451e23b9fabb18d66e04d5c0c26f91e15cfad Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Fri, 27 Jun 2025 18:21:03 +0200
Subject: [PATCH 26/39] Added Black for formatting and Ruff for linting, including pre-commit hooks

---
 .pre-commit-config.yaml                        |  9 +++++
 CONTRIBUTING.md                                | 24 +++++++++++-
 pyproject.toml                                 |  6 ++-
 src/docbinder_oss/cli/provider/get.py          |  9 +----
 src/docbinder_oss/cli/search.py                | 32 ++++----------
 src/docbinder_oss/cli/setup.py                 |  4 +-
 src/docbinder_oss/core/schemas.py              |  4 +-
 src/docbinder_oss/helpers/path_utils.py        |  4 +-
 src/docbinder_oss/helpers/writer.py            | 27 ++++++-------
 .../google_drive/google_drive_client.py        |  8 +---
 tests/commands/test_search_command.py          | 16 ++------
 tests/helpers/test_writer.py                   | 15 +++++--
 .../google_drive/test_google_drive_buckets.py  |  4 +-
 .../google_drive/test_google_drive_files.py    |  8 +---
 .../test_google_drive_permissions.py           |  4 +-
 uv.lock                                        | 39 +++++++++++++++++++
 16 files changed, 122 insertions(+), 91 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..9168817
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 24.3.0
+    hooks:
+      - id: black
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
+    hooks:
+      - id: ruff
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4226b1e..f5b442f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u
 
 ---
 **Note:**
-Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync.
\ No newline at end of file
+Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync.
+
+## Code Style and Linting
+
+This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing.
+
+- Run the following before committing code:
+
+```zsh
+uv run black .
+uv run ruff check .
+``` + +- To automatically format and lint code on every commit, install pre-commit hooks: + +```zsh +uv pip install pre-commit +pre-commit install +``` + +This will ensure Black and Ruff are run on staged files before each commit. + +Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f80c86b..51ca21b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ include = ["src/docbinder_oss/**"] [dependency-groups] dev = [ + "black>=25.1.0", "mkdocs>=1.6.1", "mkdocs-material>=9.6.14", "pytest>=8.4.0", @@ -46,8 +47,11 @@ testpaths = [ "tests", ] +[tool.black] +line-length = 125 +skip-string-normalization = false + [tool.ruff] -# Set the maximum line length to 100. line-length = 125 [tool.ruff.lint] diff --git a/src/docbinder_oss/cli/provider/get.py b/src/docbinder_oss/cli/provider/get.py index 7793870..e86b7be 100644 --- a/src/docbinder_oss/cli/provider/get.py +++ b/src/docbinder_oss/cli/provider/get.py @@ -5,9 +5,7 @@ @app.command("get") def get_provider( - connection_type: str = typer.Option( - None, "--type", "-t", help="The type of the provider to get." - ), + connection_type: str = typer.Option(None, "--type", "-t", help="The type of the provider to get."), name: str = typer.Option(None, "--name", "-n", help="The name of the provider to get."), ): """Get connection information for a provider by name or by type. @@ -25,10 +23,7 @@ def get_provider( typer.echo(f"Provider '{name}' found with config: {provider}") provider_found = True if provider.type == connection_type: - typer.echo( - f"Provider '{provider.name}' of type '{connection_type}'" - f" found with config: {provider}" - ) + typer.echo(f"Provider '{provider.name}' of type '{connection_type}'" f" found with config: {provider}") provider_found = True if not provider_found: typer.echo(f"No providers found with name '{name}' or type '{connection_type}'.") diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index f760cd7..6ba0583 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -17,29 +17,15 @@ @app.command() def search( name: Optional[str] = typer.Option(None, "--name", help="Regex to match file name"), - owner: Optional[str] = typer.Option( - None, "--owner", help="Owner/contributor/reader email address to filter" - ), - updated_after: Optional[str] = typer.Option( - None, "--updated-after", help="Last update after (ISO timestamp)" - ), - updated_before: Optional[str] = typer.Option( - None, "--updated-before", help="Last update before (ISO timestamp)" - ), - created_after: Optional[str] = typer.Option( - None, "--created-after", help="Created after (ISO timestamp)" - ), - created_before: Optional[str] = typer.Option( - None, "--created-before", help="Created before (ISO timestamp)" - ), + owner: Optional[str] = typer.Option(None, "--owner", help="Owner/contributor/reader email address to filter"), + updated_after: Optional[str] = typer.Option(None, "--updated-after", help="Last update after (ISO timestamp)"), + updated_before: Optional[str] = typer.Option(None, "--updated-before", help="Last update before (ISO timestamp)"), + created_after: Optional[str] = typer.Option(None, "--created-after", help="Created after (ISO timestamp)"), + created_before: Optional[str] = typer.Option(None, "--created-before", help="Created before (ISO timestamp)"), min_size: Optional[int] = typer.Option(None, 
"--min-size", help="Minimum file size in KB"), max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"), - provider: Optional[str] = typer.Option( - None, "--provider", "-p", help="Provider name to search in" - ), - export_format: str = typer.Option( - None, "--export-format", help="Export format: csv or json", show_default=True - ), + provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"), + export_format: str = typer.Option(None, "--export-format", help="Export format: csv or json", show_default=True), ): """Search for files or folders matching filters across all providers and export results as CSV or JSON.""" @@ -161,9 +147,7 @@ def __write_csv(files_by_provider, filename): writer.writeheader() for provider, files in files_by_provider.items(): for file in files: - file_dict = ( - file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - ) + file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() file_dict["provider"] = provider # Flatten owners for CSV (only email addresses) owners = file_dict.get("owners") diff --git a/src/docbinder_oss/cli/setup.py b/src/docbinder_oss/cli/setup.py index b9ff56d..b74cbc9 100644 --- a/src/docbinder_oss/cli/setup.py +++ b/src/docbinder_oss/cli/setup.py @@ -25,9 +25,7 @@ def setup( providers = {} for entry in provider: if ":" not in entry: - typer.echo( - f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format." - ) + typer.echo(f"Provider entry '{entry}' must be in provider:key1=val1,key2=val2 format.") raise typer.Exit(code=1) prov_name, prov_kvs = entry.split(":", 1) kv_dict = {} diff --git a/src/docbinder_oss/core/schemas.py b/src/docbinder_oss/core/schemas.py index 354a61a..5fd8268 100644 --- a/src/docbinder_oss/core/schemas.py +++ b/src/docbinder_oss/core/schemas.py @@ -42,9 +42,7 @@ class File(BaseModel): """Represents a file or folder""" id: str = Field(repr=True, description="Unique identifier for the file or folder.") - name: str = Field( - repr=True, description="Name of the file or folder. May not be unique." - ) + name: str = Field(repr=True, description="Name of the file or folder. 
May not be unique.") mime_type: str = Field(repr=True, description="MIME type of the file or folder.") kind: Optional[str] = Field(repr=True, description="Kind of the item, e.g., 'drive#file'.") diff --git a/src/docbinder_oss/helpers/path_utils.py b/src/docbinder_oss/helpers/path_utils.py index a724e9f..b3a20b3 100644 --- a/src/docbinder_oss/helpers/path_utils.py +++ b/src/docbinder_oss/helpers/path_utils.py @@ -56,9 +56,7 @@ def build_all_full_paths(files, root_id="root", root_name="My Drive", root_id_to break parent_id = parents[0] if parent_id in root_id_to_name: - temp_stack.append( - (current.id, "/" + root_id_to_name[parent_id] + "/" + current.name) - ) + temp_stack.append((current.id, "/" + root_id_to_name[parent_id] + "/" + current.name)) break parent = id_to_item.get(parent_id) if not parent: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py index eddf4d5..92e917a 100644 --- a/src/docbinder_oss/helpers/writer.py +++ b/src/docbinder_oss/helpers/writer.py @@ -14,7 +14,7 @@ class Writer(ABC): """Abstract base writer class.""" - + @abstractmethod def write(self, data: Any, file_path: Union[None, str, Path]) -> None: """Write data to file.""" @@ -23,12 +23,12 @@ def write(self, data: Any, file_path: Union[None, str, Path]) -> None: class MultiFormatWriter: """Factory writer that automatically detects format from file extension.""" - + _writers = { - '.csv': 'CSVWriter', - '.json': 'JSONWriter', + ".csv": "CSVWriter", + ".json": "JSONWriter", } - + @classmethod def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: """Write data to file, format determined by extension.""" @@ -38,10 +38,10 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: return path = Path(file_path) extension = path.suffix.lower() - + if extension not in cls._writers: raise ValueError(f"Unsupported format: {extension}") - + writer_class = globals()[cls._writers[extension]] writer = writer_class() writer.write(data, file_path) @@ -56,24 +56,21 @@ def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: if not data: logger.warning("No data to write to CSV.") return - - with open(file_path, 'w', newline='', encoding='utf-8') as f: + + with open(file_path, "w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) writer.writeheader() for provider, items in data.items(): for item in items: item_dict = item.model_dump() if isinstance(item, BaseModel) else item - item_dict['provider'] = provider + item_dict["provider"] = provider writer.writerow(item_dict) class JSONWriter(Writer): def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: - data = { - provider: [item.model_dump() for item in items] - for provider, items in data.items() - } - with open(file_path, 'w', encoding='utf-8') as f: + data = {provider: [item.model_dump() for item in items] for provider, items in data.items()} + with open(file_path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False, default=str) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_client.py b/src/docbinder_oss/providers/google_drive/google_drive_client.py index a171a2a..6c68a71 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_client.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_client.py @@ -41,9 +41,7 @@ def __init__(self, config: GoogleDriveServiceConfig): def _get_credentials(self): logger.info("Getting credentials for Google Drive client") - 
TOKEN_PATH = os.path.expanduser( - "~/.config/docbinder/gcp/" + self.config.name + "_token.json" - ) + TOKEN_PATH = os.path.expanduser("~/.config/docbinder/gcp/" + self.config.name + "_token.json") # Ensure the directory exists os.makedirs(os.path.dirname(TOKEN_PATH), exist_ok=True) logger.debug(f"Token path: {TOKEN_PATH}") @@ -57,9 +55,7 @@ def _get_credentials(self): if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: - flow = InstalledAppFlow.from_client_secrets_file( - self.settings.gcp_credentials_json, self.SCOPES - ) + flow = InstalledAppFlow.from_client_secrets_file(self.settings.gcp_credentials_json, self.SCOPES) creds = flow.run_local_server(port=0) # Save the credentials for the next run with open(TOKEN_PATH, "w") as token: diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 46899c8..1a709b4 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -14,9 +14,7 @@ def __init__(self, **kwargs): self.mime_type = kwargs.get("mime_type", "application/pdf") self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get( - "owners", [type("User", (), {"email_address": "owner@example.com"})()] - ) + self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) self.last_modifying_user = kwargs.get( "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() ) @@ -94,9 +92,7 @@ def create_provider_instance(cfg): }, )() - monkeypatch.setattr( - "docbinder_oss.cli.search.create_provider_instance", create_provider_instance - ) + monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) # Change working directory to a temp dir for file output orig_cwd = os.getcwd() @@ -166,9 +162,7 @@ def test_search_owner_filter(): def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke( - app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"] - ) + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -178,9 +172,7 @@ def test_search_updated_after_filter(): def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke( - app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"] - ) + result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index d3cf8ce..3a05ebf 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -9,11 +9,13 @@ JSONWriter, ) + class DummyModel(BaseModel): id: str name: str kind: str + @pytest.fixture def sample_data(): return { @@ -26,24 +28,26 @@ def sample_data(): ], } + def test_csv_writer(tmp_path, sample_data): file_path = tmp_path / "output.csv" writer = CSVWriter() writer.write(sample_data, file_path) assert file_path.exists() - with open(file_path, newline='', encoding='utf-8') as f: + with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} 
assert rows[0]["provider"] == "provider1" + def test_json_writer(tmp_path, sample_data): file_path = tmp_path / "output.json" writer = JSONWriter() writer.write(sample_data, file_path) assert file_path.exists() - with open(file_path, encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: data = json.load(f) assert "provider1" in data assert isinstance(data["provider1"], list) @@ -54,24 +58,27 @@ def test_multiformat_writer_csv(tmp_path, sample_data): file_path = tmp_path / "test.csv" MultiFormatWriter.write(sample_data, file_path) assert file_path.exists() - with open(file_path, newline='', encoding='utf-8') as f: + with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 + def test_multiformat_writer_json(tmp_path, sample_data): file_path = tmp_path / "test.json" MultiFormatWriter.write(sample_data, file_path) assert file_path.exists() - with open(file_path, encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: data = json.load(f) assert "provider2" in data + def test_multiformat_writer_unsupported(tmp_path, sample_data): file_path = tmp_path / "test.unsupported" with pytest.raises(ValueError): MultiFormatWriter.write(sample_data, file_path) + def test_csv_writer_empty_data(tmp_path, caplog): file_path = tmp_path / "empty.csv" writer = CSVWriter() diff --git a/tests/providers/google_drive/test_google_drive_buckets.py b/tests/providers/google_drive/test_google_drive_buckets.py index a4a91c3..bff2dee 100644 --- a/tests/providers/google_drive/test_google_drive_buckets.py +++ b/tests/providers/google_drive/test_google_drive_buckets.py @@ -21,9 +21,7 @@ def test_list_buckets(mock_gdrive_provider, gdrive_client): } ] } - mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.drives.return_value.list.return_value.execute.return_value = fake_api_response buckets = gdrive_client.list_buckets() diff --git a/tests/providers/google_drive/test_google_drive_files.py b/tests/providers/google_drive/test_google_drive_files.py index 432af3a..b8a5866 100644 --- a/tests/providers/google_drive/test_google_drive_files.py +++ b/tests/providers/google_drive/test_google_drive_files.py @@ -52,9 +52,7 @@ class DummyClient: def list_all_files(self): return list_all_files(self) - monkeypatch.setattr( - "docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient() - ) + monkeypatch.setattr("docbinder_oss.providers.create_provider_instance", lambda cfg: DummyClient()) orig_cwd = os.getcwd() os.chdir(tmp_path) yield @@ -97,9 +95,7 @@ def test_list_files(mock_gdrive_provider, gdrive_client): ] } - mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.files.return_value.list.return_value.execute.return_value = fake_api_response files = gdrive_client.list_files_in_folder() diff --git a/tests/providers/google_drive/test_google_drive_permissions.py b/tests/providers/google_drive/test_google_drive_permissions.py index e4b14f6..63d8865 100644 --- a/tests/providers/google_drive/test_google_drive_permissions.py +++ b/tests/providers/google_drive/test_google_drive_permissions.py @@ -18,9 +18,7 @@ def test_get_permissions(mock_gdrive_provider, gdrive_client): } ] } - mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = ( - fake_api_response - ) + mock_gdrive_provider.permissions.return_value.list.return_value.execute.return_value = 
fake_api_response permissions = gdrive_client.get_permissions("1234") diff --git a/uv.lock b/uv.lock index 8630097..fe662e7 100644 --- a/uv.lock +++ b/uv.lock @@ -37,6 +37,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/37/fb6973edeb700f6e3d6ff222400602ab1830446c25c7b4676d8de93e65b8/backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc", size = 380336, upload-time = "2025-02-25T16:53:29.858Z" }, ] +[[package]] +name = "black" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" }, + { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, + { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, + { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, + { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, + { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time 
= "2025-01-29T04:19:12.944Z" }, + { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, + { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, + { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, + { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, +] + [[package]] name = "cachetools" version = "5.5.2" @@ -167,6 +195,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "black" }, { name = "mkdocs" }, { name = "mkdocs-material" }, { name = "pytest" }, @@ -188,6 +217,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "black", specifier = ">=25.1.0" }, { name = "mkdocs", specifier = ">=1.6.1" }, { name = "mkdocs-material", specifier = ">=9.6.14" }, { name = "pytest", specifier = ">=8.4.0" }, @@ -511,6 +541,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "oauthlib" version = "3.2.2" From 97d749e919dc42d81ee85d2bd6a84abed3001b87 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:21:50 +0200 Subject: [PATCH 27/39] Added pre-commit --- pyproject.toml | 1 + uv.lock | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 51ca21b..ced6ecc 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dev = [ "black>=25.1.0", "mkdocs>=1.6.1", "mkdocs-material>=9.6.14", + "pre-commit>=4.2.0", "pytest>=8.4.0", "tox>=4.26.0", "tox-uv>=1.26.0", diff --git a/uv.lock b/uv.lock index fe662e7..61dfd65 100644 --- a/uv.lock +++ b/uv.lock @@ -83,6 +83,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" }, ] +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -198,6 +207,7 @@ dev = [ { name = "black" }, { name = "mkdocs" }, { name = "mkdocs-material" }, + { name = "pre-commit" }, { name = "pytest" }, { name = "tox" }, { name = "tox-uv" }, @@ -220,6 +230,7 @@ dev = [ { name = "black", specifier = ">=25.1.0" }, { name = "mkdocs", specifier = ">=1.6.1" }, { name = "mkdocs-material", specifier = ">=9.6.14" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.4.0" }, { name = "tox", specifier = ">=4.26.0" }, { name = "tox-uv", specifier = ">=1.26.0" }, @@ -355,6 +366,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" }, ] +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, +] + [[package]] name = "idna" version = "3.10" @@ -550,6 +570,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = 
"sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -604,6 +633,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + [[package]] name = "proto-plus" version = "1.26.1" From 8c1fd2984a8574b13aff0b13913242b2f6d77fd9 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:24:12 +0200 Subject: [PATCH 28/39] Update contributing file --- docs/CONTRIBUTING.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4226b1e..f5b442f 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -56,4 +56,26 @@ All dependencies are tracked in `pyproject.toml`. Use `uv` commands to keep it u --- **Note:** -Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. \ No newline at end of file +Always use `uv` commands to manage dependencies and environments to keep `pyproject.toml` in sync. + +## Code Style and Linting + +This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting and [Ruff](https://docs.astral.sh/ruff/) for linting. All code should be formatted and linted before committing. + +- Run the following before committing code: + +```zsh +uv run black . +uv run ruff check . +``` + +- To automatically format and lint code on every commit, install pre-commit hooks: + +```zsh +uv pip install pre-commit +pre-commit install +``` + +This will ensure Black and Ruff are run on staged files before each commit. + +Configuration for Black and Ruff is in `pyproject.toml`. This enforces consistent quotes, spacing, and other style rules for all contributors. \ No newline at end of file From c31d4bb6c3c16ae5c8a2a473f45124fba1aa774d Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 18:54:12 +0200 Subject: [PATCH 29/39] Fixed the search cli --export-filename and improved the writer function to now show which extensions are supported. 
---
 src/docbinder_oss/cli/search.py     | 12 +++++-------
 src/docbinder_oss/helpers/writer.py |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py
index 6ba0583..25226f8 100644
--- a/src/docbinder_oss/cli/search.py
+++ b/src/docbinder_oss/cli/search.py
@@ -25,14 +25,12 @@ def search(
     min_size: Optional[int] = typer.Option(None, "--min-size", help="Minimum file size in KB"),
     max_size: Optional[int] = typer.Option(None, "--max-size", help="Maximum file size in KB"),
     provider: Optional[str] = typer.Option(None, "--provider", "-p", help="Provider name to search in"),
-    export_format: str = typer.Option(None, "--export-format", help="Export format: csv or json", show_default=True),
+    export_file: Optional[str] = typer.Option(
+        None, "--export-file", help="Export file name (e.g. results.csv or results.json)"
+    ),
 ):
     """Search for files or folders matching filters across all
-    providers and export results as CSV or JSON."""
-
-    # 1 Load documents with filter "provider"
-    # 2 Filter the documents based on the provided filters
-    # 3 Export results to CSV or JSON
+    providers and export results as CSV or JSON. If --export-file is not provided, results are printed to the console."""
 
     config: Config = load_config()
     if not config.providers:
@@ -61,7 +59,7 @@ def search(
         max_size=max_size,
     )
 
-    MultiFormatWriter.write(current_files, export_format)
+    MultiFormatWriter.write(current_files, export_file)
 
     return

diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py
index 92e917a..7def033 100644
--- a/src/docbinder_oss/helpers/writer.py
+++ b/src/docbinder_oss/helpers/writer.py
@@ -40,7 +40,7 @@ def write(cls, data: Any, file_path: Union[None, str, Path]) -> None:
         extension = path.suffix.lower()
 
         if extension not in cls._writers:
-            raise ValueError(f"Unsupported format: {extension}")
+            raise ValueError(f"Unsupported format: {extension}. Supported formats are: {', '.join(cls._writers.keys())}")
 
         writer_class = globals()[cls._writers[extension]]
         writer = writer_class()
         writer.write(data, file_path)

From 27a1b5d55cbf9b863ad685782f75bcbc6a1f073b Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Fri, 27 Jun 2025 19:34:36 +0200
Subject: [PATCH 30/39] Updated the writer functions to work, improved readability, and aligned the tests with them.
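
A minimal sketch of the refactored writer API as laid out in this patch. The `DummyFile` model and the provider name are hypothetical stand-ins for the real `File` schema and a configured provider:

```python
from pydantic import BaseModel

from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter


class DummyFile(BaseModel):  # hypothetical stand-in for the real File schema
    id: str
    name: str
    kind: str


# Grouped results arrive as {provider_name: [models]}; each writer flattens
# them through flatten_file() before exporting.
results = {"gdrive-work": [DummyFile(id="1", name="Report", kind="file")]}

MultiFormatWriter.write(results, "results.csv")   # ".csv" dispatches to CSVWriter
MultiFormatWriter.write(results, "results.json")  # ".json" dispatches to JSONWriter
MultiFormatWriter.write(results, None)            # no path: ConsoleWriter renders a table
```
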
--- src/docbinder_oss/cli/search.py | 32 ++++--- src/docbinder_oss/helpers/writer.py | 89 ------------------- src/docbinder_oss/helpers/writers/base.py | 11 +++ .../helpers/writers/helper_functions.py | 46 ++++++++++ .../helpers/writers/multiformat_writer.py | 34 +++++++ .../helpers/writers/writer_console.py | 29 ++++++ .../helpers/writers/writer_csv.py | 41 +++++++++ .../helpers/writers/writer_json.py | 29 ++++++ tests/commands/test_search_command.py | 24 ++--- tests/helpers/test_writer.py | 34 ++++--- 10 files changed, 247 insertions(+), 122 deletions(-) delete mode 100644 src/docbinder_oss/helpers/writer.py create mode 100644 src/docbinder_oss/helpers/writers/base.py create mode 100644 src/docbinder_oss/helpers/writers/helper_functions.py create mode 100644 src/docbinder_oss/helpers/writers/multiformat_writer.py create mode 100644 src/docbinder_oss/helpers/writers/writer_console.py create mode 100644 src/docbinder_oss/helpers/writers/writer_csv.py create mode 100644 src/docbinder_oss/helpers/writers/writer_json.py diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 25226f8..6cfd24a 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -9,7 +9,7 @@ from docbinder_oss.providers import create_provider_instance from docbinder_oss.helpers.config import Config from docbinder_oss.providers.base_class import BaseProvider -from docbinder_oss.helpers.writer import MultiFormatWriter +from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter app = typer.Typer() @@ -100,16 +100,28 @@ def __filter_files( def file_matches(file: File): if name and not re.search(name, file.name, re.IGNORECASE): return False - if owner and not any(owner in u.email_address for u in file.owners): - return False - if updated_after and __parse_dt(file.modified_time) < __parse_dt(updated_after): - return False - if updated_before and __parse_dt(file.modified_time) > __parse_dt(updated_before): - return False - if created_after and __parse_dt(file.created_time) < __parse_dt(created_after): - return False - if created_before and __parse_dt(file.created_time) > __parse_dt(created_before): + if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)): return False + if updated_after: + file_mod_time = __parse_dt(file.modified_time) + updated_after_dt = __parse_dt(updated_after) + if file_mod_time is None or updated_after_dt is None or file_mod_time < updated_after_dt: + return False + if updated_before: + file_mod_time = __parse_dt(file.modified_time) + updated_before_dt = __parse_dt(updated_before) + if file_mod_time is None or updated_before_dt is None or file_mod_time > updated_before_dt: + return False + if created_after: + file_created_time = __parse_dt(file.created_time) + created_after_dt = __parse_dt(created_after) + if file_created_time is None or created_after_dt is None or file_created_time < created_after_dt: + return False + if created_before: + file_created_time = __parse_dt(file.created_time) + created_before_dt = __parse_dt(created_before) + if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: + return False if min_size and file.size < min_size * 1024: return False if max_size and file.size > max_size * 1024: diff --git a/src/docbinder_oss/helpers/writer.py b/src/docbinder_oss/helpers/writer.py deleted file mode 100644 index 7def033..0000000 --- a/src/docbinder_oss/helpers/writer.py +++ /dev/null @@ -1,89 +0,0 @@ -import csv -import json -from 
abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, List, Union -from pydantic import BaseModel -from rich import print - -import logging - - -logger = logging.getLogger(__name__) - - -class Writer(ABC): - """Abstract base writer class.""" - - @abstractmethod - def write(self, data: Any, file_path: Union[None, str, Path]) -> None: - """Write data to file.""" - pass - - -class MultiFormatWriter: - """Factory writer that automatically detects format from file extension.""" - - _writers = { - ".csv": "CSVWriter", - ".json": "JSONWriter", - } - - @classmethod - def write(cls, data: Any, file_path: Union[None, str, Path]) -> None: - """Write data to file, format determined by extension.""" - if file_path is None: - # If no file path is provided, write to console - ConsoleWriter().write(data) - return - path = Path(file_path) - extension = path.suffix.lower() - - if extension not in cls._writers: - raise ValueError(f"Unsupported format: {extension}. Supported formats are: {', '.join(cls._writers.keys())}") - - writer_class = globals()[cls._writers[extension]] - writer = writer_class() - writer.write(data, file_path) - - -class CSVWriter(Writer): - def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: - fieldnames = next(iter(data.values()))[0].model_fields_set - return ["provider", *fieldnames] - - def write(self, data: List[Dict], file_path: Union[str, Path]) -> None: - if not data: - logger.warning("No data to write to CSV.") - return - - with open(file_path, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) - writer.writeheader() - for provider, items in data.items(): - for item in items: - item_dict = item.model_dump() if isinstance(item, BaseModel) else item - item_dict["provider"] = provider - writer.writerow(item_dict) - - -class JSONWriter(Writer): - def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: - data = {provider: [item.model_dump() for item in items] for provider, items in data.items()} - with open(file_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, ensure_ascii=False, default=str) - - -class ConsoleWriter(Writer): - def write(self, data: Dict) -> None: - from rich.table import Table - - table = Table(title="Files and Folders") - table.add_column("Provider", justify="right", style="cyan", no_wrap=True) - table.add_column("Id", style="magenta") - table.add_column("Name", style="magenta") - table.add_column("Kind", style="magenta") - for provider, items in data.items(): - for item in items: - table.add_row(provider, item.id, item.name, item.kind) - print(table) diff --git a/src/docbinder_oss/helpers/writers/base.py b/src/docbinder_oss/helpers/writers/base.py new file mode 100644 index 0000000..b0da8af --- /dev/null +++ b/src/docbinder_oss/helpers/writers/base.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + + +class Writer(ABC): + """Abstract base writer class for exporting data.""" + + @abstractmethod + def write(self, data: Any, file_path: str | Path | None = None) -> None: + pass diff --git a/src/docbinder_oss/helpers/writers/helper_functions.py b/src/docbinder_oss/helpers/writers/helper_functions.py new file mode 100644 index 0000000..9277d74 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/helper_functions.py @@ -0,0 +1,46 @@ +def flatten_file(item, provider=None): + """ + Convert a file object (Pydantic, DummyFile, or dict) to a flat dict for 
export. + Flattens owners, parents, and last_modifying_user fields, and adds provider if given. + """ + # Convert to dict + if hasattr(item, "model_dump"): + result = item.model_dump() + elif hasattr(item, "__dict__"): + result = dict(item.__dict__) + else: + result = dict(item) + # Add provider field to output dict + if provider: + result["provider"] = provider + # Flatten owners to email addresses + owners = result.get("owners") + if owners: + emails = [] + for owner in owners: + if isinstance(owner, dict): + emails.append(owner.get("email_address") or owner.get("email") or str(owner)) + elif hasattr(owner, "email_address"): + emails.append(owner.email_address) + else: + emails.append(str(owner)) + result["owners"] = ";".join(filter(None, emails)) + # Flatten parents to semicolon-separated string + parents = result.get("parents") + if isinstance(parents, list): + result["parents"] = ";".join(str(p) for p in parents) + elif parents is None: + result["parents"] = "" + else: + result["parents"] = str(parents) + # Flatten last_modifying_user to email address + lmu = result.get("last_modifying_user") + if lmu: + if isinstance(lmu, dict): + result["last_modifying_user"] = lmu.get("email_address") or lmu.get("email") or str(lmu) + elif hasattr(lmu, "email_address"): + result["last_modifying_user"] = lmu.email_address + else: + result["last_modifying_user"] = str(lmu) + + return result diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py new file mode 100644 index 0000000..4cae081 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -0,0 +1,34 @@ +from pathlib import Path +from typing import Any + +from docbinder_oss.helpers.writers.writer_console import ConsoleWriter +from docbinder_oss.helpers.writers.writer_csv import CSVWriter +from docbinder_oss.helpers.writers.writer_json import JSONWriter + + +class MultiFormatWriter: + """ + Factory writer that automatically detects format from file extension or format string. + If file_path is None, prints to console. 
+ """ + + _writers = { + ".csv": CSVWriter, + ".json": JSONWriter, + "csv": CSVWriter, + "json": JSONWriter, + } + + @classmethod + def write(cls, data: Any, file_path: str | None = None) -> None: + if not file_path: + ConsoleWriter().write(data) + return + extension = Path(file_path).suffix.lower() + # Use extension or fallback to format string + writer_key = extension if extension in cls._writers else file_path.lower() + if writer_key not in cls._writers: + raise ValueError(f"Unsupported format: {file_path}") + writer_class = cls._writers[writer_key] + writer = writer_class() + writer.write(data, file_path) diff --git a/src/docbinder_oss/helpers/writers/writer_console.py b/src/docbinder_oss/helpers/writers/writer_console.py new file mode 100644 index 0000000..0fae481 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_console.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer + + +class ConsoleWriter(Writer): + """Writer for pretty-printing data to the console using rich tables.""" + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + from rich.table import Table + + table = Table(title="Files and Folders") + table.add_column("Provider", justify="right", style="cyan", no_wrap=True) + table.add_column("Id", style="magenta") + table.add_column("Name", style="magenta") + table.add_column("Kind", style="magenta") + for provider, items in data.items() if isinstance(data, dict) else [("?", data)]: + for item in items: + if hasattr(item, "model_dump"): + item = item.model_dump() + elif hasattr(item, "__dict__"): + item = dict(item.__dict__) + table.add_row( + str(provider), + str(item.get("id", "")), + str(item.get("name", "")), + str(item.get("kind", "")), + ) + print(table) diff --git a/src/docbinder_oss/helpers/writers/writer_csv.py b/src/docbinder_oss/helpers/writers/writer_csv.py new file mode 100644 index 0000000..3d6eb64 --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_csv.py @@ -0,0 +1,41 @@ +import csv +import logging +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer +from docbinder_oss.helpers.writers.helper_functions import flatten_file + + +class CSVWriter(Writer): + """Writer for exporting data to CSV files.""" + + def get_fieldnames(self, rows: list) -> list: + fieldnames = set() + for row in rows: + fieldnames.update(row.keys()) + # Provider first, then the rest sorted + return ["provider"] + sorted(f for f in fieldnames if f != "provider") + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + """ + Always flattens grouped dicts to a flat list for CSV export. 
+ """ + rows = [] + if isinstance(data, dict): + for provider, items in data.items(): + for item in items: + rows.append(flatten_file(item, provider)) + elif isinstance(data, list): + for item in data: + provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) + rows.append(flatten_file(item, provider)) + else: + return + if not rows or not file_path: + logging.warning("No data to write to CSV.") + return + with open(file_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(rows)) + writer.writeheader() + for row in rows: + writer.writerow(row) diff --git a/src/docbinder_oss/helpers/writers/writer_json.py b/src/docbinder_oss/helpers/writers/writer_json.py new file mode 100644 index 0000000..977ce3f --- /dev/null +++ b/src/docbinder_oss/helpers/writers/writer_json.py @@ -0,0 +1,29 @@ +import json +from pathlib import Path +from typing import Any +from docbinder_oss.helpers.writers.base import Writer +from docbinder_oss.helpers.writers.helper_functions import flatten_file + + +class JSONWriter(Writer): + """Writer for exporting data to JSON files.""" + + def write(self, data: Any, file_path: str | Path | None = None) -> None: + """ + Always flattens grouped dicts to a flat list for JSON export. + """ + flat = [] + if isinstance(data, dict): + for provider, items in data.items(): + for item in items: + flat.append(flatten_file(item, provider)) + elif isinstance(data, list): + for item in data: + provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) + flat.append(flatten_file(item, provider)) + else: + return + if not file_path: + return + with open(file_path, "w", encoding="utf-8") as f: + json.dump(flat, f, indent=2, ensure_ascii=False, default=str) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 1a709b4..8608fac 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -103,7 +103,7 @@ def create_provider_instance(cfg): def test_search_export_csv(): runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "csv"]) + result = runner.invoke(app, ["search", "--export-file", "search_results.csv"]) assert result.exit_code == 0 assert os.path.exists("search_results.csv") with open("search_results.csv") as f: @@ -123,7 +123,7 @@ def test_search_export_csv(): def test_search_export_json(): runner = CliRunner() - result = runner.invoke(app, ["search", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--export-file", "search_results.json"]) assert result.exit_code == 0 assert os.path.exists("search_results.json") with open("search_results.json") as f: @@ -142,7 +142,7 @@ def test_search_export_json(): def test_search_name_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--name", "Alpha", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--name", "Alpha", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -152,7 +152,7 @@ def test_search_name_filter(): def test_search_owner_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -162,7 +162,7 @@ def 
test_search_owner_filter(): def test_search_updated_after_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -172,7 +172,9 @@ def test_search_updated_after_filter(): def test_search_created_before_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-format", "json"]) + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] + ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -182,7 +184,7 @@ def test_search_created_before_filter(): def test_search_min_size_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--min-size", "3", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -192,7 +194,7 @@ def test_search_min_size_filter(): def test_search_max_size_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--max-size", "3", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -202,7 +204,7 @@ def test_search_max_size_filter(): def test_search_provider_filter(): runner = CliRunner() - result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-format", "json"]) + result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) @@ -225,8 +227,8 @@ def test_search_combined_filters(): "3", "--provider", "dummy2", - "--export-format", - "json", + "--export-file", + "search_results.json", ], ) assert result.exit_code == 0 diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index 3a05ebf..651bf87 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -3,11 +3,9 @@ import pytest from pydantic import BaseModel -from docbinder_oss.helpers.writer import ( - MultiFormatWriter, - CSVWriter, - JSONWriter, -) +from docbinder_oss.helpers.writers.multiformat_writer import MultiFormatWriter +from docbinder_oss.helpers.writers.writer_csv import CSVWriter +from docbinder_oss.helpers.writers.writer_json import JSONWriter class DummyModel(BaseModel): @@ -38,7 +36,10 @@ def test_csv_writer(tmp_path, sample_data): reader = csv.DictReader(f) rows = list(reader) assert len(rows) == 3 - assert set(rows[0].keys()) == {"provider", "id", "name", "kind"} + # Allow extra fields, but required fields must be present + for row in rows: + for field in ("provider", "id", "name", "kind"): + assert field in row assert rows[0]["provider"] == "provider1" @@ -49,9 +50,12 @@ def test_json_writer(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) - assert "provider1" in data - assert isinstance(data["provider1"], list) - assert data["provider1"][0]["id"] == "1" + assert isinstance(data, list) + assert len(data) == 3 + providers = 
{d["provider"] for d in data} + assert "provider1" in providers + assert "provider2" in providers + assert any(d["id"] == "1" and d["provider"] == "provider1" for d in data) def test_multiformat_writer_csv(tmp_path, sample_data): @@ -70,18 +74,24 @@ def test_multiformat_writer_json(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) - assert "provider2" in data + assert isinstance(data, list) + providers = {d["provider"] for d in data} + assert "provider2" in providers def test_multiformat_writer_unsupported(tmp_path, sample_data): file_path = tmp_path / "test.unsupported" + # Convert file_path to str for .lower() in MultiFormatWriter with pytest.raises(ValueError): - MultiFormatWriter.write(sample_data, file_path) + MultiFormatWriter.write(sample_data, str(file_path)) def test_csv_writer_empty_data(tmp_path, caplog): + import logging + file_path = tmp_path / "empty.csv" writer = CSVWriter() - with caplog.at_level("WARNING"): + logger = logging.getLogger() + with caplog.at_level("WARNING", logger=logger.name): writer.write({}, file_path) assert "No data to write to CSV." in caplog.text From c7747b23d7baa54a6065d67da41fb3d46d9508d0 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:40:25 +0200 Subject: [PATCH 31/39] Make sure to get all files, not only the shared ones --- src/docbinder_oss/providers/google_drive/google_drive_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index c8c08b7..76512d3 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -27,7 +27,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - args["q"] = "sharedWithMe=true and trashed=false" + args["q"] = None resp = self.service.files().list(**args).execute() files = resp.get("files", []) From 463ce846d5d87f8fb5306a8f34cd2c521c45c6ee Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:45:18 +0200 Subject: [PATCH 32/39] Fix mkdocs --- docs/tool/providers/custom_provider.md | 14 +++++++------- mkdocs.yml | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/tool/providers/custom_provider.md b/docs/tool/providers/custom_provider.md index bad4644..8a2cca7 100644 --- a/docs/tool/providers/custom_provider.md +++ b/docs/tool/providers/custom_provider.md @@ -6,7 +6,7 @@ This guide explains how to integrate a new storage provider (e.g., DropBox, OneD ## 1. Create a Service Configuration Class -Each provider must define a configuration class that inherits from [`ServiceConfig`](src/docbinder_oss/services/base_class.py): +Each provider must define a configuration class that inherits from [`ServiceConfig`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py): ```python # filepath: src/docbinder_oss/services/my_provider/my_provider_service_config.py @@ -26,7 +26,7 @@ class MyProviderServiceConfig(ServiceConfig): ## 2. 
Implement the Storage Client -Create a client class that inherits from [`BaseStorageClient`](src/docbinder_oss/services/base_class.py) and implements all abstract methods: +Create a client class that inherits from [`BaseStorageClient`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py) and implements all abstract methods: ```python # filepath: src/docbinder_oss/services/my_provider/my_provider_client.py @@ -57,7 +57,7 @@ class MyProviderClient(BaseStorageClient): pass ``` -- Use the shared models [`File`](src/docbinder_oss/core/schemas.py), [`Permission`](src/docbinder_oss/core/schemas.py), etc., for return types. +- Use the shared models [`File`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), [`Permission`](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py), etc., for return types. --- @@ -109,10 +109,10 @@ providers: ## Reference -- [src/docbinder_oss/services/base_class.py](src/docbinder_oss/services/base_class.py) -- [src/docbinder_oss/core/schemas.py](src/docbinder_oss/core/schemas.py) -- [src/docbinder_oss/services/google_drive/](src/docbinder_oss/services/google_drive/) (example implementation) -- [src/docbinder_oss/services/__init__.py](src/docbinder_oss/services/__init__.py) +- [src/docbinder_oss/services/base_class.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/base_class.py) +- [src/docbinder_oss/core/schemas.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/core/schemas.py) +- [src/docbinder_oss/services/google_drive/](https://github.com/SnappyLab/DocBinder-OSS/tree/main/src/docbinder_oss/services/google_drive/) (example implementation) +- [src/docbinder_oss/services/__init__.py](https://github.com/SnappyLab/DocBinder-OSS/blob/main/src/docbinder_oss/services/__init__.py) --- diff --git a/mkdocs.yml b/mkdocs.yml index e864210..78c26bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,9 @@ nav: - Commands: - Main CLI: commands/main.md - Provider: commands/provider.md + - Providers: + - Google Drive: tool/providers/google_drive.md + - Custom Provider: tool/providers/custom_provider.md - Contributing: CONTRIBUTING.md - Code of Conduct: CODE_OF_CONDUCT.md - Security: SECURITY.md From 4827ce9dfa3cf92d00f6548f4f2cb4a87819897d Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:48:45 +0200 Subject: [PATCH 33/39] Update incorrect readme reference in mkdocs --- docs/tool/providers/google_drive.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tool/providers/google_drive.md b/docs/tool/providers/google_drive.md index a390973..9791cad 100644 --- a/docs/tool/providers/google_drive.md +++ b/docs/tool/providers/google_drive.md @@ -65,4 +65,4 @@ providers: ## References - [Google Drive API Documentation](https://developers.google.com/drive) -- [DocBinder Documentation](../README.md) \ No newline at end of file +- [DocBinder Documentation](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file From da028871617c9d69a04d79d3586375b4f4431444 Mon Sep 17 00:00:00 2001 From: Christophe Beke Date: Fri, 27 Jun 2025 19:52:10 +0200 Subject: [PATCH 34/39] update workflow of docbinder oss to not trigger on doc updates and changed the reference name of docbinder documentation to be more correct --- .github/workflows/docbinder-oss.yml | 6 ++++++ docs/tool/providers/google_drive.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/docbinder-oss.yml b/.github/workflows/docbinder-oss.yml index 3072209..ea94412 100644 --- a/.github/workflows/docbinder-oss.yml +++ b/.github/workflows/docbinder-oss.yml @@ -5,10 +5,16 @@ on: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" pull_request: branches: - main - dev + paths-ignore: + - "docs/**" + - "mkdocs.yml" jobs: test: runs-on: ubuntu-latest diff --git a/docs/tool/providers/google_drive.md b/docs/tool/providers/google_drive.md index 9791cad..62d488c 100644 --- a/docs/tool/providers/google_drive.md +++ b/docs/tool/providers/google_drive.md @@ -65,4 +65,4 @@ providers: ## References - [Google Drive API Documentation](https://developers.google.com/drive) -- [DocBinder Documentation](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file +- [DocBinder OSS - GitHub](https://github.com/SnappyLab/DocBinder-OSS) \ No newline at end of file From ca62bc66e7ce96c504e174c0e75a6191a28c492e Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:39:19 +0200 Subject: [PATCH 35/39] revert back to writer and improve tests --- .pre-commit-config.yaml | 25 +- pyproject.toml | 4 - src/docbinder_oss/cli/search.py | 69 +--- src/docbinder_oss/helpers/config.py | 5 +- src/docbinder_oss/helpers/rich_helpers.py | 19 - .../helpers/writers/multiformat_writer.py | 9 +- .../helpers/writers/writer_console.py | 17 +- .../helpers/writers/writer_csv.py | 46 +-- .../helpers/writers/writer_json.py | 30 +- .../google_drive/google_drive_files.py | 2 +- tests/commands/test_search_command.py | 372 ++++++++++-------- tests/conftest.py | 135 +++++++ tests/helpers/test_writer.py | 18 +- tests/providers/google_drive/conftest.py | 46 --- 14 files changed, 430 insertions(+), 367 deletions(-) delete mode 100644 src/docbinder_oss/helpers/rich_helpers.py create mode 100644 tests/conftest.py delete mode 100644 tests/providers/google_drive/conftest.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9168817..9591f18 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,24 @@ repos: - - repo: https://github.com/psf/black - rev: 24.3.0 + - repo: https://github.com/astral-sh/uv-pre-commit + rev: 0.7.16 hooks: - - id: black + - id: uv-export + - id: uv-lock + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + # Ruff version. + rev: v0.12.1 hooks: - - id: ruff + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --select, I, --fix ] + # Run the formatter. 
+ - id: ruff-format + types_or: [ python, pyi ] diff --git a/pyproject.toml b/pyproject.toml index ced6ecc..ed3fec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,10 +48,6 @@ testpaths = [ "tests", ] -[tool.black] -line-length = 125 -skip-string-normalization = false - [tool.ruff] line-length = 125 diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 6cfd24a..34feb90 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,7 +1,8 @@ from datetime import datetime +import logging import re import typer -from typing import Optional +from typing import Dict, List, Optional import csv from docbinder_oss.core.schemas import File @@ -64,7 +65,7 @@ def search( def __filter_files( - files, + files: Dict[str, List[File]], name=None, owner=None, updated_after=None, @@ -73,7 +74,7 @@ def __filter_files( created_before=None, min_size=None, max_size=None, -): +) -> Dict[str, List[File]]: """ Filters a collection of files based on various criteria such as name, owner, modification/creation dates, and file size. @@ -103,14 +104,14 @@ def file_matches(file: File): if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)): return False if updated_after: - file_mod_time = __parse_dt(file.modified_time) + file_modified_time = __parse_dt(file.modified_time) updated_after_dt = __parse_dt(updated_after) - if file_mod_time is None or updated_after_dt is None or file_mod_time < updated_after_dt: + if file_modified_time is None or updated_after_dt is None or file_modified_time < updated_after_dt: return False if updated_before: - file_mod_time = __parse_dt(file.modified_time) + file_modified_time = __parse_dt(file.modified_time) updated_before_dt = __parse_dt(updated_before) - if file_mod_time is None or updated_before_dt is None or file_mod_time > updated_before_dt: + if file_modified_time is None or updated_before_dt is None or file_modified_time > updated_before_dt: return False if created_after: file_created_time = __parse_dt(file.created_time) @@ -120,11 +121,12 @@ def file_matches(file: File): if created_before: file_created_time = __parse_dt(file.created_time) created_before_dt = __parse_dt(created_before) + logging.debug(f"File created time: {file_created_time}, Created before: {created_before_dt}, Type: {type(file_created_time)}, Type: {type(created_before_dt)}") if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: return False - if min_size and file.size < min_size * 1024: + if min_size and file.size < min_size: return False - if max_size and file.size > max_size * 1024: + if max_size and file.size > max_size: return False return True @@ -139,49 +141,6 @@ def __parse_dt(val): return val try: return datetime.fromisoformat(val) - except Exception: - return val - - -def __write_csv(files_by_provider, filename): - # Collect all possible fieldnames from all files - all_fieldnames = set(["provider"]) - for files in files_by_provider.values(): - for file in files: - file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy() - all_fieldnames.update(file_dict.keys()) - # Move provider to the front, rest sorted - fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider") - with open(filename, "w", newline="") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for provider, files in files_by_provider.items(): - for file in files: - file_dict = file.model_dump() if 
hasattr(file, "model_dump") else file.__dict__.copy() - file_dict["provider"] = provider - # Flatten owners for CSV (only email addresses) - owners = file_dict.get("owners") - if isinstance(owners, list): - emails = [] - for u in owners: - if hasattr(u, "email_address") and u.email_address: - emails.append(u.email_address) - elif isinstance(u, dict) and u.get("email_address"): - emails.append(u["email_address"]) - elif isinstance(u, str): - emails.append(u) - file_dict["owners"] = ";".join(emails) - # Flatten last_modifying_user for CSV (only email address) - last_mod = file_dict.get("last_modifying_user") - if last_mod is not None: - if hasattr(last_mod, "email_address"): - file_dict["last_modifying_user"] = last_mod.email_address - elif isinstance(last_mod, dict) and "email_address" in last_mod: - file_dict["last_modifying_user"] = last_mod["email_address"] - else: - file_dict["last_modifying_user"] = str(last_mod) - # Flatten parents for CSV - parents = file_dict.get("parents") - if isinstance(parents, list): - file_dict["parents"] = ";".join(str(p) for p in parents) - writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames}) + except Exception as e: + typer.echo(f"Failed to parse datetime from value: {val} with error: {e}", err=True) + raise ValueError(f"Invalid datetime format: {val}") from e diff --git a/src/docbinder_oss/helpers/config.py b/src/docbinder_oss/helpers/config.py index 8a49070..088d95d 100644 --- a/src/docbinder_oss/helpers/config.py +++ b/src/docbinder_oss/helpers/config.py @@ -1,11 +1,12 @@ import logging import os +from typing import List import typer import yaml from pydantic import BaseModel, ValidationError -from docbinder_oss.providers import get_provider_registry +from docbinder_oss.providers import ServiceUnion, get_provider_registry logger = logging.getLogger(__name__) @@ -15,7 +16,7 @@ class Config(BaseModel): """Main configuration model that holds a list of all provider configs.""" - providers: list + providers: List[ServiceUnion] # type: ignore def load_config() -> Config: diff --git a/src/docbinder_oss/helpers/rich_helpers.py b/src/docbinder_oss/helpers/rich_helpers.py deleted file mode 100644 index 6faefe5..0000000 --- a/src/docbinder_oss/helpers/rich_helpers.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import List -from rich.table import Table - - -def create_rich_table(headers: List[str], rows: List[List[str]]) -> Table: - """ - Create a Rich table with the given headers and rows. - - Args: - headers (List[str]): The headers for the table. - rows (List[List[str]]): The data rows for the table. - - Returns: - Table: A Rich Table object. 
- """ - table = Table(*headers, show_header=True, header_style="bold magenta") - for row in rows: - table.add_row(*row) - return table diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py index 4cae081..c6b688e 100644 --- a/src/docbinder_oss/helpers/writers/multiformat_writer.py +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -1,6 +1,8 @@ from pathlib import Path -from typing import Any +from typing import Any, Dict, List +from docbinder_oss.core.schemas import File +from docbinder_oss.helpers.writers.base import Writer from docbinder_oss.helpers.writers.writer_console import ConsoleWriter from docbinder_oss.helpers.writers.writer_csv import CSVWriter from docbinder_oss.helpers.writers.writer_json import JSONWriter @@ -20,7 +22,7 @@ class MultiFormatWriter: } @classmethod - def write(cls, data: Any, file_path: str | None = None) -> None: + def write(cls, data: Dict[str, List[File]], file_path: str | None = None) -> None: if not file_path: ConsoleWriter().write(data) return @@ -30,5 +32,6 @@ def write(cls, data: Any, file_path: str | None = None) -> None: if writer_key not in cls._writers: raise ValueError(f"Unsupported format: {file_path}") writer_class = cls._writers[writer_key] - writer = writer_class() + writer: Writer = writer_class() writer.write(data, file_path) + \ No newline at end of file diff --git a/src/docbinder_oss/helpers/writers/writer_console.py b/src/docbinder_oss/helpers/writers/writer_console.py index 0fae481..ff17bff 100644 --- a/src/docbinder_oss/helpers/writers/writer_console.py +++ b/src/docbinder_oss/helpers/writers/writer_console.py @@ -1,5 +1,7 @@ from pathlib import Path from typing import Any +from rich.table import Table +from rich import print from docbinder_oss.helpers.writers.base import Writer @@ -7,23 +9,12 @@ class ConsoleWriter(Writer): """Writer for pretty-printing data to the console using rich tables.""" def write(self, data: Any, file_path: str | Path | None = None) -> None: - from rich.table import Table - table = Table(title="Files and Folders") table.add_column("Provider", justify="right", style="cyan", no_wrap=True) table.add_column("Id", style="magenta") table.add_column("Name", style="magenta") table.add_column("Kind", style="magenta") - for provider, items in data.items() if isinstance(data, dict) else [("?", data)]: + for provider, items in data.items(): for item in items: - if hasattr(item, "model_dump"): - item = item.model_dump() - elif hasattr(item, "__dict__"): - item = dict(item.__dict__) - table.add_row( - str(provider), - str(item.get("id", "")), - str(item.get("name", "")), - str(item.get("kind", "")), - ) + table.add_row(provider, item.id, item.name, item.kind) print(table) diff --git a/src/docbinder_oss/helpers/writers/writer_csv.py b/src/docbinder_oss/helpers/writers/writer_csv.py index 3d6eb64..0d9c281 100644 --- a/src/docbinder_oss/helpers/writers/writer_csv.py +++ b/src/docbinder_oss/helpers/writers/writer_csv.py @@ -1,41 +1,29 @@ import csv import logging from pathlib import Path -from typing import Any +from typing import List, Dict, Union +from pydantic import BaseModel from docbinder_oss.helpers.writers.base import Writer -from docbinder_oss.helpers.writers.helper_functions import flatten_file + +logger = logging.getLogger(__name__) class CSVWriter(Writer): """Writer for exporting data to CSV files.""" + def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]: + fieldnames = 
next(iter(data.values()))[0].model_fields_set +        return ["provider", *fieldnames] -    def get_fieldnames(self, rows: list) -> list: -        fieldnames = set() -        for row in rows: -            fieldnames.update(row.keys()) -        # Provider first, then the rest sorted -        return ["provider"] + sorted(f for f in fieldnames if f != "provider") +    def write(self, data: Dict[str, List[BaseModel]], file_path: Union[str, Path]) -> None: +        if not data: +            logger.warning("No data to write to CSV.") +            return -    def write(self, data: Any, file_path: str | Path | None = None) -> None: -        """ -        Always flattens grouped dicts to a flat list for CSV export. -        """ -        rows = [] -        if isinstance(data, dict): +        with open(file_path, 'w', newline='', encoding='utf-8') as f: +            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data)) +            writer.writeheader() for provider, items in data.items(): for item in items: -                    rows.append(flatten_file(item, provider)) -        elif isinstance(data, list): -            for item in data: -                provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) -                rows.append(flatten_file(item, provider)) -        else: -            return -        if not rows or not file_path: -            logging.warning("No data to write to CSV.") -            return -        with open(file_path, "w", newline="", encoding="utf-8") as f: -            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(rows)) -            writer.writeheader() -            for row in rows: -                writer.writerow(row) diff --git a/src/docbinder_oss/helpers/writers/writer_json.py b/src/docbinder_oss/helpers/writers/writer_json.py index 977ce3f..d928814 100644 --- a/src/docbinder_oss/helpers/writers/writer_json.py +++ b/src/docbinder_oss/helpers/writers/writer_json.py @@ -1,29 +1,17 @@ import json from pathlib import Path -from typing import Any +from typing import Dict, List, Union +from docbinder_oss.core.schemas import File from docbinder_oss.helpers.writers.base import Writer -from docbinder_oss.helpers.writers.helper_functions import flatten_file class JSONWriter(Writer): """Writer for exporting data to JSON files.""" - def write(self, data: Any, file_path: str | Path | None = None) -> None: - """ - Always flattens grouped dicts to a flat list for JSON export.
- """ - flat = [] - if isinstance(data, dict): - for provider, items in data.items(): - for item in items: - flat.append(flatten_file(item, provider)) - elif isinstance(data, list): - for item in data: - provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None) - flat.append(flatten_file(item, provider)) - else: - return - if not file_path: - return - with open(file_path, "w", encoding="utf-8") as f: - json.dump(flat, f, indent=2, ensure_ascii=False, default=str) + def write(self, data: Dict[str, List[File]], file_path: Union[str, Path]) -> None: + data = { + provider: [item.model_dump() for item in items] + for provider, items in data.items() + } + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False, default=str) diff --git a/src/docbinder_oss/providers/google_drive/google_drive_files.py b/src/docbinder_oss/providers/google_drive/google_drive_files.py index 76512d3..18fbb58 100644 --- a/src/docbinder_oss/providers/google_drive/google_drive_files.py +++ b/src/docbinder_oss/providers/google_drive/google_drive_files.py @@ -27,7 +27,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]: if bucket_id: args["q"] = f"'{bucket_id}' in parents and trashed=false" else: - args["q"] = None + args["q"] = "trashed=false" resp = self.service.files().list(**args).execute() files = resp.get("files", []) diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 8608fac..1ccc378 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -1,220 +1,272 @@ -import os import csv import json +from typing import Dict import pytest +from pathlib import Path from typer.testing import CliRunner +from docbinder_oss.core.schemas import User from docbinder_oss.main import app +from conftest import DummyModel -class DummyFile: - def __init__(self, **kwargs): - self.id = kwargs.get("id", "fileid1") - self.name = kwargs.get("name", "Test File") - self.size = kwargs.get("size", 12345) - self.mime_type = kwargs.get("mime_type", "application/pdf") - self.created_time = kwargs.get("created_time", "2024-01-01T00:00:00") - self.modified_time = kwargs.get("modified_time", "2024-01-02T00:00:00") - self.owners = kwargs.get("owners", [type("User", (), {"email_address": "owner@example.com"})()]) - self.last_modifying_user = kwargs.get( - "last_modifying_user", type("User", (), {"email_address": "mod@example.com"})() - ) - self.web_view_link = kwargs.get("web_view_link", "http://example.com/view") - self.web_content_link = kwargs.get("web_content_link", "http://example.com/content") - self.shared = kwargs.get("shared", True) - self.trashed = kwargs.get("trashed", False) - - def model_dump(self): - # Simulate pydantic's model_dump for test compatibility - return { - "id": self.id, - "name": self.name, - "size": self.size, - "mime_type": self.mime_type, - "created_time": self.created_time, - "modified_time": self.modified_time, - "owners": [u.email_address for u in self.owners], - "last_modifying_user": getattr(self.last_modifying_user, "email_address", None), - "web_view_link": self.web_view_link, - "web_content_link": self.web_content_link, - "shared": self.shared, - "trashed": self.trashed, - } - - -@pytest.fixture(autouse=True) -def patch_provider(monkeypatch, tmp_path): - # Patch config loader to return two dummy provider configs - class DummyProviderConfig: - def __init__(self, name): - self.name = name - self.type = name # Simulate type for 
registry - -    class DummyConfig: -        providers = [DummyProviderConfig("dummy1"), DummyProviderConfig("dummy2")] - -    # Patch load_config in the CLI's namespace -    monkeypatch.setattr("docbinder_oss.cli.search.load_config", lambda: DummyConfig()) - -    # Patch create_provider_instance in the CLI's namespace -    def create_provider_instance(cfg): -        if cfg.name == "dummy1": -            return type( -                "DummyClient", -                (), -                { -                    "list_all_files": lambda self: [ -                        DummyFile( -                            id="f1", -                            name="Alpha Report", -                            size=2048, -                            owners=[type("User", (), {"email_address": "alpha@a.com"})()], -                            created_time="2024-01-01T10:00:00", -                            modified_time="2024-01-02T10:00:00", -                        ) -                    ] -                }, -            )() -        else: -            return type( -                "DummyClient", -                (), -                { -                    "list_all_files": lambda self: [ -                        DummyFile( -                            id="f2", -                            name="Beta Notes", -                            size=4096, -                            owners=[type("User", (), {"email_address": "beta@b.com"})()], -                            created_time="2024-02-01T10:00:00", -                            modified_time="2024-02-02T10:00:00", -                        ) -                    ] -                }, -            )() - -    monkeypatch.setattr("docbinder_oss.cli.search.create_provider_instance", create_provider_instance) - -    # Change working directory to a temp dir for file output -    orig_cwd = os.getcwd() -    os.chdir(tmp_path) -    yield -    os.chdir(orig_cwd) - -def test_search_export_csv(): -    runner = CliRunner() +runner = CliRunner() + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), +    ])], indirect=True) +def test_search_export_csv(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test happy path for search command with CSV export.""" result = runner.invoke(app, ["search", "--export-file", "search_results.csv"]) assert result.exit_code == 0 -    assert os.path.exists("search_results.csv") +    assert Path("search_results.csv").exists() with open("search_results.csv") as f: reader = csv.DictReader(f) rows = list(reader) -        assert len(rows) == 2 -        names = set(r["name"] for r in rows) -        assert names == {"Alpha Report", "Beta Notes"} -        # Check owners field is a string and contains the expected email -        for r in rows: -            owners = r["owners"] -            if r["name"] == "Alpha Report": -                assert "alpha@a.com" in owners -            if r["name"] == "Beta Notes": -                assert "beta@b.com" in owners - - -def test_search_export_json(): -    runner = CliRunner() +        assert len(rows) == 4 +        assert set(r["provider"] for r in rows) == {"dummy1", "dummy2"} + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), +    ])], indirect=True) +def test_search_export_json(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test happy path for search command with JSON export.""" result = runner.invoke(app, ["search", "--export-file", "search_results.json"]) assert result.exit_code == 0 -    assert os.path.exists("search_results.json") +    assert Path("search_results.json").exists() with open("search_results.json") as f: -        data = json.load(f) -    assert isinstance(data, list) -    assert len(data) == 2 -    names = set(d["name"] for d in data) -    assert names == {"Alpha Report", "Beta Notes"} -    # Check
owners field is a string or list - for d in data: - if d["name"] == "Alpha Report": - assert "alpha@a.com" in d["owners"] - if d["name"] == "Beta Notes": - assert "beta@b.com" in d["owners"] - + data: Dict = json.load(f) + assert len(data.keys()) == 2 + assert len(data["dummy1"]) == 2 + assert len(data["dummy2"]) == 2 + assert all(key in data for key in ("dummy1", "dummy2")) -def test_search_name_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file"), + ])], indirect=True) +def test_search_name_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """ + Test search command with name filter that returns no results. + """ result = runner.invoke(app, ["search", "--name", "Alpha", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" + assert len(data["dummy1"]) == 0 + assert len(data["dummy2"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file"), + DummyModel(id="dummy_file2", name="File 2", kind="file"), + ])], indirect=True) +def test_search_name_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """ + Test search command with name filter that returns some results. 
+ """ + result = runner.invoke(app, ["search", "--name", "dummy", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data) == 2 + assert data["dummy1"][0]["name"] == "dummy File 1" + assert data["dummy2"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@a.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns no results.""" + result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_owner_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), + ])], indirect=True) +def test_search_owner_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with owner filter that returns some results.""" result = runner.invoke(app, ["search", "--owner", "beta@b.com", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - + assert data["dummy1"][0]["owners"][0]["email_address"] == "beta@b.com" -def test_search_updated_after_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", modified_time="2023-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", modified_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_updated_after_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with updated_after filter that returns no results.""" result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" + assert len(data["dummy1"]) == 0 + +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 
1", kind="file", modified_time="2024-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", modified_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_updated_after_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with updated_after filter that returns some results.""" + result = runner.invoke(app, ["search", "--updated-after", "2024-02-01T00:00:00", "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-04-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-04-30T00:00:00"), + ])], indirect=True) +def test_search_created_before_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with created_before filter that returns no results.""" + result = runner.invoke( + app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] + ) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_created_before_filter(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", created_time="2024-02-02T00:00:00"), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", created_time="2024-01-31T00:00:00"), + ])], indirect=True) +def test_search_created_before_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with created_before filter that returns some results.""" result = runner.invoke( app, ["search", "--created-before", "2024-02-01T00:00:00", "--export-file", "search_results.json"] ) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) - assert len(data) == 1 - assert data[0]["name"] == "Alpha Report" + assert len(data["dummy1"]) == 1 + assert data["dummy1"][0]["name"] == "dummy File 2" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=1), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_min_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with min_size filter that returns no results.""" + result = runner.invoke(app, ["search", "--min-size", 3, "--export-file", "search_results.json"]) + assert result.exit_code == 0 + with open("search_results.json") as f: + data = json.load(f) + assert len(data["dummy1"]) == 0 -def test_search_min_size_filter(): +@pytest.mark.parametrize('load_config_mock', [("dummy", 
1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_min_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): runner = CliRunner() -    result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) +    result = runner.invoke(app, ["search", "--min-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) -    assert len(data) == 1 -    assert data[0]["name"] == "Beta Notes" +    assert len(data["dummy1"]) == 1 +    assert data["dummy1"][0]["name"] == "dummy File 1" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=4), +    ])], indirect=True) +def test_search_max_size_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with max_size filter that returns no results.""" +    result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) +    assert result.exit_code == 0 +    with open("search_results.json") as f: +        data = json.load(f) +        assert len(data["dummy1"]) == 0 -def test_search_max_size_filter(): -    runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_max_size_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with max_size filter that returns some results.""" result = runner.invoke(app, ["search", "--max-size", "3", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) -    assert len(data) == 1 -    assert data[0]["name"] == "Alpha Report" +    assert len(data["dummy1"]) == 1 +    assert data["dummy1"][0]["name"] == "dummy File 2" +@pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ +    DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), +    DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), +    ])], indirect=True) +def test_search_provider_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): +    """Test search command with provider filter that returns no results.""" +    result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) +    assert result.exit_code == 0 +    with open("search_results.json") as f: +        data = json.load(f) +        assert len(data) == 0 -def test_search_provider_filter(): -    runner = CliRunner()
+@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="dummy File 1", kind="file", size=5), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_provider_filter(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with provider filter that returns some results.""" result = runner.invoke(app, ["search", "--provider", "dummy2", "--export-file", "search_results.json"]) assert result.exit_code == 0 with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["provider"] == "dummy2" - assert data[0]["name"] == "Beta Notes" - + assert "dummy2" in data -def test_search_combined_filters(): - runner = CliRunner() +@pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) +@pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) +@pytest.mark.parametrize('list_all_files_mock', [([ + DummyModel(id="dummy_file1", name="Beta File 1", kind="file", size=5, owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2), + ])], indirect=True) +def test_search_combined_filters(load_config_mock, create_provider_instance_mock, list_all_files_mock): + """Test search command with combined filters.""" result = runner.invoke( app, [ @@ -235,6 +287,6 @@ def test_search_combined_filters(): with open("search_results.json") as f: data = json.load(f) assert len(data) == 1 - assert data[0]["name"] == "Beta Notes" - assert data[0]["provider"] == "dummy2" - assert "beta@b.com" in data[0]["owners"] + assert "dummy2" in data + assert data["dummy2"][0]["name"] == "Beta File 1" + assert data["dummy2"][0]["owners"][0]["email_address"] == "beta@b.com" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e3f37ee --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,135 @@ +from typing import List +from unittest.mock import MagicMock, patch + +from pydantic import BaseModel, ConfigDict +import pytest + +from docbinder_oss.providers.base_class import BaseProvider +from docbinder_oss.providers.google_drive.google_drive_client import ( + GoogleDriveClient, +) +from docbinder_oss.providers.google_drive.google_drive_service_config import ( + GoogleDriveServiceConfig, +) + + +class DummyModel(BaseModel): + id: str + name: str + kind: str + + model_config = ConfigDict(extra="allow") + + +class DummyProvider(BaseProvider): + def __init__(self, name, type=None): + self.name = name + self.type = type if type else f"{name}_type" + + def list_all_files(self): + raise NotImplementedError("Please use the pytest parametrize settings to add your test data.") + def test_connection(self): + raise NotImplementedError("This provider does not implement connection testing") + def list_buckets(self): + raise NotImplementedError("This provider does not implement buckets") + def get_permissions(self): + raise NotImplementedError("This provider does not implement permissions") + def list_files_in_folder(self): + raise NotImplementedError("This provider does not implement folder listing") + def get_file_metadata(self, item_id): + raise NotImplementedError("This provider does not implement file metadata retrieval") + +class DummyConfig: 
+ providers: List[DummyProvider] = [] + +@pytest.fixture +def sample_data(): + return { + "provider1": [ + DummyModel(id="1", name="FileA", kind="file"), + DummyModel(id="2", name="FolderB", kind="folder"), + ], + "provider2": [ + DummyModel(id="3", name="FileC", kind="file"), + ], + } + +@pytest.fixture +def mock_gdrive_provider(): + """ + This is the core of our testing strategy. We use 'patch' to replace + the `build` function from the googleapiclient library. + + Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will + receive our mock object instead of making a real network call. + """ + with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: + # Create a mock for the provider object that `build` would return + mock_provider = MagicMock() + # Configure the `build` function to return our mock provider + mock_build.return_value = mock_provider + yield mock_provider + + +@pytest.fixture +def gdrive_client(mock_gdrive_provider): + """ + Creates an instance of our GoogleDriveClient. + It will be initialized with a fake config and will use + the mock_gdrive_provider fixture internally. + """ + # Patch _get_credentials to avoid real auth + with patch( + "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", + return_value=MagicMock(), + ): + config = GoogleDriveServiceConfig( + name="test_gdrive", + gcp_credentials_json="fake_creds.json", + ) + return GoogleDriveClient(config=config) + +@pytest.fixture(scope='session') +def load_config_mock(request, create_config_mock): + """ + This fixture mocks the `load_config` function to return + a dummy configuration with a specified number of providers. + """ + name, number_of_providers = request.param + with patch("docbinder_oss.cli.search.load_config", return_value=create_config_mock(name, number_of_providers)) as _fixture: + yield _fixture + +@pytest.fixture(scope='session') +def create_provider_instance_mock(request, create_provider_mock): + """ + This fixture mocks the `create_provider_instance` function to return + a dummy provider instance based on the provider name. 
+    """ +    with patch("docbinder_oss.cli.search.create_provider_instance", return_value=create_provider_mock(request.param)) as _fixture: +        yield _fixture + +@pytest.fixture(scope="session") +def list_all_files_mock(request): +    """ +    Patches DummyProvider.list_all_files to return the test data +    supplied via pytest's indirect parametrization, instead of +    raising NotImplementedError. +    """ +    data = request.param +    with patch("conftest.DummyProvider.list_all_files", return_value=data) as _fixture: +        yield _fixture + +@pytest.fixture(scope='session') +def create_provider_mock(): +    def create_dummy_provider(name): +        return DummyProvider(name=name, type=f"{name}_type") +    yield create_dummy_provider + +@pytest.fixture(scope='session') +def create_config_mock(create_provider_mock): +    """This fixture creates a dummy configuration with a specified number of providers.""" +    def create_dummy_config(name, number_of_providers=2): +        dummy_config = DummyConfig() +        dummy_config.providers = [create_provider_mock(f"{name}{i+1}") for i in range(number_of_providers)] +        return dummy_config +    yield create_dummy_config \ No newline at end of file diff --git a/tests/helpers/test_writer.py b/tests/helpers/test_writer.py index 651bf87..abe0920 100644 --- a/tests/helpers/test_writer.py +++ b/tests/helpers/test_writer.py @@ -50,12 +50,12 @@ def test_json_writer(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) -    assert isinstance(data, list) -    assert len(data) == 3 -    providers = {d["provider"] for d in data} -    assert "provider1" in providers -    assert "provider2" in providers -    assert any(d["id"] == "1" and d["provider"] == "provider1" for d in data) +    assert isinstance(data, dict) +    assert len(data) == 2 +    assert "provider1" in data +    assert "provider2" in data +    assert data["provider1"][0]["id"] == "1" +    assert data["provider2"][0]["id"] == "3" def test_multiformat_writer_csv(tmp_path, sample_data): @@ -74,9 +74,9 @@ def test_multiformat_writer_json(tmp_path, sample_data): assert file_path.exists() with open(file_path, encoding="utf-8") as f: data = json.load(f) -    assert isinstance(data, list) -    providers = {d["provider"] for d in data} -    assert "provider2" in providers +    assert isinstance(data, dict) +    assert "provider1" in data +    assert "provider2" in data def test_multiformat_writer_unsupported(tmp_path, sample_data): diff --git a/tests/providers/google_drive/conftest.py b/tests/providers/google_drive/conftest.py deleted file mode 100644 index b248aac..0000000 --- a/tests/providers/google_drive/conftest.py +++ /dev/null @@ -1,46 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from docbinder_oss.providers.google_drive.google_drive_client import ( -    GoogleDriveClient, -) -from docbinder_oss.providers.google_drive.google_drive_service_config import ( -    GoogleDriveServiceConfig, -) - - -@pytest.fixture -def mock_gdrive_provider(): -    """ -    This is the core of our testing strategy. We use 'patch' to replace -    the `build` function from the googleapiclient library. - -    Whenever `GoogleDriveClient` calls `build('drive', 'v3', ...)`, it will -    receive our mock object instead of making a real network call. -    """ -    with patch("docbinder_oss.providers.google_drive.google_drive_client.build") as mock_build: -        # Create a mock for the provider object that `build` would return -        mock_provider = MagicMock() -        # Configure the `build` function to return our mock provider -        mock_build.return_value = mock_provider -        yield mock_provider - - -@pytest.fixture -def gdrive_client(mock_gdrive_provider): -    """ -    Creates an instance of our GoogleDriveClient.
- It will be initialized with a fake config and will use - the mock_gdrive_provider fixture internally. - """ - # Patch _get_credentials to avoid real auth - with patch( - "docbinder_oss.providers.google_drive.google_drive_client.GoogleDriveClient._get_credentials", - return_value=MagicMock(), - ): - config = GoogleDriveServiceConfig( - name="test_gdrive", - gcp_credentials_json="fake_creds.json", - ) - return GoogleDriveClient(config=config) From 090ee2983b229435e489d746d0d737541f77a45c Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:41:00 +0200 Subject: [PATCH 36/39] remove logger --- src/docbinder_oss/cli/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 34feb90..0a58f1f 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -121,7 +121,6 @@ def file_matches(file: File): if created_before: file_created_time = __parse_dt(file.created_time) created_before_dt = __parse_dt(created_before) - logging.debug(f"File created time: {file_created_time}, Created before: {created_before_dt}, Type: {type(file_created_time)}, Type: {type(created_before_dt)}") if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt: return False if min_size and file.size < min_size: From a803975aa68151aa963b7f89186a8ead3cd17a9c Mon Sep 17 00:00:00 2001 From: PaoloLeonard Date: Mon, 30 Jun 2025 11:55:29 +0200 Subject: [PATCH 37/39] fix linting --- .pre-commit-config.yaml | 2 +- src/docbinder_oss/cli/search.py | 2 - .../helpers/writers/multiformat_writer.py | 2 +- tests/commands/test_search_command.py | 43 +++++++++++++++++-- tests/conftest.py | 10 ++++- 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9591f18..e885ddb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # Run the linter. - id: ruff-check types_or: [ python, pyi ] - args: [ --select, I, --fix ] + args: [ --select, I, --fix, --select=E501 ] # Run the formatter. 
- id: ruff-format types_or: [ python, pyi ] diff --git a/src/docbinder_oss/cli/search.py b/src/docbinder_oss/cli/search.py index 0a58f1f..d4b63e3 100644 --- a/src/docbinder_oss/cli/search.py +++ b/src/docbinder_oss/cli/search.py @@ -1,9 +1,7 @@ from datetime import datetime -import logging import re import typer from typing import Dict, List, Optional -import csv from docbinder_oss.core.schemas import File from docbinder_oss.helpers.config import load_config diff --git a/src/docbinder_oss/helpers/writers/multiformat_writer.py b/src/docbinder_oss/helpers/writers/multiformat_writer.py index c6b688e..ba282fa 100644 --- a/src/docbinder_oss/helpers/writers/multiformat_writer.py +++ b/src/docbinder_oss/helpers/writers/multiformat_writer.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Dict, List +from typing import Dict, List from docbinder_oss.core.schemas import File from docbinder_oss.helpers.writers.base import Writer diff --git a/tests/commands/test_search_command.py b/tests/commands/test_search_command.py index 1ccc378..eb37e4d 100644 --- a/tests/commands/test_search_command.py +++ b/tests/commands/test_search_command.py @@ -84,7 +84,19 @@ def test_search_name_filter_not_empty(load_config_mock, create_provider_instance @pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@a.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@a.com", + photo_link="https://test.com", + kind="" + ) + ] + ), DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), ])], indirect=True) def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): @@ -98,7 +110,19 @@ def test_search_owner_filter_empty(load_config_mock, create_provider_instance_mo @pytest.mark.parametrize('load_config_mock', [("dummy", 1)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="dummy File 1", kind="file", owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="dummy File 1", + kind="file", + owners=[ + User( + display_name="test", + email_address="beta@b.com", + photo_link="https://test.com", + kind="" + ) + ] + ), DummyModel(id="dummy_file2", name="File 2", kind="file", owners=[]), ])], indirect=True) def test_search_owner_filter_not_empty(load_config_mock, create_provider_instance_mock, list_all_files_mock): @@ -262,7 +286,20 @@ def test_search_provider_filter(load_config_mock, create_provider_instance_mock, @pytest.mark.parametrize('load_config_mock', [("dummy", 2)], indirect=True) @pytest.mark.parametrize('create_provider_instance_mock', [("dummy")], indirect=True) @pytest.mark.parametrize('list_all_files_mock', [([ - DummyModel(id="dummy_file1", name="Beta File 1", kind="file", size=5, owners=[User(display_name="test", email_address="beta@b.com", photo_link="https://test.com", kind="")]), + DummyModel( + id="dummy_file1", + name="Beta File 1", + kind="file", + size=5, + owners=[ + User( + display_name="test", + 
email_address="beta@b.com",
+                photo_link="https://test.com",
+                kind=""
+            )
+        ]
+    ),
     DummyModel(id="dummy_file2", name="dummy File 2", kind="file", size=2),
 ])], indirect=True)
 def test_search_combined_filters(load_config_mock, create_provider_instance_mock, list_all_files_mock):
diff --git a/tests/conftest.py b/tests/conftest.py
index e3f37ee..062bc2a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -96,7 +96,10 @@ def load_config_mock(request, create_config_mock):
     a dummy configuration with a specified number of providers.
     """
     name, number_of_providers = request.param
-    with patch("docbinder_oss.cli.search.load_config", return_value=create_config_mock(name, number_of_providers)) as _fixture:
+    with patch(
+        "docbinder_oss.cli.search.load_config",
+        return_value=create_config_mock(name, number_of_providers)
+    ) as _fixture:
         yield _fixture
 
 @pytest.fixture(scope='session')
@@ -105,7 +108,10 @@ def create_provider_instance_mock(request, create_provider_mock):
     This fixture mocks the `create_provider_instance` function to return
     a dummy provider instance based on the provider name.
     """
-    with patch("docbinder_oss.cli.search.create_provider_instance", return_value=create_provider_mock(request.param)) as _fixture:
+    with patch(
+        "docbinder_oss.cli.search.create_provider_instance",
+        return_value=create_provider_mock(request.param)
+    ) as _fixture:
         yield _fixture
 
 @pytest.fixture(scope="session")

From 1a325b9695e533a1bcc50dc24b4af9f325b865f7 Mon Sep 17 00:00:00 2001
From: Christophe Beke
Date: Tue, 1 Jul 2025 09:35:43 +0200
Subject: [PATCH 38/39] Updated workflow names

---
 .github/workflows/{docbinder-oss.yml => ci.yml} | 2 +-
 .github/workflows/pypi-publish.yml              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename .github/workflows/{docbinder-oss.yml => ci.yml} (96%)

diff --git a/.github/workflows/docbinder-oss.yml b/.github/workflows/ci.yml
similarity index 96%
rename from .github/workflows/docbinder-oss.yml
rename to .github/workflows/ci.yml
index 6a6a044..bbf2c95 100644
--- a/.github/workflows/docbinder-oss.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: DocBinder OSS Library CI/CD
+name: DocBinder OSS Library CI
 
 on:
   pull_request:
diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index c9ff952..1614841 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -1,4 +1,4 @@
-name: Publish to PyPI
+name: DocBinder OSS Publish to PyPI
 
 # This workflow publishes the package to PyPI when a new tag is created on the main branch.
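 # A minimal sketch of the trigger described above, assuming the conventional
 # on.push.tags filter (the actual pattern sits outside this hunk, and the
 # "v*" glob is an assumption for illustration):
 #   on:
 #     push:
 #       tags:
 #         - "v*"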
 on:
   push:

From 70f7c0b2624838c2b90d46ed0ebab4fa55a152b3 Mon Sep 17 00:00:00 2001
From: PaoloLeonard
Date: Tue, 1 Jul 2025 09:42:50 +0200
Subject: [PATCH 39/39] removed black and added pre-commit in CI

---
 .github/workflows/ci.yml |  2 +-
 pyproject.toml           |  1 -
 uv.lock                  | 39 ---------------------------------------
 3 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bbf2c95..2c162ef 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -40,7 +40,7 @@ jobs:
 
     - name: Lint code with uv
       run: |
-        uv tool run ruff check
+        uv tool run pre-commit run --all-files
 
     - name: Run tox with uv
       run: |
diff --git a/pyproject.toml b/pyproject.toml
index ed3fec0..026bb96 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,6 @@ include = ["src/docbinder_oss/**"]
 
 [dependency-groups]
 dev = [
-    "black>=25.1.0",
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
     "pre-commit>=4.2.0",
diff --git a/uv.lock b/uv.lock
index 61dfd65..e5d1e47 100644
--- a/uv.lock
+++ b/uv.lock
@@ -37,34 +37,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0c/37/fb6973edeb700f6e3d6ff222400602ab1830446c25c7b4676d8de93e65b8/backrefs-5.8-py39-none-any.whl", hash = "sha256:a66851e4533fb5b371aa0628e1fee1af05135616b86140c9d787a2ffdf4b8fdc", size = 380336, upload-time = "2025-02-25T16:53:29.858Z" },
 ]
 
-[[package]]
-name = "black"
-version = "25.1.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "click" },
-    { name = "mypy-extensions" },
-    { name = "packaging" },
-    { name = "pathspec" },
-    { name = "platformdirs" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" },
-    { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" },
-    { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" },
-    { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" },
-    { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" },
-    { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" },
-    { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" },
-    { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" },
-    { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" },
-]
 
 [[package]]
 name = "cachetools"
 version = "5.5.2"
@@ -204,7 +176,6 @@ dependencies = [
 
 [package.dev-dependencies]
 dev = [
-    { name = "black" },
     { name = "mkdocs" },
     { name = "mkdocs-material" },
     { name = "pre-commit" },
@@ -227,7 +198,6 @@ requires-dist = [
 
 [package.metadata.requires-dev]
 dev = [
-    { name = "black", specifier = ">=25.1.0" },
     { name = "mkdocs", specifier = ">=1.6.1" },
     { name = "mkdocs-material", specifier = ">=9.6.14" },
     { name = "pre-commit", specifier = ">=4.2.0" },
@@ -561,15 +531,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" },
 ]
 
-[[package]]
-name = "mypy-extensions"
-version = "1.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
-]
 
 [[package]]
 name = "nodeenv"
 version = "1.9.1"