25 changes: 20 additions & 5 deletions .pre-commit-config.yaml
@@ -1,9 +1,24 @@
 repos:
-  - repo: https://github.com/psf/black
-    rev: 24.3.0
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.7.16
     hooks:
-      - id: black
+      - id: uv-export
+      - id: uv-lock
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.4
+    # Ruff version.
+    rev: v0.12.1
     hooks:
-      - id: ruff
+      # Run the linter.
+      - id: ruff-check
+        types_or: [ python, pyi ]
+        args: [ --select, I, --fix, --select=E501 ]
+      # Run the formatter.
+      - id: ruff-format
+        types_or: [ python, pyi ]
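Note: taken together with the pyproject.toml change below, ruff now owns import sorting (I), line length (E501, checked against the `[tool.ruff]` line-length of 125) and formatting, replacing black; the old `ruff` hook id is `ruff-check` upstream. Roughly what the two hooks execute, as a sketch (the target path is illustrative; pre-commit actually passes the staged file names):

import subprocess

# Lint: fix import order (I) and flag long lines (E501), then format.
subprocess.run(["ruff", "check", "--select", "I", "--fix", "--select=E501", "src/"], check=False)
subprocess.run(["ruff", "format", "src/"], check=False)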
4 changes: 0 additions & 4 deletions pyproject.toml
@@ -48,10 +48,6 @@ testpaths = [
"tests",
]

[tool.black]
line-length = 125
skip-string-normalization = false

[tool.ruff]
line-length = 125

68 changes: 12 additions & 56 deletions src/docbinder_oss/cli/search.py
@@ -1,8 +1,7 @@
 from datetime import datetime
 import re
 import typer
-from typing import Optional
-import csv
+from typing import Dict, List, Optional
 
 from docbinder_oss.core.schemas import File
 from docbinder_oss.helpers.config import load_config
@@ -64,7 +63,7 @@ def search(


 def __filter_files(
-    files,
+    files: Dict[str, List[File]],
     name=None,
     owner=None,
     updated_after=None,
@@ -73,7 +72,7 @@
     created_before=None,
     min_size=None,
     max_size=None,
-):
+) -> Dict[str, List[File]]:
     """
     Filters a collection of files based on various criteria such as name, owner,
     modification/creation dates, and file size.
@@ -103,14 +102,14 @@ def file_matches(file: File):
         if owner and (not file.owners or not any(owner in u.email_address for u in file.owners)):
             return False
         if updated_after:
-            file_mod_time = __parse_dt(file.modified_time)
+            file_modified_time = __parse_dt(file.modified_time)
             updated_after_dt = __parse_dt(updated_after)
-            if file_mod_time is None or updated_after_dt is None or file_mod_time < updated_after_dt:
+            if file_modified_time is None or updated_after_dt is None or file_modified_time < updated_after_dt:
                 return False
         if updated_before:
-            file_mod_time = __parse_dt(file.modified_time)
+            file_modified_time = __parse_dt(file.modified_time)
             updated_before_dt = __parse_dt(updated_before)
-            if file_mod_time is None or updated_before_dt is None or file_mod_time > updated_before_dt:
+            if file_modified_time is None or updated_before_dt is None or file_modified_time > updated_before_dt:
                 return False
         if created_after:
             file_created_time = __parse_dt(file.created_time)
@@ -122,9 +121,9 @@ def file_matches(file: File):
             created_before_dt = __parse_dt(created_before)
             if file_created_time is not None and created_before_dt is not None and file_created_time > created_before_dt:
                 return False
-        if min_size and file.size < min_size * 1024:
+        if min_size and file.size < min_size:
             return False
-        if max_size and file.size > max_size * 1024:
+        if max_size and file.size > max_size:
             return False
         return True
 
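Note the unit change: without the `* 1024`, `min_size`/`max_size` are now compared against `file.size` directly, i.e. in bytes rather than kilobytes (assuming `File.size` is a byte count). A quick sketch:

file_size = 2048   # a 2 KiB file, assuming File.size is in bytes
min_size = 4
# before: 2048 < 4 * 1024  -> filtered out (min_size read as KiB)
# after:  2048 >= 4        -> kept (min_size read as bytes)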
@@ -139,49 +138,6 @@ def __parse_dt(val):
         return val
     try:
         return datetime.fromisoformat(val)
-    except Exception:
-        return val
-
-
-def __write_csv(files_by_provider, filename):
-    # Collect all possible fieldnames from all files
-    all_fieldnames = set(["provider"])
-    for files in files_by_provider.values():
-        for file in files:
-            file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy()
-            all_fieldnames.update(file_dict.keys())
-    # Move provider to the front, rest sorted
-    fieldnames = ["provider"] + sorted(f for f in all_fieldnames if f != "provider")
-    with open(filename, "w", newline="") as csvfile:
-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
-        for provider, files in files_by_provider.items():
-            for file in files:
-                file_dict = file.model_dump() if hasattr(file, "model_dump") else file.__dict__.copy()
-                file_dict["provider"] = provider
-                # Flatten owners for CSV (only email addresses)
-                owners = file_dict.get("owners")
-                if isinstance(owners, list):
-                    emails = []
-                    for u in owners:
-                        if hasattr(u, "email_address") and u.email_address:
-                            emails.append(u.email_address)
-                        elif isinstance(u, dict) and u.get("email_address"):
-                            emails.append(u["email_address"])
-                        elif isinstance(u, str):
-                            emails.append(u)
-                    file_dict["owners"] = ";".join(emails)
-                # Flatten last_modifying_user for CSV (only email address)
-                last_mod = file_dict.get("last_modifying_user")
-                if last_mod is not None:
-                    if hasattr(last_mod, "email_address"):
-                        file_dict["last_modifying_user"] = last_mod.email_address
-                    elif isinstance(last_mod, dict) and "email_address" in last_mod:
-                        file_dict["last_modifying_user"] = last_mod["email_address"]
-                    else:
-                        file_dict["last_modifying_user"] = str(last_mod)
-                # Flatten parents for CSV
-                parents = file_dict.get("parents")
-                if isinstance(parents, list):
-                    file_dict["parents"] = ";".join(str(p) for p in parents)
-                writer.writerow({fn: file_dict.get(fn, "") for fn in fieldnames})
+    except Exception as e:
+        typer.echo(f"Failed to parse datetime from value: {val} with error: {e}", err=True)
+        raise ValueError(f"Invalid datetime format: {val}") from e
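`__parse_dt` now fails loudly: anything `datetime.fromisoformat` rejects is echoed to stderr via typer and raised as `ValueError`, instead of being silently passed through. Sketch of the before/after:

from datetime import datetime

datetime.fromisoformat("2024-06-01T12:00:00")  # ok -> datetime(2024, 6, 1, 12, 0)
# __parse_dt("06/01/2024"): previously returned the string unchanged;
# now it echoes the parse error and raises ValueError("Invalid datetime format: 06/01/2024").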
5 changes: 3 additions & 2 deletions src/docbinder_oss/helpers/config.py
@@ -1,11 +1,12 @@
 import logging
 import os
+from typing import List
 
 import typer
 import yaml
 from pydantic import BaseModel, ValidationError
 
-from docbinder_oss.providers import get_provider_registry
+from docbinder_oss.providers import ServiceUnion, get_provider_registry
 
 logger = logging.getLogger(__name__)
 
@@ -15,7 +16,7 @@
 class Config(BaseModel):
     """Main configuration model that holds a list of all provider configs."""
 
-    providers: list
+    providers: List[ServiceUnion]  # type: ignore
 
 
 def load_config() -> Config:
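`ServiceUnion` itself is not defined in this diff; for `providers` to validate into concrete provider configs it is presumably a union over the registered provider models, e.g. a pydantic discriminated union. A purely hypothetical sketch (all names invented for illustration):

from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field

class GoogleDriveConfig(BaseModel):   # hypothetical provider config
    type: Literal["google_drive"]
    name: str

class S3Config(BaseModel):            # hypothetical provider config
    type: Literal["s3"]
    name: str

ServiceUnion = Annotated[Union[GoogleDriveConfig, S3Config], Field(discriminator="type")]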
19 changes: 0 additions & 19 deletions src/docbinder_oss/helpers/rich_helpers.py

This file was deleted.

9 changes: 6 additions & 3 deletions src/docbinder_oss/helpers/writers/multiformat_writer.py
@@ -1,6 +1,8 @@
 from pathlib import Path
-from typing import Any
+from typing import Dict, List
 
+from docbinder_oss.core.schemas import File
+from docbinder_oss.helpers.writers.base import Writer
 from docbinder_oss.helpers.writers.writer_console import ConsoleWriter
 from docbinder_oss.helpers.writers.writer_csv import CSVWriter
 from docbinder_oss.helpers.writers.writer_json import JSONWriter
@@ -20,7 +22,7 @@ class MultiFormatWriter:
     }
 
     @classmethod
-    def write(cls, data: Any, file_path: str | None = None) -> None:
+    def write(cls, data: Dict[str, List[File]], file_path: str | None = None) -> None:
         if not file_path:
             ConsoleWriter().write(data)
             return
@@ -30,5 +32,6 @@ def write(cls, data: Any, file_path: str | None = None) -> None:
         if writer_key not in cls._writers:
             raise ValueError(f"Unsupported format: {file_path}")
         writer_class = cls._writers[writer_key]
-        writer = writer_class()
+        writer: Writer = writer_class()
         writer.write(data, file_path)
+
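Usage sketch for the dispatcher, assuming `results` is the provider-keyed `Dict[str, List[File]]` a search produces (the `out.*` paths are illustrative):

MultiFormatWriter.write(results)              # no path -> ConsoleWriter (rich table)
MultiFormatWriter.write(results, "out.csv")   # .csv  -> CSVWriter
MultiFormatWriter.write(results, "out.json")  # .json -> JSONWriter
MultiFormatWriter.write(results, "out.xml")   # unknown suffix -> ValueError: Unsupported format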
17 changes: 4 additions & 13 deletions src/docbinder_oss/helpers/writers/writer_console.py
@@ -1,29 +1,20 @@
 from pathlib import Path
 from typing import Any
+from rich.table import Table
+from rich import print
 from docbinder_oss.helpers.writers.base import Writer
 
 
 class ConsoleWriter(Writer):
     """Writer for pretty-printing data to the console using rich tables."""
 
     def write(self, data: Any, file_path: str | Path | None = None) -> None:
-        from rich.table import Table
-
         table = Table(title="Files and Folders")
         table.add_column("Provider", justify="right", style="cyan", no_wrap=True)
         table.add_column("Id", style="magenta")
         table.add_column("Name", style="magenta")
         table.add_column("Kind", style="magenta")
-        for provider, items in data.items() if isinstance(data, dict) else [("?", data)]:
+        for provider, items in data.items():
             for item in items:
-                if hasattr(item, "model_dump"):
-                    item = item.model_dump()
-                elif hasattr(item, "__dict__"):
-                    item = dict(item.__dict__)
-                table.add_row(
-                    str(provider),
-                    str(item.get("id", "")),
-                    str(item.get("name", "")),
-                    str(item.get("kind", "")),
-                )
+                table.add_row(provider, item.id, item.name, item.kind)
         print(table)
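The rewrite tightens the contract: items must be `File` models exposing `id`, `name` and `kind`, and the old `isinstance(data, dict)`/plain-dict fallbacks are gone. Usage sketch (the `File` constructor fields are illustrative, per core.schemas):

from docbinder_oss.core.schemas import File

files = {"gdrive": [File(id="1", name="report.pdf", kind="file")]}  # illustrative fields
ConsoleWriter().write(files)   # renders one table row per file, labelled by provider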
46 changes: 17 additions & 29 deletions src/docbinder_oss/helpers/writers/writer_csv.py
@@ -1,41 +1,29 @@
 import csv
 import logging
 from pathlib import Path
-from typing import Any
+from typing import List, Dict, Union
+from pydantic import BaseModel
 from docbinder_oss.helpers.writers.base import Writer
-from docbinder_oss.helpers.writers.helper_functions import flatten_file
 
 logger = logging.getLogger(__name__)
 
 
 class CSVWriter(Writer):
     """Writer for exporting data to CSV files."""
+
+    def get_fieldnames(self, data: Dict[str, List[BaseModel]]) -> List[str]:
+        fieldnames = next(iter(data.values()))[0].model_fields_set
+        return ["provider", *fieldnames]
 
-    def get_fieldnames(self, rows: list) -> list:
-        fieldnames = set()
-        for row in rows:
-            fieldnames.update(row.keys())
-        # Provider first, then the rest sorted
-        return ["provider"] + sorted(f for f in fieldnames if f != "provider")
+    def write(self, data: List[Dict], file_path: Union[str, Path]) -> None:
+        if not data:
+            logger.warning("No data to write to CSV.")
+            return
 
-    def write(self, data: Any, file_path: str | Path | None = None) -> None:
-        """
-        Always flattens grouped dicts to a flat list for CSV export.
-        """
-        rows = []
-        if isinstance(data, dict):
+        with open(file_path, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(data))
+            writer.writeheader()
             for provider, items in data.items():
                 for item in items:
-                    rows.append(flatten_file(item, provider))
-        elif isinstance(data, list):
-            for item in data:
-                provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None)
-                rows.append(flatten_file(item, provider))
-        else:
-            return
-        if not rows or not file_path:
-            logging.warning("No data to write to CSV.")
-            return
-        with open(file_path, "w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.get_fieldnames(rows))
-            writer.writeheader()
-            for row in rows:
-                writer.writerow(row)
+                    item_dict = item.model_dump() if isinstance(item, BaseModel) else item
+                    item_dict['provider'] = provider
+                    writer.writerow(item_dict)
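One caveat worth flagging: `get_fieldnames` builds the header from `model_fields_set` of the first file of the first provider, which holds only explicitly-set fields, while `model_dump()` emits every field, and `csv.DictWriter` raises `ValueError` on row keys missing from `fieldnames` (the `write` annotation `List[Dict]` also disagrees with the provider-keyed dict actually passed in). A minimal pydantic sketch of the mismatch (`F` is a stand-in model):

from pydantic import BaseModel

class F(BaseModel):              # stand-in for File
    id: str
    size: int = 0

f = F(id="1")
print(f.model_fields_set)        # {'id'} - defaulted 'size' is not included
print(f.model_dump().keys())     # dict_keys(['id', 'size'])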
30 changes: 9 additions & 21 deletions src/docbinder_oss/helpers/writers/writer_json.py
@@ -1,29 +1,17 @@
 import json
 from pathlib import Path
-from typing import Any
+from typing import Dict, List, Union
+from docbinder_oss.core.schemas import File
 from docbinder_oss.helpers.writers.base import Writer
-from docbinder_oss.helpers.writers.helper_functions import flatten_file
 
 
 class JSONWriter(Writer):
     """Writer for exporting data to JSON files."""
 
-    def write(self, data: Any, file_path: str | Path | None = None) -> None:
-        """
-        Always flattens grouped dicts to a flat list for JSON export.
-        """
-        flat = []
-        if isinstance(data, dict):
-            for provider, items in data.items():
-                for item in items:
-                    flat.append(flatten_file(item, provider))
-        elif isinstance(data, list):
-            for item in data:
-                provider = item.get("provider") if isinstance(item, dict) else getattr(item, "provider", None)
-                flat.append(flatten_file(item, provider))
-        else:
-            return
-        if not file_path:
-            return
-        with open(file_path, "w", encoding="utf-8") as f:
-            json.dump(flat, f, indent=2, ensure_ascii=False, default=str)
+    def write(self, data: Dict[str, List[File]], file_path: Union[str, Path]) -> None:
+        data = {
+            provider: [item.model_dump() for item in items]
+            for provider, items in data.items()
+        }
+        with open(file_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2, ensure_ascii=False, default=str)
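JSON output now preserves the provider grouping rather than flattening with `flatten_file`. A sketch of the resulting shape after `model_dump()` (field names illustrative):

import json

data = {"gdrive": [{"id": "1", "name": "report.pdf"}]}   # illustrative dumped File
print(json.dumps(data, indent=2, ensure_ascii=False, default=str))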
@@ -27,7 +27,7 @@ def list_files_in_folder(self, bucket_id: str | None = None) -> list[File]:
         if bucket_id:
             args["q"] = f"'{bucket_id}' in parents and trashed=false"
         else:
-            args["q"] = None
+            args["q"] = "trashed=false"
 
         resp = self.service.files().list(**args).execute()
         files = resp.get("files", [])
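With `q=None`, the Drive API applied no filter, so a bucket-less listing also returned trashed files; `"trashed=false"` excludes them server-side. Both branches now use the Drive v3 `files.list` query syntax, sketched here (base args illustrative):

args = {"pageSize": 100}                                  # illustrative base args
args["q"] = "'folder123' in parents and trashed=false"    # within one folder
args["q"] = "trashed=false"                               # all non-trashed files (the fix)
# resp = service.files().list(**args).execute()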