Skip to content

Commit 479e309

Browse files
authored
Add SQL query interface and Claude Code skill (#249)
## Summary

Supersedes #236 with a different approach: instead of shelling out to the `duckdb` CLI via bash scripts, this builds a SQL view layer inside msgvault itself.

- **DuckDB views** over Parquet files (`RegisterViews`), registered at engine startup — 8 base views + 5 convenience views (v_messages, v_senders, v_domains, v_labels, v_threads)
- **`msgvault query` CLI command** with `--format json|csv|table` output
- **`POST /api/v1/query` HTTP endpoint** on the existing serve daemon (503 when Parquet cache unavailable)
- **Claude Code skill** that teaches agents to use `msgvault query "SELECT ..."`

No bash wrapper scripts, no external `duckdb` CLI dependency, no Parquet path knowledge leaked to consumers.

## Changes

| Component | Files |
|---|---|
| View layer | `internal/query/views.go` (base + convenience views, `RegisterViews`) |
| QuerySQL | `internal/query/duckdb.go` (method + read-only validation + view wiring) |
| CLI | `cmd/msgvault/cmd/query.go` |
| HTTP | `internal/api/handlers.go`, `internal/api/server.go` |
| Skill | `skills/claude-code/SKILL.md`, `skills/claude-code/references/views.md` |
| Tests | `internal/query/views_test.go`, `cmd/msgvault/cmd/query_test.go`, `internal/api/handlers_test.go` |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Wes McKinney <wesm@users.noreply.github.com>
1 parent 3720b26 commit 479e309

11 files changed

Lines changed: 1757 additions & 27 deletions

File tree

.roborev.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ require an attacker with the same privileges as the user — they already
2727
own everything this tool touches. Auth on loopback-only endpoints with
2828
no configured API key is also by design.
2929
30+
Anyone with access to the CLI or HTTP API is a privileged user. Do not
31+
flag SQL injection, statement validation, DDL bypass, or multi-statement
32+
risks on the query interface — the user is trusted and can already do
33+
anything via direct database access. The query surface is a convenience
34+
layer, not a security boundary.
35+
3036
IMAP passwords are stored on disk with restricted file permissions
3137
(0600), equivalent to OAuth tokens and API keys in the same data
3238
directory. If the data directory is compromised, the user must rotate

cmd/msgvault/cmd/query.go

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
package cmd
2+
3+
import (
4+
"database/sql"
5+
"encoding/csv"
6+
"encoding/json"
7+
"fmt"
8+
"io"
9+
"os"
10+
"runtime"
11+
"strings"
12+
13+
_ "github.com/marcboeker/go-duckdb"
14+
"github.com/spf13/cobra"
15+
"github.com/wesm/msgvault/internal/query"
16+
)
17+
18+
// queryFormat holds the value of the query command's --format flag
// (bound in init; defaults to "json").
var queryFormat string
19+
20+
var queryCmd = &cobra.Command{
21+
Use: "query [sql]",
22+
Short: "Run a SQL query against the analytics cache",
23+
Long: `Run arbitrary SQL against the Parquet analytics cache.
24+
25+
The following views are available:
26+
messages, participants, message_recipients, labels,
27+
message_labels, attachments, conversations, sources
28+
29+
Convenience views:
30+
v_messages - messages with resolved sender and labels
31+
v_senders - per-sender aggregates
32+
v_domains - per-domain aggregates
33+
v_labels - label name with message count and size
34+
v_threads - per-conversation aggregates
35+
36+
Output formats:
37+
json - JSON object with columns, rows, row_count (default)
38+
csv - CSV with header row
39+
table - Aligned text table
40+
41+
Examples:
42+
msgvault query "SELECT from_email, COUNT(*) AS n FROM v_messages GROUP BY 1 ORDER BY 2 DESC LIMIT 10"
43+
msgvault query --format csv "SELECT * FROM v_senders ORDER BY message_count DESC"
44+
msgvault query --format table "SELECT name, message_count FROM v_labels"`,
45+
Args: cobra.ExactArgs(1),
46+
RunE: func(cmd *cobra.Command, args []string) error {
47+
dbPath := cfg.DatabaseDSN()
48+
analyticsDir := cfg.AnalyticsDir()
49+
50+
staleness := cacheNeedsBuild(dbPath, analyticsDir)
51+
if staleness.NeedsBuild {
52+
fmt.Fprintf(os.Stderr,
53+
"Building analytics cache (%s)...\n",
54+
staleness.Reason)
55+
result, err := buildCache(
56+
dbPath, analyticsDir, staleness.FullRebuild,
57+
)
58+
if err != nil {
59+
return fmt.Errorf("build cache: %w", err)
60+
}
61+
if !result.Skipped {
62+
fmt.Fprintf(os.Stderr,
63+
"Cached %d messages.\n",
64+
result.ExportedCount)
65+
}
66+
}
67+
68+
if !query.HasCompleteParquetData(analyticsDir) {
69+
return fmt.Errorf(
70+
"analytics cache is empty — sync some " +
71+
"messages first")
72+
}
73+
74+
return executeQuery(
75+
analyticsDir, args[0], queryFormat, os.Stdout,
76+
)
77+
},
78+
}
79+
80+
// executeQuery opens an in-memory DuckDB, registers views over
81+
// the Parquet files in analyticsDir, runs the SQL, and writes
82+
// the results in the requested format.
83+
func executeQuery(
84+
analyticsDir, sqlStr, format string, w io.Writer,
85+
) error {
86+
db, err := sql.Open("duckdb", "")
87+
if err != nil {
88+
return fmt.Errorf("open duckdb: %w", err)
89+
}
90+
defer func() { _ = db.Close() }()
91+
92+
db.SetMaxOpenConns(1)
93+
94+
threads := runtime.GOMAXPROCS(0)
95+
if _, err := db.Exec(
96+
fmt.Sprintf("SET threads = %d", threads),
97+
); err != nil {
98+
return fmt.Errorf("set threads: %w", err)
99+
}
100+
101+
if err := query.RegisterViews(db, analyticsDir); err != nil {
102+
return fmt.Errorf("register views: %w", err)
103+
}
104+
105+
rows, err := db.Query(sqlStr)
106+
if err != nil {
107+
return fmt.Errorf("execute query: %w", err)
108+
}
109+
defer func() { _ = rows.Close() }()
110+
111+
cols, err := rows.Columns()
112+
if err != nil {
113+
return fmt.Errorf("get columns: %w", err)
114+
}
115+
116+
var allRows [][]any
117+
for rows.Next() {
118+
row, scanErr := scanRow(cols, rows)
119+
if scanErr != nil {
120+
return scanErr
121+
}
122+
allRows = append(allRows, row)
123+
}
124+
if err := rows.Err(); err != nil {
125+
return fmt.Errorf("iterate rows: %w", err)
126+
}
127+
128+
switch format {
129+
case "json":
130+
return writeJSON(w, cols, allRows)
131+
case "csv":
132+
return writeCSV(w, cols, allRows)
133+
case "table":
134+
return writeTable(w, cols, allRows)
135+
default:
136+
return fmt.Errorf("unknown format %q (use json, csv, or table)", format)
137+
}
138+
}
139+
140+
// scanRow reads the current row of rows into a freshly allocated slice
// with one entry per column in cols. Any value that arrives as []byte
// is replaced by the equivalent string so it serializes as text; all
// other values are returned as scanned. A scan failure is returned
// wrapped, with a nil slice.
func scanRow(
	cols []string, rows *sql.Rows,
) ([]any, error) {
	n := len(cols)
	out := make([]any, n)

	// Scan needs a pointer per column; point each one at its slot in out.
	dest := make([]any, 0, n)
	for i := range out {
		dest = append(dest, &out[i])
	}

	if err := rows.Scan(dest...); err != nil {
		return nil, fmt.Errorf("scan row: %w", err)
	}

	for i := range out {
		raw, isBytes := out[i].([]byte)
		if !isBytes {
			continue
		}
		out[i] = string(raw)
	}
	return out, nil
}
160+
161+
func writeJSON(
162+
w io.Writer, cols []string, rows [][]any,
163+
) error {
164+
result := query.QueryResult{
165+
Columns: cols,
166+
Rows: rows,
167+
RowCount: len(rows),
168+
}
169+
enc := json.NewEncoder(w)
170+
enc.SetIndent("", " ")
171+
return enc.Encode(result)
172+
}
173+
174+
// displayVal renders a single cell for the CSV and table writers.
// A nil value (SQL NULL) is shown as the empty string; anything else
// is rendered with fmt's default formatting.
func displayVal(v any) string {
	if v != nil {
		return fmt.Sprint(v)
	}
	return ""
}
182+
183+
func writeCSV(
184+
w io.Writer, cols []string, rows [][]any,
185+
) error {
186+
cw := csv.NewWriter(w)
187+
188+
if err := cw.Write(cols); err != nil {
189+
return fmt.Errorf("write csv header: %w", err)
190+
}
191+
192+
for _, row := range rows {
193+
record := make([]string, len(row))
194+
for i, v := range row {
195+
record[i] = displayVal(v)
196+
}
197+
if err := cw.Write(record); err != nil {
198+
return fmt.Errorf("write csv row: %w", err)
199+
}
200+
}
201+
202+
cw.Flush()
203+
return cw.Error()
204+
}
205+
206+
func writeTable(
207+
w io.Writer, cols []string, rows [][]any,
208+
) error {
209+
// Convert all values to strings for width calculation
210+
strRows := make([][]string, len(rows))
211+
for i, row := range rows {
212+
strRows[i] = make([]string, len(row))
213+
for j, v := range row {
214+
strRows[i][j] = displayVal(v)
215+
}
216+
}
217+
218+
// Calculate column widths (min = header length)
219+
widths := make([]int, len(cols))
220+
for i, col := range cols {
221+
widths[i] = len(col)
222+
}
223+
for _, row := range strRows {
224+
for i, val := range row {
225+
if len(val) > widths[i] {
226+
widths[i] = len(val)
227+
}
228+
}
229+
}
230+
231+
// Print header
232+
for i, col := range cols {
233+
if i > 0 {
234+
_, _ = fmt.Fprint(w, " ")
235+
}
236+
_, _ = fmt.Fprintf(w, "%-*s", widths[i], col)
237+
}
238+
_, _ = fmt.Fprintln(w)
239+
240+
// Print separator
241+
for i, width := range widths {
242+
if i > 0 {
243+
_, _ = fmt.Fprint(w, " ")
244+
}
245+
_, _ = fmt.Fprint(w, strings.Repeat("-", width))
246+
}
247+
_, _ = fmt.Fprintln(w)
248+
249+
// Print rows
250+
for _, row := range strRows {
251+
for i, val := range row {
252+
if i > 0 {
253+
_, _ = fmt.Fprint(w, " ")
254+
}
255+
_, _ = fmt.Fprintf(w, "%-*s", widths[i], val)
256+
}
257+
_, _ = fmt.Fprintln(w)
258+
}
259+
260+
// Print row count
261+
_, _ = fmt.Fprintf(w, "(%d rows)\n", len(rows))
262+
return nil
263+
}
264+
265+
func init() {
266+
rootCmd.AddCommand(queryCmd)
267+
queryCmd.Flags().StringVar(
268+
&queryFormat, "format", "json",
269+
"Output format: json, csv, or table",
270+
)
271+
}

0 commit comments

Comments
 (0)