From 6da4180b741e3336dc7a221a4b6c6ca9b2a31fc3 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 21 May 2026 11:27:49 -0400 Subject: [PATCH] feat: improve semantic search marimo notebook - Multilingual support: switch to multilingual-e5-large, embed English and Spanish sentences, show cross-lingual retrieval and language filtering - Interactive query input with mo.ui.text and mo.ui.radio language selector - Interactive API key input: reads from env/.env with a password field fallback for molab users, with callout admonitions for each state - Display search results as mo.ui.table with lang column - Pin datasets==3.5.1 (datasets>=4 breaks Helsinki-NLP/tatoeba) - Use keyword argument names in all Pinecone API calls - Remove numpy and tqdm (replaced by mo.status.progress_bar) - Remove notebook deps from root pyproject.toml Co-Authored-By: Claude Sonnet 4.6 (1M context) --- docs/semantic-search.py | 74 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/docs/semantic-search.py b/docs/semantic-search.py index 92572732..ab5c0d2a 100644 --- a/docs/semantic-search.py +++ b/docs/semantic-search.py @@ -1,7 +1,7 @@ # /// script # requires-python = ">=3.10" # dependencies = [ -# "datasets", +# "datasets==3.5.1", # "marimo>=0.23.6", # "pinecone==9.0.1", # ] @@ -53,17 +53,77 @@ def _(mo): ### Pinecone API Key - Set your `PINECONE_API_KEY` environment variable before running this notebook. - You can get a free key at [app.pinecone.io](https://app.pinecone.io). + You'll need a free Pinecone API key to run this notebook. Get one at + [app.pinecone.io](https://app.pinecone.io). + + **Running locally?** Set `PINECONE_API_KEY` in your environment or in a `.env` + file — marimo reads `.env` files automatically on startup. The cell below will + detect the key and confirm it's loaded. + + **Running in molab?** Enter your key directly in the input field below. """) return -@app.cell -def _(Pinecone, os): - # Initialize client - api_key = os.environ.get("PINECONE_API_KEY") +@app.cell(hide_code=True) +def _(mo, os): + env_key = os.environ.get("PINECONE_API_KEY", "") + + api_key_input = mo.ui.text( + kind="password", + placeholder="pcsk_...", + label="Pinecone API Key", + value=env_key, + full_width=True, + ) + + ( + mo.callout(mo.md("API key loaded from environment."), kind="success") + if env_key + else mo.vstack( + [ + mo.callout( + mo.md( + "Enter your Pinecone API key. Get a free key at [app.pinecone.io](https://app.pinecone.io)." + ), + kind="info", + ), + api_key_input, + ] + ) + ) + return (api_key_input,) + + +@app.cell(hide_code=True) +def _(api_key_input, mo): + api_key = api_key_input.value + mo.stop( + not api_key, + mo.callout( + mo.md("**API key required.** Enter your key above to continue."), + kind="danger", + ), + ) + return (api_key,) + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ### Instantiating the Client + With the API key in hand, we can create a `Pinecone` client. This is the entry point for all + control-plane operations — creating and managing indexes, listing namespaces, and so on. + + The `source_tag` parameter is used internally by Pinecone to attribute API usage from example + notebooks. You would not include this in your own applications. + """) + return + + +@app.cell(hide_code=True) +def _(Pinecone, api_key): pc = Pinecone( api_key=api_key, source_tag="pinecone_examples:docs:semantic_search",