diff --git a/README.md b/README.md index 7180efd5a..91fb7f7ad 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ You can follow these steps to generate a PageIndex tree from a PDF document. pip3 install --upgrade -r requirements.txt ``` -### 2. Set your OpenAI API key +### 2. Set your API key Create a `.env` file in the root directory and add your API key: @@ -155,6 +155,8 @@ Create a `.env` file in the root directory and add your API key: CHATGPT_API_KEY=your_openai_key_here ``` +You can also use `OPENAI_API_KEY` as an alternative environment variable name. + ### 3. Run PageIndex on your PDF ```bash @@ -167,7 +169,9 @@ python3 run_pageindex.py --pdf_path /path/to/your/document.pdf You can customize the processing with additional optional arguments: ``` ---model OpenAI model to use (default: gpt-4o-2024-11-20) +--model Model to use (default: gpt-4o-2024-11-20) +--api-key API key (overrides env var) +--base-url Base URL for OpenAI-compatible API providers --toc-check-pages Pages to check for table of contents (default: 20) --max-pages-per-node Max pages per node (default: 10) --max-tokens-per-node Max tokens per node (default: 20000) @@ -177,6 +181,39 @@ You can customize the processing with additional optional arguments: ``` +
+<details>
+<summary>Using alternative LLM providers</summary>
+ +PageIndex supports any OpenAI-compatible API provider via the `--base-url` flag or the `OPENAI_BASE_URL` environment variable. + +**MiniMax** + +[MiniMax](https://www.minimaxi.com) offers models like `MiniMax-M2.5` with 204K context window through an OpenAI-compatible API: + +```bash +# Via environment variables +OPENAI_API_KEY=your_minimax_key_here +OPENAI_BASE_URL=https://api.minimax.io/v1 + +python3 run_pageindex.py --pdf_path /path/to/your/document.pdf --model MiniMax-M2.5 +``` + +Or via CLI arguments: + +```bash +python3 run_pageindex.py --pdf_path /path/to/your/document.pdf \ + --model MiniMax-M2.5 \ + --api-key your_minimax_key_here \ + --base-url https://api.minimax.io/v1 +``` + +**Other compatible providers** + +Any provider offering an OpenAI-compatible chat completions API can be used in the same way by setting `--base-url` to their endpoint. + +
+</details>
+
 <details>
 <summary>Markdown support</summary>
diff --git a/pageindex/utils.py b/pageindex/utils.py index dc7acd888..1f969742c 100644 --- a/pageindex/utils.py +++ b/pageindex/utils.py @@ -17,7 +17,8 @@ from pathlib import Path from types import SimpleNamespace as config -CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") +CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY") or os.getenv("OPENAI_API_KEY") +OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL") def count_tokens(text, model=None): if not text: @@ -26,9 +27,12 @@ def count_tokens(text, model=None): tokens = enc.encode(text) return len(tokens) -def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None): +def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None, base_url=OPENAI_BASE_URL): max_retries = 10 - client = openai.OpenAI(api_key=api_key) + client_kwargs = {"api_key": api_key} + if base_url: + client_kwargs["base_url"] = base_url + client = openai.OpenAI(**client_kwargs) for i in range(max_retries): try: if chat_history: @@ -36,11 +40,11 @@ def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_ messages.append({"role": "user", "content": prompt}) else: messages = [{"role": "user", "content": prompt}] - + response = client.chat.completions.create( model=model, messages=messages, - temperature=0, + temperature=0.01 if base_url else 0, ) if response.choices[0].finish_reason == "length": return response.choices[0].message.content, "max_output_reached" @@ -58,9 +62,12 @@ def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_ -def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None): +def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None, base_url=OPENAI_BASE_URL): max_retries = 10 - client = openai.OpenAI(api_key=api_key) + client_kwargs = {"api_key": api_key} + if base_url: + client_kwargs["base_url"] = base_url + client = openai.OpenAI(**client_kwargs) for i in range(max_retries): try: if chat_history: @@ 
-68,11 +75,11 @@ def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None): messages.append({"role": "user", "content": prompt}) else: messages = [{"role": "user", "content": prompt}] - + response = client.chat.completions.create( model=model, messages=messages, - temperature=0, + temperature=0.01 if base_url else 0, ) return response.choices[0].message.content @@ -86,16 +93,19 @@ def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None): return "Error" -async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY): +async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY, base_url=OPENAI_BASE_URL): max_retries = 10 messages = [{"role": "user", "content": prompt}] + client_kwargs = {"api_key": api_key} + if base_url: + client_kwargs["base_url"] = base_url for i in range(max_retries): try: - async with openai.AsyncOpenAI(api_key=api_key) as client: + async with openai.AsyncOpenAI(**client_kwargs) as client: response = await client.chat.completions.create( model=model, messages=messages, - temperature=0, + temperature=0.01 if base_url else 0, ) return response.choices[0].message.content except Exception as e: diff --git a/run_pageindex.py b/run_pageindex.py index 107024505..bb72e855e 100644 --- a/run_pageindex.py +++ b/run_pageindex.py @@ -11,6 +11,10 @@ parser.add_argument('--md_path', type=str, help='Path to the Markdown file') parser.add_argument('--model', type=str, default='gpt-4o-2024-11-20', help='Model to use') + parser.add_argument('--api-key', type=str, default=None, + help='API key (defaults to CHATGPT_API_KEY or OPENAI_API_KEY env var)') + parser.add_argument('--base-url', type=str, default=None, + help='Base URL for OpenAI-compatible API (e.g., https://api.minimax.io/v1)') parser.add_argument('--toc-check-pages', type=int, default=20, help='Number of pages to check for table of contents (PDF only)') @@ -36,7 +40,13 @@ parser.add_argument('--summary-token-threshold', type=int, default=200, help='Token 
threshold for generating summaries (markdown only)') args = parser.parse_args() - + + # Override env vars if CLI args are provided + if args.api_key: + os.environ["CHATGPT_API_KEY"] = args.api_key + if args.base_url: + os.environ["OPENAI_BASE_URL"] = args.base_url + # Validate that exactly one file type is specified if not args.pdf_path and not args.md_path: raise ValueError("Either --pdf_path or --md_path must be specified")