From c8f2240cab37c6e62aac83e9be624d36a0ca66a1 Mon Sep 17 00:00:00 2001
From: funnamer <439325484@qq.com>
Date: Wed, 11 Mar 2026 20:21:02 +0800
Subject: [PATCH] feat: support any OpenAI-compatible API endpoint

---
 .gitignore                 |  1 +
 pageindex/page_index.py    | 32 +++++++++++++-------------
 pageindex/page_index_md.py |  2 +-
 pageindex/utils.py         | 29 ++++++++++++------------
 run_pageindex.py           | 46 ++++++++++++++++++++------------------
 5 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/.gitignore b/.gitignore
index 47d38baef..3db9d3575 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ log/*
 logs/
 parts/*
 json_results/*
+.idea/
\ No newline at end of file
diff --git a/pageindex/page_index.py b/pageindex/page_index.py
index 39018c4df..8fb75feb9 100644
--- a/pageindex/page_index.py
+++ b/pageindex/page_index.py
@@ -36,7 +36,7 @@ async def check_title_appearance(item, page_list, start_index=1, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""

-    response = await ChatGPT_API_async(model=model, prompt=prompt)
+    response = await OpenAI_API_async(model=model, prompt=prompt)
     response = extract_json(response)
     if 'answer' in response:
         answer = response['answer']
@@ -64,7 +64,7 @@ async def check_title_appearance_in_start(title, page_text, model=None, logger=N
     }}
     Directly return the final JSON structure. Do not output anything else."""

-    response = await ChatGPT_API_async(model=model, prompt=prompt)
+    response = await OpenAI_API_async(model=model, prompt=prompt)
     response = extract_json(response)
     if logger:
         logger.info(f"Response: {response}")
@@ -116,7 +116,7 @@ def toc_detector_single_page(content, model=None):
     Directly return the final JSON structure. Do not output anything else.
    Please note: abstract,summary, notation list, figure list, table list, etc. are not table of contents."""

-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     # print('response', response)
     json_content = extract_json(response)
     return json_content['toc_detected']
@@ -135,7 +135,7 @@ def check_if_toc_extraction_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""

     prompt = prompt + '\n Document:\n' + content + '\n Table of contents:\n' + toc
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
@@ -153,7 +153,7 @@ def check_if_toc_transformation_is_complete(content, toc, model=None):
     Directly return the final JSON structure. Do not output anything else."""

     prompt = prompt + '\n Raw Table of contents:\n' + content + '\n Cleaned Table of contents:\n' + toc
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['completed']
@@ -165,7 +165,7 @@ def extract_toc_content(content, model=None):
     Directly return the full table of contents content. Do not output anything else."""

-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt)
     if_complete = check_if_toc_transformation_is_complete(content, response, model)
     if if_complete == "yes" and finish_reason == "finished":
@@ -176,7 +176,7 @@ def extract_toc_content(content, model=None):
             {"role": "assistant", "content": response},
         ]
         prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-        new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
+        new_response, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
         response = response + new_response
         if_complete = check_if_toc_transformation_is_complete(content, response, model)
@@ -186,7 +186,7 @@ def extract_toc_content(content, model=None):
             {"role": "assistant", "content": response},
         ]
         prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
-        new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
+        new_response, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
         response = response + new_response
         if_complete = check_if_toc_transformation_is_complete(content, response, model)
@@ -212,7 +212,7 @@ def detect_page_index(toc_content, model=None):
     }}
     Directly return the final JSON structure. Do not output anything else."""

-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content['page_index_given_in_toc']
@@ -261,7 +261,7 @@ def toc_index_extractor(toc, content, model=None):
     Directly return the final JSON structure. Do not output anything else."""

     prompt = toc_extractor_prompt + '\nTable of contents:\n' + str(toc) + '\nDocument pages:\n' + content
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return json_content
@@ -289,7 +289,7 @@ def toc_transformer(toc_content, model=None):
     Directly return the final JSON structure, do not output anything else.
     """
     prompt = init_prompt + '\n Given table of contents\n:' + toc_content
-    last_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    last_complete, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt)
     if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
     if if_complete == "yes" and finish_reason == "finished":
         last_complete = extract_json(last_complete)
@@ -313,7 +313,7 @@ def toc_transformer(toc_content, model=None):
        Please continue the json structure, directly output the remaining part of the json structure."""

-        new_complete, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+        new_complete, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt)

        if new_complete.startswith('```json'):
            new_complete = get_json_content(new_complete)
@@ -474,7 +474,7 @@ def add_page_number_to_toc(part, structure, model=None):
     Directly return the final JSON structure. Do not output anything else."""

     prompt = fill_prompt_seq + f"\n\nCurrent Partial Document:\n{part}\n\nGiven Structure\n{json.dumps(structure, indent=2)}\n"
-    current_json_raw = ChatGPT_API(model=model, prompt=prompt)
+    current_json_raw = OpenAI_API(model=model, prompt=prompt)
     json_result = extract_json(current_json_raw)

     for item in json_result:
@@ -524,7 +524,7 @@ def generate_toc_continue(toc_content, part, model="gpt-4o-2024-11-20"):
     Directly return the additional part of the final JSON structure. Do not output anything else."""

     prompt = prompt + '\nGiven text\n:' + part + '\nPrevious tree structure\n:' + json.dumps(toc_content, indent=2)
-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt)
     if finish_reason == 'finished':
         return extract_json(response)
     else:
@@ -558,7 +558,7 @@ def generate_toc_init(part, model=None):
     Directly return the final JSON structure. Do not output anything else."""

     prompt = prompt + '\nGiven text\n:' + part
-    response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt)
+    response, finish_reason = OpenAI_API_with_finish_reason(model=model, prompt=prompt)

     if finish_reason == 'finished':
         return extract_json(response)
@@ -743,7 +743,7 @@ def single_toc_item_index_fixer(section_title, content, model="gpt-4o-2024-11-20
     Directly return the final JSON structure. Do not output anything else."""

     prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
-    response = ChatGPT_API(model=model, prompt=prompt)
+    response = OpenAI_API(model=model, prompt=prompt)
     json_content = extract_json(response)
     return convert_physical_index_to_int(json_content['physical_index'])
diff --git a/pageindex/page_index_md.py b/pageindex/page_index_md.py
index 70e8de086..df3d485b0 100644
--- a/pageindex/page_index_md.py
+++ b/pageindex/page_index_md.py
@@ -306,7 +306,7 @@ async def md_to_tree(md_path, if_thinning=False, min_token_threshold=None, if_ad

     MD_PATH = os.path.join(os.path.dirname(__file__), '..', 'tests/markdowns/', f'{MD_NAME}.md')

-    MODEL="gpt-4.1"
+    MODEL = os.getenv('OPENAI_MODEL', 'deepseek-chat')
     IF_THINNING=False
     THINNING_THRESHOLD=5000
     SUMMARY_TOKEN_THRESHOLD=200
diff --git a/pageindex/utils.py b/pageindex/utils.py
index dc7acd888..a53342cea 100644
--- a/pageindex/utils.py
+++ b/pageindex/utils.py
@@ -2,6 +2,7 @@
 import openai
 import logging
 import os
+import re
 from datetime import datetime
 import time
 import json
@@ -17,18 +18,18 @@
 from pathlib import Path
 from types import SimpleNamespace as config

-CHATGPT_API_KEY = os.getenv("CHATGPT_API_KEY")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat")
+OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com")

 def count_tokens(text, model=None):
-    if not text:
-        return 0
-    enc = tiktoken.encoding_for_model(model)
-    tokens = enc.encode(text)
-    return len(tokens)
+    # Ignore the model argument: custom endpoints may serve models unknown to tiktoken, so always use the generic cl100k_base encoding
+    enc = tiktoken.get_encoding("cl100k_base")
+    return len(enc.encode(text))

-def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
+def OpenAI_API_with_finish_reason(model, prompt, api_key=OPENAI_API_KEY, chat_history=None):
     max_retries = 10
-    client = openai.OpenAI(api_key=api_key)
+    client = openai.OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL)
     for i in range(max_retries):
         try:
             if chat_history:
@@ -58,9 +59,9 @@ def ChatGPT_API_with_finish_reason(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):


-def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
+def OpenAI_API(model, prompt, api_key=OPENAI_API_KEY, chat_history=None):
     max_retries = 10
-    client = openai.OpenAI(api_key=api_key)
+    client = openai.OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL)
     for i in range(max_retries):
         try:
             if chat_history:
@@ -86,12 +87,12 @@ def ChatGPT_API(model, prompt, api_key=CHATGPT_API_KEY, chat_history=None):
     return "Error"


-async def ChatGPT_API_async(model, prompt, api_key=CHATGPT_API_KEY):
+async def OpenAI_API_async(model, prompt, api_key=OPENAI_API_KEY):
     max_retries = 10
     messages = [{"role": "user", "content": prompt}]
     for i in range(max_retries):
         try:
-            async with openai.AsyncOpenAI(api_key=api_key) as client:
+            async with openai.AsyncOpenAI(api_key=api_key, base_url=OPENAI_BASE_URL) as client:
                 response = await client.chat.completions.create(
                     model=model,
                     messages=messages,
@@ -609,7 +610,7 @@ async def generate_node_summary(node, model=None):
     Directly return the description, do not include any other text.
     """

-    response = await ChatGPT_API_async(model, prompt)
+    response = await OpenAI_API_async(model, prompt)
     return response


@@ -654,7 +655,7 @@ def generate_doc_description(structure, model=None):
     Directly return the description, do not include any other text.
     """

-    response = ChatGPT_API(model, prompt)
+    response = OpenAI_API(model, prompt)
     return response


diff --git a/run_pageindex.py b/run_pageindex.py
index 107024505..45e5641a5 100644
--- a/run_pageindex.py
+++ b/run_pageindex.py
@@ -3,6 +3,8 @@
 import json
 from pageindex import *
 from pageindex.page_index_md import md_to_tree
+from dotenv import load_dotenv
+load_dotenv()

 if __name__ == "__main__":
     # Set up argument parser
@@ -10,9 +12,9 @@
     parser.add_argument('--pdf_path', type=str, help='Path to the PDF file')
     parser.add_argument('--md_path', type=str, help='Path to the Markdown file')
-    parser.add_argument('--model', type=str, default='gpt-4o-2024-11-20', help='Model to use')
+    parser.add_argument('--model', type=str, default='deepseek-chat', help='Model to use')
-    parser.add_argument('--toc-check-pages', type=int, default=20, 
+    parser.add_argument('--toc-check-pages', type=int, default=20,
                         help='Number of pages to check for table of contents (PDF only)')
     parser.add_argument('--max-pages-per-node', type=int, default=10,
                         help='Maximum number of pages per node (PDF only)')
@@ -27,7 +29,7 @@
                         help='Whether to add doc description to the doc')
     parser.add_argument('--if-add-node-text', type=str, default='no',
                         help='Whether to add text to the node')
-    
+
     # Markdown specific arguments
     parser.add_argument('--if-thinning', type=str, default='no',
                         help='Whether to apply tree thinning for markdown (markdown only)')
@@ -36,20 +38,20 @@
     parser.add_argument('--summary-token-threshold', type=int, default=200,
                         help='Token threshold for generating summaries (markdown only)')
     args = parser.parse_args()
-    
+
     # Validate that exactly one file type is specified
     if not args.pdf_path and not args.md_path:
         raise ValueError("Either --pdf_path or --md_path must be specified")
     if args.pdf_path and args.md_path:
         raise ValueError("Only one of --pdf_path or --md_path can be specified")
-    
+
     if args.pdf_path:
         # Validate PDF file
         if not args.pdf_path.lower().endswith('.pdf'):
             raise ValueError("PDF file must have .pdf extension")
         if not os.path.isfile(args.pdf_path):
             raise ValueError(f"PDF file not found: {args.pdf_path}")
-    
+
         # Process PDF file
         # Configure options
         opt = config(
@@ -66,35 +68,35 @@
         # Process the PDF
         toc_with_page_number = page_index_main(args.pdf_path, opt)
         print('Parsing done, saving to file...')
-    
+
         # Save results
-        pdf_name = os.path.splitext(os.path.basename(args.pdf_path))[0]    
+        pdf_name = os.path.splitext(os.path.basename(args.pdf_path))[0]
         output_dir = './results'
         output_file = f'{output_dir}/{pdf_name}_structure.json'
         os.makedirs(output_dir, exist_ok=True)
-    
+
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(toc_with_page_number, f, indent=2)
-    
+
         print(f'Tree structure saved to: {output_file}')
-    
+
     elif args.md_path:
         # Validate Markdown file
         if not args.md_path.lower().endswith(('.md', '.markdown')):
             raise ValueError("Markdown file must have .md or .markdown extension")
         if not os.path.isfile(args.md_path):
             raise ValueError(f"Markdown file not found: {args.md_path}")
-    
+
         # Process markdown file
         print('Processing markdown file...')
-    
+
         # Process the markdown
         import asyncio
-    
+
         # Use ConfigLoader to get consistent defaults (matching PDF behavior)
         from pageindex.utils import ConfigLoader
         config_loader = ConfigLoader()
-    
+
         # Create options dict with user args
         user_opt = {
             'model': args.model,
@@ -103,10 +105,10 @@
             'if_add_node_text': args.if_add_node_text,
             'if_add_node_id': args.if_add_node_id
         }
-    
+
         # Load config with defaults from config.yaml
         opt = config_loader.load(user_opt)
-    
+
         toc_with_page_number = asyncio.run(md_to_tree(
             md_path=args.md_path,
             if_thinning=args.if_thinning.lower() == 'yes',
@@ -118,16 +120,16 @@
             if_add_node_text=opt.if_add_node_text,
             if_add_node_id=opt.if_add_node_id
         ))
-    
+
         print('Parsing done, saving to file...')
-    
+
         # Save results
-        md_name = os.path.splitext(os.path.basename(args.md_path))[0]    
+        md_name = os.path.splitext(os.path.basename(args.md_path))[0]
         output_dir = './results'
         output_file = f'{output_dir}/{md_name}_structure.json'
         os.makedirs(output_dir, exist_ok=True)
-    
+
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(toc_with_page_number, f, indent=2, ensure_ascii=False)
-    
+
         print(f'Tree structure saved to: {output_file}')
\ No newline at end of file
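
---
With this patch, pointing PageIndex at any OpenAI-compatible endpoint comes down to three
environment variables, which pageindex/utils.py reads once at import time. A minimal usage
sketch, assuming the target endpoint implements the standard chat-completions protocol; the
API key below is a placeholder, and DeepSeek stands in for any compatible provider:

    import os

    # Set these before importing pageindex.utils: the module captures them
    # via os.getenv() at import time, not per call.
    os.environ["OPENAI_API_KEY"] = "sk-..."                     # placeholder key
    os.environ["OPENAI_BASE_URL"] = "https://api.deepseek.com"  # any compatible endpoint
    os.environ["OPENAI_MODEL"] = "deepseek-chat"                # a model served there

    from pageindex.utils import OpenAI_API, OPENAI_MODEL

    # One round trip through the patched helper; per the diff it retries up to
    # 10 times and returns "Error" if the endpoint never answers.
    print(OpenAI_API(model=OPENAI_MODEL, prompt="Reply with one word: ok"))

The same variables can also live in a .env file, since run_pageindex.py now calls
load_dotenv() before parsing arguments.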