diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..324b6658
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,48 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+*.egg-info/
+dist/
+build/
+.eggs/
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+
+# API keys / secrets β€” never commit these
+.env.local
+secrets.json
+*.key
+
+# Outputs generated by the pipeline
+outputs/
+results/
+
+# Temp files from PDF download cache
+/tmp/
+
+# Debug / scratch files
+_debug_*.py
+_check_*.ps1
+
+# Visual Studio
+.vs/
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# OS
+.DS_Store
+Thumbs.db
+desktop.ini
+
+# Jupyter
+.ipynb_checkpoints/
+*.ipynb
diff --git a/README.md b/README.md
index 415baa7a..b02e32a1 100644
--- a/README.md
+++ b/README.md
@@ -1,238 +1,348 @@
-# πŸ“„ Paper2Code: Automating Code Generation from Scientific Papers in Machine Learning
+# Paper2Code - Windows Edition
 
-**Minju Seo, Jinheon Baek†, Seongyun Lee, and Sung Ju Hwang†** († denotes equal advising)
-International Conference on Learning Representations (ICLR), 2026
-πŸ“„ [Read the paper](https://arxiv.org/abs/2504.17192)
+> A Windows-native port of [Paper2Code](https://github.com/going-doer/Paper2Code) with one-command automation and support for 10 LLM providers.
+>
+> Original research by **Minju Seo, Jinheon Baek, Seongyun Lee, and Sung Ju Hwang** β€” ICLR 2026.
+> [Read the paper](https://arxiv.org/abs/2504.17192) Β· [Dataset](https://huggingface.co/datasets/iaminju/paper2code) Β· [Original repo](https://github.com/going-doer/Paper2Code)
 
 ![PaperCoder Overview](./assets/papercoder_overview.png)
 
-**PaperCoder** is the multi-agent LLM system introduced in **Paper2Code**, designed to transform a paper into a code repository.
-It follows a three-stage pipeline: planning, analysis, and code generation, each handled by specialized agents.
-Our method outperforms strong baselines on both Paper2Code and PaperBench and produces faithful, high-quality implementations.
+**PaperCoder** is a multi-agent LLM system that transforms a machine-learning research paper into a working code repository through three specialised stages: **planning -> analysis -> code generation**.
 
----
+This fork makes the full pipeline run natively on **Windows** with a single command β€” no WSL, no GROBID, no manual steps.
 
-## πŸ—ΊοΈ Table of Contents
+---
 
-- [⚑ Quick Start](#-quick-start)
-- [πŸ“š Detailed Setup Instructions](#-detailed-setup-instructions)
-- [πŸ“¦ Paper2Code Benchmark Datasets](#-paper2code-benchmark-datasets)
-- [πŸ“Š Model-based Evaluation of Repositories](#-model-based-evaluation-of-repositories-generated-by-papercoder)
+## Table of Contents
+
+- [What's New in This Fork](#whats-new-in-this-fork)
+- [Quick Start](#quick-start)
+- [Supported Providers](#supported-providers)
+- [Setting API Keys](#setting-api-keys)
+- [All Options](#all-options)
+- [Setup](#setup)
+- [Running Individual Stages](#running-individual-stages)
+- [Debugging a Generated Repo](#debugging-a-generated-repo)
+- [Evaluation](#evaluation)
+- [Benchmark Dataset](#benchmark-dataset)
+- [Test Suite](#test-suite)
+- [Credits](#credits)
 
 ---
 
-## ⚑ Quick Start
-- Note: The following command runs example paper ([Attention Is All You Need](https://arxiv.org/abs/1706.03762)).
-- For more setup options, including LaTeX-based inputs and PDF-to-JSON conversion, see [πŸ“š Detailed Setup Instructions](#-detailed-setup-instructions).
+## What's New in This Fork
+
+| Area | Change |
+|---|---|
+| **One-command launcher** | ``paper2code.bat <paper-url>`` runs the entire pipeline end-to-end |
+| **Automatic PDF download** | Pass an arXiv URL and the PDF is downloaded automatically |
+| **No GROBID required** | PDF-to-JSON conversion uses PyMuPDF (pure Python, no Java or Docker) |
+| **Windows native** | All shell scripts rewritten as PowerShell; no WSL needed |
+| **10 LLM providers** | Unified ``providers.py`` abstraction β€” swap provider with one flag |
+| **Free tier support** | Groq, Cerebras, OpenRouter, Mistral, GitHub Models, SambaNova, Gemini, Cohere, Cloudflare |
+| **Path and encoding fixes** | All file I/O uses ``os.path.join`` and ``encoding='utf-8'`` |
+| **Auto-debugging** | ``--rundebug`` flag runs the debugging agent if generated code fails |
+| **Test suite** | ``test_suite.py`` β€” 41 automated tests, no API key required |
+
+---
+
+## Quick Start
+
+### 1. Set up the environment
+
+```powershell
+conda create -n papertocode python=3.11
+conda activate papertocode
+pip install -r requirements.txt
+```
+
+### 2. Set your API key
+
+```powershell
+# OpenRouter (free models available)
+$env:OPENROUTER_API_KEY = "your-key"
+
+# Or OpenAI
+$env:OPENAI_API_KEY = "your-key"
+
+# Or any other provider β€” see the full list below
+```
+
+### 3. Run
+
+```powershell
+.\paper2code.bat https://arxiv.org/pdf/2510.01193 --provider openrouter --model inclusionai/ling-2.6-flash:free
+```
+
+That single command:
+1. Downloads the PDF from the URL
+2. Converts it to JSON (no GROBID needed)
+3. Runs planning, analysis, and code generation
+4. Outputs a ready-to-run repo to ``outputs/<paper_name>_repo/``
+
+> **Note:** In PowerShell always prefix with ``.\`` β€” e.g. ``.\paper2code.bat``, not ``paper2code.bat``.
+
+---
-### Using OpenAI API
-- πŸ’΅ Estimated cost for using o3-mini: $0.50–$0.70
-
-```bash
-pip install openai
-
-export OPENAI_API_KEY=""
-
-cd scripts
-bash run.sh
-```
-
-### Using Open Source Models with vLLM
-- If you encounter any issues installing vLLM, please refer to the [official vLLM repository](https://github.com/vllm-project/vllm).
-- The default model is `deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`.
-
-```bash
-pip install vllm
-
-cd scripts
-bash run_llm.sh
-```
-
-### Output Folder Structure (Only Important Files)
-```bash
-outputs
-β”œβ”€β”€ Transformer
-β”‚   β”œβ”€β”€ analyzing_artifacts
-β”‚   β”œβ”€β”€ coding_artifacts
-β”‚   └── planning_artifacts
-└── Transformer_repo  # Final output repository
-```
-
----
-
-## πŸ“š Detailed Setup Instructions
-
-### πŸ› οΈ Environment Setup
-
-- πŸ’‘ To use the `o3-mini` version, make sure you have the latest `openai` package installed.
-- We recommend using a Python virtual environment before installing dependencies.
-- πŸ“¦ Install only what you need:
-  - For OpenAI API, install `openai`.
-  - For open-source models, install `vllm`.
-  - If you encounter any issues installing vLLM, please refer to the [official vLLM repository](https://github.com/vllm-project/vllm).
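+Under the hood, every stage script goes through the same two calls in ``providers.py``, so switching providers never changes stage code. A minimal sketch of that API, following the module's own docstring (the prompt content here is illustrative):
+
+```python
+from providers import build_client, chat_complete
+
+# build_client resolves the key from the provider's environment variable
+# (e.g. GROQ_API_KEY) when no api_key argument is given.
+client = build_client(provider="groq")
+
+# chat_complete returns the same normalised dict for every backend:
+# {"choices": [{"message": {"role": "assistant", "content": "..."}}],
+#  "usage": {"prompt_tokens": ..., "completion_tokens": ..., ...}}
+response = chat_complete(
+    client, "groq", "llama-3.3-70b-versatile",
+    [{"role": "user", "content": "Reply with one word."}],
+)
+print(response["choices"][0]["message"]["content"])
+```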
+## Supported Providers
+
+| Provider | Free tier | Env variable | Signup |
+|---|---|---|---|
+| ``openai`` | No | ``OPENAI_API_KEY`` | [platform.openai.com](https://platform.openai.com) |
+| ``openrouter`` | Yes | ``OPENROUTER_API_KEY`` | [openrouter.ai](https://openrouter.ai) |
+| ``groq`` | Yes | ``GROQ_API_KEY`` | [console.groq.com](https://console.groq.com) |
+| ``cerebras`` | Yes | ``CEREBRAS_API_KEY`` | [cloud.cerebras.ai](https://cloud.cerebras.ai) |
+| ``gemini`` | Yes | ``GEMINI_API_KEY`` | [aistudio.google.com](https://aistudio.google.com) |
+| ``mistral`` | Yes | ``MISTRAL_API_KEY`` | [console.mistral.ai](https://console.mistral.ai) |
+| ``github`` | Yes* | ``GITHUB_TOKEN`` | [github.com/marketplace/models](https://github.com/marketplace/models) |
+| ``sambanova`` | Yes | ``SAMBANOVA_API_KEY`` | [cloud.sambanova.ai](https://cloud.sambanova.ai) |
+| ``cohere`` | Yes | ``COHERE_API_KEY`` | [dashboard.cohere.com](https://dashboard.cohere.com) |
+| ``cloudflare`` | Yes | ``CLOUDFLARE_API_KEY`` + ``CLOUDFLARE_ACCOUNT_ID`` | [dash.cloudflare.com](https://dash.cloudflare.com) |
+
+---
+
+## Setting API Keys
+
+Set the environment variable for the provider you want to use before running the pipeline.
+
+```powershell
+$env:OPENAI_API_KEY = "sk-..."
+$env:OPENROUTER_API_KEY = "sk-or-..."
+$env:GROQ_API_KEY = "gsk_..."
+$env:CEREBRAS_API_KEY = "..."
+$env:GEMINI_API_KEY = "..."
+$env:MISTRAL_API_KEY = "..."
+$env:GITHUB_TOKEN = "ghp_..."
+$env:SAMBANOVA_API_KEY = "..."
+$env:COHERE_API_KEY = "..."
+$env:CLOUDFLARE_API_KEY = "..."
+$env:CLOUDFLARE_ACCOUNT_ID = "..."   # Cloudflare requires both
+```
+
+Alternatively, pass the key directly at runtime:
+
+```powershell
+.\paper2code.bat https://arxiv.org/pdf/2510.01193 --provider openai --model o3-mini --api_key sk-...
+```
+
+---
+
+## All Options
+
+```
+.\paper2code.bat <paper-url-or-pdf> [options]
+```
-```bash
-pip install openai
-pip install vllm
-```
-
-- Or, if you prefer, you can install all dependencies using `pip`:
-
-```bash
-pip install -r requirements.txt
-```
-
-### πŸ“„ (Option) Convert PDF to JSON
-The following process describes how to convert a paper PDF into JSON format.
-If you have access to the LaTeX source and plan to use it with PaperCoder, you may skip this step and proceed to [πŸš€ Running PaperCoder](#-running-papercoder).
-Note: In our experiments, we converted all paper PDFs to JSON format.
-
-1. Clone the `s2orc-doc2json` repository to convert your PDF file into a structured JSON format.
-   (For detailed configuration, please refer to the [official repository](https://github.com/allenai/s2orc-doc2json).)
-
-```bash
-git clone https://github.com/allenai/s2orc-doc2json.git
-```
-
-2. Run the PDF processing service.
-
-```bash
-cd ./s2orc-doc2json/grobid-0.7.3
-./gradlew run
-```
-
-3. Convert your PDF into JSON format.
+| Option | Default | Description |
+|---|---|---|
+| ``--provider`` | ``openai`` | LLM provider to use |
+| ``--model`` | per-provider default | Model name for the chosen provider |
+| ``--api_key`` | from env var | Explicit API key (overrides env var) |
+| ``--output`` | ``.\outputs`` | Root directory for all outputs |
+| ``--latex`` | β€” | Path to a ``.tex`` file to use instead of PDF |
+| ``--rundebug`` | off | Run the debugging agent if ``reproduce.ps1`` fails |
+| ``--eval`` | off | Run reference-free evaluation after coding |
+
+### Examples
+
+```powershell
+# arXiv URL β€” PDF downloaded automatically
+.\paper2code.bat https://arxiv.org/pdf/2510.01193
+
+# Abstract URL also works
+.\paper2code.bat https://arxiv.org/abs/2510.01193 --provider groq --model llama-3.3-70b-versatile
+
+# Local PDF
+.\paper2code.bat C:\papers\mypaper.pdf --provider gemini --model gemini-2.5-flash
+
+# Custom output directory
+.\paper2code.bat https://arxiv.org/pdf/2510.01193 --provider openai --model o3-mini --output C:\myoutputs
+
+# With auto-debugging and evaluation
+.\paper2code.bat https://arxiv.org/pdf/2510.01193 --provider openrouter --model deepseek/deepseek-r1:free --rundebug --eval
+
+# LaTeX source instead of PDF
+.\paper2code.bat --latex examples\Transformer_cleaned.tex --provider openai --model gpt-4o
+```
-```bash
-mkdir -p ./s2orc-doc2json/output_dir/paper_coder
-python ./s2orc-doc2json/doc2json/grobid2json/process_pdf.py \
-    -i ${PDF_PATH} \
-    -t ./s2orc-doc2json/temp_dir/ \
-    -o ./s2orc-doc2json/output_dir/paper_coder
-```
-
-### πŸš€ Running PaperCoder
-- Note: The following command runs example paper ([Attention Is All You Need](https://arxiv.org/abs/1706.03762)).
-  If you want to run PaperCoder on your own paper, please modify the environment variables accordingly.
-
-#### Using OpenAI API
-- πŸ’΅ Estimated cost for using o3-mini: $0.50–$0.70
-
-```bash
-# Using the PDF-based JSON format of the paper
-export OPENAI_API_KEY=""
-
-cd scripts
-bash run.sh
-```
-
-```bash
-# Using the LaTeX source of the paper
-export OPENAI_API_KEY=""
-
-cd scripts
-bash run_latex.sh
-```
-
-#### Using Open Source Models with vLLM
-- The default model is `deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`.
-
-```bash
-# Using the PDF-based JSON format of the paper
-cd scripts
-bash run_llm.sh
-```
-
-```bash
-# Using the LaTeX source of the paper
-cd scripts
-bash run_latex_llm.sh
-```
+
+### Output structure
+
+```
+outputs/
++-- <paper_name>/
+|   +-- planning_artifacts/
+|   +-- analyzing_artifacts/
+|   +-- coding_artifacts/
++-- <paper_name>_repo/   <- final generated repository
+```
+
+---
+
+## Setup
+
+### Requirements
+
+- Windows 10 or 11
+- [Anaconda](https://www.anaconda.com) or Python 3.10+
+- PowerShell 5.1+ (built into Windows)
+
+### Installation
+
+```powershell
+conda create -n papertocode python=3.11
+conda activate papertocode
+pip install -r requirements.txt
+```
+
+> ``vllm`` (local open-source models) is **Linux/CUDA only** and is excluded from Windows installs. Use any of the cloud providers above instead.
+
+---
-## πŸ“¦ Paper2Code Benchmark Datasets
-- Huggingface dataset: [paper2code](https://huggingface.co/datasets/iaminju/paper2code)
-
-- You can find the description of the Paper2Code benchmark dataset in [data/paper2code](https://github.com/going-doer/Paper2Code/tree/main/data/paper2code).
-- For more details, refer to Section 4.1 "Paper2Code Benchmark" in the [paper](https://arxiv.org/abs/2504.17192).
+
+## Running Individual Stages
+
+``paper2code.bat`` runs all stages automatically.
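+The launcher performs the PDF-to-JSON conversion itself, but the PyMuPDF-based converter can also be invoked directly when you want to inspect the parse before spending tokens on the LLM stages (the paths below are placeholders):
+
+```powershell
+# Emits the raw s2orc-style JSON that Stage 0 below then cleans up
+python codes/pdf_to_json.py `
+    --pdf_path C:\papers\mypaper.pdf `
+    --output_json_path examples\mypaper.json
+```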
To run them individually:
+
+```powershell
+# Stage 0 β€” Clean a raw JSON
+python codes/0_pdf_process.py `
+    --input_json_path examples/Transformer.json `
+    --output_json_path examples/Transformer_cleaned.json
+
+# Stage 1 β€” Planning
+python codes/1_planning.py `
+    --paper_name Transformer `
+    --gpt_version o3-mini `
+    --provider openai `
+    --pdf_json_path examples/Transformer_cleaned.json `
+    --output_dir outputs
+
+# Stage 2 β€” Analyzing
+python codes/2_analyzing.py `
+    --paper_name Transformer `
+    --gpt_version o3-mini `
+    --provider openai `
+    --pdf_json_path examples/Transformer_cleaned.json `
+    --output_dir outputs
+
+# Stage 3 β€” Coding
+python codes/3_coding.py `
+    --paper_name Transformer `
+    --gpt_version o3-mini `
+    --provider openai `
+    --pdf_json_path examples/Transformer_cleaned.json `
+    --output_dir outputs `
+    --output_repo_dir outputs/Transformer_repo
+```
+
+For LaTeX input replace ``--pdf_json_path`` with ``--paper_format LaTeX --pdf_latex_path <path-to-tex>``.
+
+---
+
+## Debugging a Generated Repo
+
+If the generated code fails to run, use the debugging agent:
+
+```powershell
+.\scripts\run_debug.ps1 -Provider openai
+```
+
+Or directly:
+
+```powershell
+python codes/4_debugging.py `
+    --paper_name Transformer `
+    --model o3-mini `
+    --provider openai `
+    --error_file_name outputs/Transformer_repo/error.txt `
+    --output_dir outputs/Transformer `
+    --output_repo_dir outputs/Transformer_repo `
+    --save_num 1
+```
+
+The ``--rundebug`` flag in ``paper2code.bat`` does this automatically when ``reproduce.ps1`` exits with an error.
+
+---
+
+## Evaluation
+
+Evaluate a generated repository with a 1-5 correctness score averaged over ``n`` judge-model samples.
+
+```powershell
+# Reference-free (no gold repo needed)
+python codes/eval.py `
+    --paper_name Transformer `
+    --pdf_json_path examples/Transformer_cleaned.json `
+    --data_dir data `
+    --output_dir outputs/Transformer `
+    --target_repo_dir outputs/Transformer_repo `
+    --eval_result_dir results `
+    --eval_type ref_free `
+    --generated_n 8 `
+    --provider openai `
+    --gpt_version o3-mini `
+    --papercoder
-## πŸ“Š Model-based Evaluation of Repositories Generated by PaperCoder
-
-- We evaluate repository quality using a model-based approach, supporting both reference-based and reference-free settings.
-  The model critiques key implementation components, assigns severity levels, and generates a 1–5 correctness score averaged over 8 samples using **o3-mini-high**.
-
-- For more details, please refer to Section 4.3.1 (*Paper2Code Benchmark*) of the paper.
-- **Note:** The following examples evaluate the sample repository (**Transformer_repo**).
-  Please modify the relevant paths and arguments if you wish to evaluate a different repository.
-
-### πŸ› οΈ Environment Setup
-```bash
-pip install tiktoken
-export OPENAI_API_KEY=""
-```
-
-### πŸ“ Reference-free Evaluation
-- `target_repo_dir` is the generated repository.
-
-```bash
-cd codes/
-python eval.py \
-    --paper_name Transformer \
-    --pdf_json_path ../examples/Transformer_cleaned.json \
-    --data_dir ../data \
-    --output_dir ../outputs/Transformer \
-    --target_repo_dir ../outputs/Transformer_repo \
-    --eval_result_dir ../results \
-    --eval_type ref_free \
-    --generated_n 8 \
-    --papercoder
-```
-
-### πŸ“ Reference-based Evaluation
-- `target_repo_dir` is the generated repository.
-- `gold_repo_dir` should point to the official repository (e.g., author-released code).
- -```bash -cd codes/ -python eval.py \ - --paper_name Transformer \ - --pdf_json_path ../examples/Transformer_cleaned.json \ - --data_dir ../data \ - --output_dir ../outputs/Transformer \ - --target_repo_dir ../outputs/Transformer_repo \ - --gold_repo_dir ../examples/Transformer_gold_repo \ - --eval_result_dir ../results \ - --eval_type ref_based \ - --generated_n 8 \ +# Reference-based (requires a gold repo) +python codes/eval.py ` + --paper_name Transformer ` + --pdf_json_path examples/Transformer_cleaned.json ` + --data_dir data ` + --output_dir outputs/Transformer ` + --target_repo_dir outputs/Transformer_repo ` + --gold_repo_dir examples/Transformer_gold_repo ` + --eval_result_dir results ` + --eval_type ref_based ` + --generated_n 8 ` + --provider openai ` + --gpt_version o3-mini ` --papercoder ``` +``--provider`` and ``--gpt_version`` control which model acts as judge β€” any provider from the table above works. + +**Example output:** -### πŸ“„ Example Output -```bash +``` ======================================== -🌟 Evaluation Summary 🌟 -πŸ“„ Paper name: Transformer -πŸ§ͺ Evaluation type: ref_based -πŸ“ Target repo directory: ../outputs/Transformer_repo -πŸ“Š Evaluation result: - πŸ“ˆ Score: 4.5000 - βœ… Valid: 8/8 + Evaluation Summary + Paper: Transformer + Type: ref_based + Score: 4.5000 (valid: 8/8) + Cost: $0.1645 ======================================== -🌟 Usage Summary 🌟 -[Evaluation] Transformer - ref_based -πŸ› οΈ Model: o3-mini -πŸ“₯ Input tokens: 44318 (Cost: $0.04874980) -πŸ“¦ Cached input tokens: 0 (Cost: $0.00000000) -πŸ“€ Output tokens: 26310 (Cost: $0.11576400) -πŸ’΅ Current total cost: $0.16451380 -πŸͺ™ Accumulated total cost so far: $0.16451380 -============================================ ``` + +--- + +## Benchmark Dataset + +- HuggingFace: [iaminju/paper2code](https://huggingface.co/datasets/iaminju/paper2code) +- Dataset description: [data/paper2code](https://github.com/going-doer/Paper2Code/tree/main/data/paper2code) +- Details in Section 4.1 of the [paper](https://arxiv.org/abs/2504.17192) + +--- + +## Test Suite + +Verify your setup without needing an API key: + +```powershell +conda activate papertocode +python test_suite.py +``` + +Expected output: **41/41 tests passed**. + +--- + +## Credits + +This repository is a Windows and multi-provider implementation of the original **Paper2Code** project. + +**Original paper:** +> Minju Seo, Jinheon Baek, Seongyun Lee, Sung Ju Hwang. +> *"Paper2Code: Automating Code Generation from Scientific Papers in Machine Learning"* +> International Conference on Learning Representations (ICLR), 2026. +> [arXiv:2504.17192](https://arxiv.org/abs/2504.17192) Β· [github.com/going-doer/Paper2Code](https://github.com/going-doer/Paper2Code) + +All credit for the PaperCoder architecture, benchmark, and evaluation methodology belongs to the original authors. This fork adds Windows compatibility, one-command automation, and multi-provider support. The science is entirely theirs. 
diff --git a/codes/0_pdf_process.py b/codes/0_pdf_process.py index 60a9534f..739bff16 100644 --- a/codes/0_pdf_process.py +++ b/codes/0_pdf_process.py @@ -20,13 +20,13 @@ def main(args): input_json_path = args.input_json_path output_json_path = args.output_json_path - with open(f'{input_json_path}') as f: + with open(f'{input_json_path}', encoding='utf-8') as f: data = json.load(f) cleaned_data = remove_spans(data) print(f"[SAVED] {output_json_path}") - with open(output_json_path, 'w') as f: + with open(output_json_path, 'w', encoding='utf-8') as f: json.dump(cleaned_data, f) diff --git a/codes/1.2_rag_config.py b/codes/1.2_rag_config.py index 0884c876..a7450c85 100644 --- a/codes/1.2_rag_config.py +++ b/codes/1.2_rag_config.py @@ -3,7 +3,7 @@ import sys import argparse -from openai import OpenAI +from providers import build_client, chat_complete, add_provider_args try: from huggingface_hub import HfApi @@ -27,13 +27,14 @@ def parse_args() -> argparse.Namespace: "--gpt_version", type=str, default="gpt-4.1-mini", - help="OpenAI chat model name used for name detection.", + help="Model name used for name detection.", ) + add_provider_args(parser) return parser.parse_args() args = parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) planning_config_path = os.path.join( args.output_dir, f"planning_config.yaml" @@ -84,12 +85,14 @@ def parse_args() -> argparse.Namespace: }, ] -response = client.chat.completions.create( +response = chat_complete( + client, + provider=args.provider, model=args.gpt_version, messages=messages, ) -answer = response.choices[0].message.content.strip() +answer = response["choices"][0]["message"]["content"].strip() # print("Raw OpenAI answer:", answer) # Parse the list of names from the model output diff --git a/codes/1_planning.py b/codes/1_planning.py index 30fc5f8c..215ad997 100644 --- a/codes/1_planning.py +++ b/codes/1_planning.py @@ -1,10 +1,10 @@ -from openai import OpenAI import json from tqdm import tqdm import argparse import os import sys from utils import print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost +from providers import build_client, chat_complete, add_provider_args parser = argparse.ArgumentParser() @@ -14,10 +14,11 @@ parser.add_argument('--pdf_json_path', type=str) # json format parser.add_argument('--pdf_latex_path', type=str) # latex format parser.add_argument('--output_dir',type=str, default="") +add_provider_args(parser) args = parser.parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) paper_name = args.paper_name gpt_version = args.gpt_version @@ -25,13 +26,14 @@ pdf_json_path = args.pdf_json_path pdf_latex_path = args.pdf_latex_path output_dir = args.output_dir +provider = args.provider if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") @@ -214,19 +216,10 @@ }] def api_call(msg, gpt_version): - if "o3-mini" in gpt_version: - completion = client.chat.completions.create( - model=gpt_version, - reasoning_effort="high", - messages=msg - ) - else: - completion = client.chat.completions.create( - model=gpt_version, - messages=msg - ) - - return completion + return chat_complete( + client, provider, gpt_version, msg, + reasoning_effort="high" if "o3" in gpt_version or "o4" in gpt_version else None, + ) responses = [] trajectories = [] @@ -247,9 +240,9 @@ def api_call(msg, gpt_version): trajectories.extend(instruction_msg) completion = api_call(trajectories, gpt_version) - - # response - completion_json = json.loads(completion.model_dump_json()) + + # response (chat_complete always returns a normalised dict) + completion_json = completion # print and logging print_response(completion_json) @@ -259,8 +252,8 @@ def api_call(msg, gpt_version): responses.append(completion_json) # trajectories - message = completion.choices[0].message - trajectories.append({'role': message.role, 'content': message.content}) + message = completion["choices"][0]["message"] + trajectories.append({'role': message['role'], 'content': message['content']}) # save @@ -268,8 +261,8 @@ def api_call(msg, gpt_version): os.makedirs(output_dir, exist_ok=True) -with open(f'{output_dir}/planning_response.json', 'w') as f: +with open(f'{output_dir}/planning_response.json', 'w', encoding='utf-8') as f: json.dump(responses, f) -with open(f'{output_dir}/planning_trajectories.json', 'w') as f: +with open(f'{output_dir}/planning_trajectories.json', 'w', encoding='utf-8') as f: json.dump(trajectories, f) diff --git a/codes/1_planning_llm.py b/codes/1_planning_llm.py index 1d2d3cc7..7e6f4c60 100644 --- a/codes/1_planning_llm.py +++ b/codes/1_planning_llm.py @@ -35,10 +35,10 @@ temperature = args.temperature if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") @@ -290,8 +290,8 @@ def run_llm(msg): # save os.makedirs(output_dir, exist_ok=True) -with open(f'{output_dir}/planning_response.json', 'w') as f: +with open(f'{output_dir}/planning_response.json', 'w', encoding='utf-8') as f: json.dump(responses, f) -with open(f'{output_dir}/planning_trajectories.json', 'w') as f: +with open(f'{output_dir}/planning_trajectories.json', 'w', encoding='utf-8') as f: json.dump(trajectories, f) diff --git a/codes/2_analyzing.py b/codes/2_analyzing.py index 9ffc076f..6d469fbb 100644 --- a/codes/2_analyzing.py +++ b/codes/2_analyzing.py @@ -1,9 +1,9 @@ -from openai import OpenAI import json import os from tqdm import tqdm import sys from utils import extract_planning, content_to_json, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost +from providers import build_client, chat_complete, add_provider_args import copy import argparse @@ -16,10 +16,11 @@ parser.add_argument('--pdf_json_path', type=str) # json format parser.add_argument('--pdf_latex_path', type=str) # latex format parser.add_argument('--output_dir',type=str, default="") +add_provider_args(parser) args = parser.parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) paper_name = args.paper_name gpt_version = args.gpt_version @@ -27,26 +28,27 @@ pdf_json_path = args.pdf_json_path pdf_latex_path = args.pdf_latex_path output_dir = args.output_dir +provider = args.provider if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") sys.exit(0) -with open(f'{output_dir}/planning_config.yaml') as f: +with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') # 0: overview, 1: detailed, 2: PRD if os.path.exists(f'{output_dir}/task_list.json'): - with open(f'{output_dir}/task_list.json') as f: + with open(f'{output_dir}/task_list.json', encoding='utf-8') as f: task_list = json.load(f) else: task_list = content_to_json(context_lst[2]) @@ -136,18 +138,10 @@ def get_write_msg(todo_file_name, todo_file_desc): def api_call(msg): - if "o3-mini" in gpt_version: - completion = client.chat.completions.create( - model=gpt_version, - reasoning_effort="high", - messages=msg - ) - else: - completion = client.chat.completions.create( - model=gpt_version, - messages=msg - ) - return completion + return chat_complete( + client, provider, gpt_version, msg, + reasoning_effort="high" if "o3" in gpt_version or "o4" in gpt_version else None, + ) artifact_output_dir=f'{output_dir}/analyzing_artifacts' @@ -170,34 +164,31 @@ def api_call(msg): instruction_msg = get_write_msg(todo_file_name, logic_analysis_dict[todo_file_name]) trajectories.extend(instruction_msg) - completion = api_call(trajectories) - # response - completion_json = json.loads(completion.model_dump_json()) + completion_json = api_call(trajectories) responses.append(completion_json) - + # trajectories - message = completion.choices[0].message - trajectories.append({'role': message.role, 'content': message.content}) + message = completion_json["choices"][0]["message"] + trajectories.append({'role': message['role'], 'content': message['content']}) # print and logging print_response(completion_json) temp_total_accumulated_cost = print_log_cost(completion_json, gpt_version, current_stage, output_dir, total_accumulated_cost) total_accumulated_cost = temp_total_accumulated_cost - # save - with open(f'{artifact_output_dir}/{todo_file_name}_simple_analysis.txt', 'w') as f: + # save (use flat name to avoid subdirectory issues) + save_todo_file_name = todo_file_name.replace("/", "_") + with open(f'{artifact_output_dir}/{save_todo_file_name}_simple_analysis.txt', 'w', encoding='utf-8') as f: f.write(completion_json['choices'][0]['message']['content']) - done_file_lst.append(todo_file_name) # save for next stage(coding) - todo_file_name = todo_file_name.replace("/", "_") - with open(f'{output_dir}/{todo_file_name}_simple_analysis_response.json', 'w') as f: + with open(f'{output_dir}/{save_todo_file_name}_simple_analysis_response.json', 'w', encoding='utf-8') as f: json.dump(responses, f) - with open(f'{output_dir}/{todo_file_name}_simple_analysis_trajectories.json', 'w') as f: + with open(f'{output_dir}/{save_todo_file_name}_simple_analysis_trajectories.json', 'w', encoding='utf-8') as f: json.dump(trajectories, f) save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost) diff --git a/codes/2_analyzing_llm.py b/codes/2_analyzing_llm.py index 31123730..2060580d 100644 --- a/codes/2_analyzing_llm.py +++ b/codes/2_analyzing_llm.py @@ -40,23 +40,23 @@ output_dir = args.output_dir if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") sys.exit(0) -with open(f'{output_dir}/planning_config.yaml') as f: +with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') # 0: overview, 1: detailed, 2: PRD if os.path.exists(f'{output_dir}/task_list.json'): - with open(f'{output_dir}/task_list.json') as f: + with open(f'{output_dir}/task_list.json', encoding='utf-8') as f: task_list = json.load(f) else: task_list = content_to_json(context_lst[2]) @@ -212,16 +212,16 @@ def run_llm(msg): trajectories.append({'role': 'assistant', 'content': completion}) - # save - with open(f'{artifact_output_dir}/{todo_file_name}_simple_analysis.txt', 'w', encoding='utf-8') as f: + # save (use flat name to avoid subdirectory issues) + save_todo_file_name = todo_file_name.replace("/", "_") + with open(f'{artifact_output_dir}/{save_todo_file_name}_simple_analysis.txt', 'w', encoding='utf-8') as f: f.write(completion) done_file_lst.append(todo_file_name) # save for next stage(coding) - todo_file_name = todo_file_name.replace("/", "_") - with open(f'{output_dir}/{todo_file_name}_simple_analysis_response.json', 'w', encoding='utf-8') as f: + with open(f'{output_dir}/{save_todo_file_name}_simple_analysis_response.json', 'w', encoding='utf-8') as f: json.dump(responses, f) - with open(f'{output_dir}/{todo_file_name}_simple_analysis_trajectories.json', 'w', encoding='utf-8') as f: + with open(f'{output_dir}/{save_todo_file_name}_simple_analysis_trajectories.json', 'w', encoding='utf-8') as f: json.dump(trajectories, f) diff --git a/codes/3.1_coding_sh.py b/codes/3.1_coding_sh.py index 7c8e936b..17acac11 100644 --- a/codes/3.1_coding_sh.py +++ b/codes/3.1_coding_sh.py @@ -1,10 +1,10 @@ -from openai import OpenAI import json import os from tqdm import tqdm import sys import copy from utils import extract_planning, content_to_json, extract_code_from_content, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost, read_python_files +from providers import build_client, chat_complete, add_provider_args import argparse parser = argparse.ArgumentParser() @@ -16,9 +16,10 @@ parser.add_argument('--pdf_latex_path', type=str) # latex format parser.add_argument('--output_dir',type=str, default="") parser.add_argument('--output_repo_dir',type=str, default="") +add_provider_args(parser) args = parser.parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) paper_name = args.paper_name gpt_version = args.gpt_version @@ -27,18 +28,19 @@ pdf_latex_path = args.pdf_latex_path output_dir = args.output_dir output_repo_dir = args.output_repo_dir +provider = args.provider if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") sys.exit(0) -with open(f'{output_dir}/planning_config.yaml') as f: +with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') @@ -53,9 +55,9 @@ code_msg = [ {"role": "system", "content": f"""You are an expert researcher and software engineer with a deep understanding of experimental design and reproducibility in scientific research. You will receive configuration file named "config.yaml", and implmented code repository. -Your task is to write a Bash script that can run the given repository from scratch. The script should create and activate the required environment, install all dependencies, and include the commands needed to execute the main file or entry point. Make sure the script is self-contained and can be executed without any manual setup. - -Write code with triple quoto."""}] +Your task is to write a PowerShell script (for Windows) that can run the given repository from scratch. The script should create and activate the required conda/virtual environment, install all dependencies, and include the commands needed to execute the main file or entry point. Make sure the script is self-contained and can be executed without any manual setup on Windows using PowerShell. + +Write code with triple quote."""}] def get_write_msg(todo_file_name, done_file_lst): code_files = "" @@ -102,18 +104,10 @@ def get_write_msg(todo_file_name, done_file_lst): def api_call(msg): - if "o3-mini" in gpt_version or "o4-mini" in gpt_version: - completion = client.chat.completions.create( - model=gpt_version, - reasoning_effort="high", - messages=msg - ) - else: - completion = client.chat.completions.create( - model=gpt_version, - messages=msg - ) - return completion + return chat_complete( + client, provider, gpt_version, msg, + reasoning_effort="high" if "o3" in gpt_version or "o4" in gpt_version else None, + ) artifact_output_dir=f'{output_dir}/coding_artifacts' @@ -130,7 +124,7 @@ def api_call(msg): total_accumulated_cost = load_accumulated_cost(f"{output_dir}/accumulated_cost.json") -for todo_idx, todo_file_name in enumerate(["reproduce.sh"]): +for todo_idx, todo_file_name in enumerate(["reproduce.ps1"]): responses = [] trajectories = copy.deepcopy(code_msg) @@ -144,15 +138,13 @@ def api_call(msg): trajectories.extend(instruction_msg) completion = api_call(trajectories) - # print(completion.choices[0].message) - # response - completion_json = json.loads(completion.model_dump_json()) + completion_json = completion responses.append(completion_json) # trajectories - message = completion.choices[0].message - trajectories.append({'role': message.role, 'content': message.content}) + message = completion_json["choices"][0]["message"] + trajectories.append({'role': message['role'], 'content': message['content']}) done_file_lst.append(todo_file_name) @@ -168,21 +160,22 @@ def api_call(msg): total_accumulated_cost = temp_total_accumulated_cost # save artifacts - with open(f'{artifact_output_dir}/{save_todo_file_name}_coding.txt', 'w') as f: + with open(f'{artifact_output_dir}/{save_todo_file_name}_coding.txt', 'w', encoding='utf-8') as f: f.write(completion_json['choices'][0]['message']['content']) # extract code save - code = extract_code_from_content(message.content) + content = message['content'] + code = extract_code_from_content(content) if len(code) == 0: - code = message.content + code = content done_file_dict[todo_file_name] = code if save_todo_file_name != 
todo_file_name: - todo_file_dir = '/'.join(todo_file_name.split("/")[:-1]) - os.makedirs(f"{output_repo_dir}/{todo_file_dir}", exist_ok=True) + todo_file_dir = os.path.join(*todo_file_name.replace("\\", "/").split("/")[:-1]) + os.makedirs(os.path.join(output_repo_dir, todo_file_dir), exist_ok=True) - with open(f"{output_repo_dir}/{todo_file_name}", 'w') as f: + with open(os.path.join(output_repo_dir, *todo_file_name.replace("\\", "/").split("/")), 'w', encoding='utf-8') as f: f.write(code) save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost) diff --git a/codes/3_coding.py b/codes/3_coding.py index b266d16b..8b8fe21a 100644 --- a/codes/3_coding.py +++ b/codes/3_coding.py @@ -1,4 +1,3 @@ -from openai import OpenAI import json import os from tqdm import tqdm @@ -6,6 +5,7 @@ import sys import copy from utils import extract_planning, content_to_json, extract_code_from_content, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost +from providers import build_client, chat_complete, add_provider_args import argparse parser = argparse.ArgumentParser() @@ -17,9 +17,10 @@ parser.add_argument('--pdf_latex_path', type=str) # latex format parser.add_argument('--output_dir',type=str, default="") parser.add_argument('--output_repo_dir',type=str, default="") +add_provider_args(parser) args = parser.parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) paper_name = args.paper_name gpt_version = args.gpt_version @@ -28,18 +29,19 @@ pdf_latex_path = args.pdf_latex_path output_dir = args.output_dir output_repo_dir = args.output_repo_dir +provider = args.provider if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: paper_content = f.read() else: print(f"[ERROR] Invalid paper format. 
Please select either 'JSON' or 'LaTeX.") sys.exit(0) -with open(f'{output_dir}/planning_config.yaml') as f: +with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') @@ -134,19 +136,11 @@ def get_write_msg(todo_file_name, detailed_logic_analysis, done_file_lst): def api_call(msg): - if "o3-mini" in gpt_version: - completion = client.chat.completions.create( - model=gpt_version, - reasoning_effort="high", - messages=msg - ) - else: - completion = client.chat.completions.create( - model=gpt_version, - messages=msg - ) - return completion - + return chat_complete( + client, provider, gpt_version, msg, + reasoning_effort="high" if "o3" in gpt_version or "o4" in gpt_version else None, + ) + # testing for checking detailed_logic_analysis_dict = {} @@ -158,7 +152,7 @@ def api_call(msg): if todo_file_name == "config.yaml": continue - with open(f"{output_dir}/{save_todo_file_name}_simple_analysis_response.json") as f: + with open(f"{output_dir}/{save_todo_file_name}_simple_analysis_response.json", encoding='utf-8') as f: detailed_logic_analysis_response = json.load(f) detailed_logic_analysis_dict[todo_file_name] = detailed_logic_analysis_response[0]['choices'][0]['message']['content'] @@ -180,15 +174,13 @@ def api_call(msg): trajectories.extend(instruction_msg) completion = api_call(trajectories) - # print(completion.choices[0].message) - # response - completion_json = json.loads(completion.model_dump_json()) + completion_json = completion responses.append(completion_json) # trajectories - message = completion.choices[0].message - trajectories.append({'role': message.role, 'content': message.content}) + message = completion_json["choices"][0]["message"] + trajectories.append({'role': message['role'], 'content': message['content']}) done_file_lst.append(todo_file_name) @@ -204,21 +196,22 @@ def api_call(msg): total_accumulated_cost = temp_total_accumulated_cost # save artifacts - with open(f'{artifact_output_dir}/{save_todo_file_name}_coding.txt', 'w') as f: + with open(f'{artifact_output_dir}/{save_todo_file_name}_coding.txt', 'w', encoding='utf-8') as f: f.write(completion_json['choices'][0]['message']['content']) # extract code save - code = extract_code_from_content(message.content) + content = message['content'] + code = extract_code_from_content(content) if len(code) == 0: - code = message.content + code = content done_file_dict[todo_file_name] = code if save_todo_file_name != todo_file_name: - todo_file_dir = '/'.join(todo_file_name.split("/")[:-1]) - os.makedirs(f"{output_repo_dir}/{todo_file_dir}", exist_ok=True) + todo_file_dir = os.path.join(*todo_file_name.replace("\\", "/").split("/")[:-1]) + os.makedirs(os.path.join(output_repo_dir, todo_file_dir), exist_ok=True) - with open(f"{output_repo_dir}/{todo_file_name}", 'w') as f: + with open(os.path.join(output_repo_dir, *todo_file_name.replace("\\", "/").split("/")), 'w', encoding='utf-8') as f: f.write(code) save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost) diff --git a/codes/3_coding_llm.py b/codes/3_coding_llm.py index e523fd69..a54d8c44 100644 --- a/codes/3_coding_llm.py +++ b/codes/3_coding_llm.py @@ -43,16 +43,16 @@ if paper_format == "JSON": - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_content = json.load(f) elif paper_format == "LaTeX": - with open(f'{pdf_latex_path}') as f: + with open(f'{pdf_latex_path}', encoding='utf-8') as f: 
paper_content = f.read() else: print(f"[ERROR] Invalid paper format. Please select either 'JSON' or 'LaTeX.") sys.exit(0) -with open(f'{output_dir}/planning_config.yaml') as f: +with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') @@ -254,8 +254,8 @@ def run_llm(msg): done_file_dict[todo_file_name] = code if save_todo_file_name != todo_file_name: - todo_file_dir = '/'.join(todo_file_name.split("/")[:-1]) - os.makedirs(f"{output_repo_dir}/{todo_file_dir}", exist_ok=True) + todo_file_dir = os.path.join(*todo_file_name.replace("\\", "/").split("/")[:-1]) + os.makedirs(os.path.join(output_repo_dir, todo_file_dir), exist_ok=True) - with open(f"{output_repo_dir}/{todo_file_name}", 'w', encoding='utf-8') as f: + with open(os.path.join(output_repo_dir, *todo_file_name.replace("\\", "/").split("/")), 'w', encoding='utf-8') as f: f.write(code) diff --git a/codes/4_debugging.py b/codes/4_debugging.py index 277bd96d..c5b70b08 100644 --- a/codes/4_debugging.py +++ b/codes/4_debugging.py @@ -4,8 +4,8 @@ import re import sys -from openai import OpenAI from utils import read_python_files, content_to_json, extract_planning +from providers import build_client, chat_complete, add_provider_args, is_reasoning_model def parse_and_apply_changes(responses, debug_dir, save_num=1): @@ -100,6 +100,12 @@ def parse_args() -> argparse.Namespace: "Root output directory that contains planning_trajectories.json and the debug directory." ), ) + parser.add_argument( + "--output_repo_dir", + type=str, + required=True, + help="Directory containing the generated repository to debug.", + ) parser.add_argument( "--paper_name", type=str, @@ -119,11 +125,12 @@ def parse_args() -> argparse.Namespace: required=True, help="Backup index appended as ..bak when saving modified files.", ) + add_provider_args(parser) return parser.parse_args() args = parse_args() -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +client = build_client(provider=args.provider, api_key=args.api_key) if not os.path.exists(args.error_file_name): raise FileNotFoundError(f"Error file not found: {args.error_file_name}") @@ -173,11 +180,18 @@ def parse_args() -> argparse.Namespace: config_yaml = f.read() codes += f"```yaml\n## File name: config.yaml\n{config_yaml}\n```\n\n" -reproduce_path = os.path.join(debug_dir, "reproduce.sh") +reproduce_path = os.path.join(debug_dir, "reproduce.ps1") if os.path.exists(reproduce_path): with open(reproduce_path, "r", encoding="utf-8") as f: reproduce_sh = f.read() - codes += f"```bash\n## File name: reproduce.sh\n{reproduce_sh}\n```\n\n" + codes += f"```powershell\n## File name: reproduce.ps1\n{reproduce_sh}\n```\n\n" +else: + # Fallback: also check for legacy reproduce.sh + reproduce_path_sh = os.path.join(debug_dir, "reproduce.sh") + if os.path.exists(reproduce_path_sh): + with open(reproduce_path_sh, "r", encoding="utf-8") as f: + reproduce_sh = f.read() + codes += f"```bash\n## File name: reproduce.sh\n{reproduce_sh}\n```\n\n" # -------------------------------------------------- # Build debugging prompt @@ -245,13 +259,15 @@ def parse_args() -> argparse.Namespace: """, }, ] -response = client.chat.completions.create( +response = chat_complete( + client, + provider=args.provider, model=args.model, messages=msg, - reasoning_effort="high", + reasoning_effort="high" if is_reasoning_model(args.model) else None, ) -answer = response.choices[0].message.content +answer = 
response["choices"][0]["message"]["content"] # print("===== RAW MODEL ANSWER =====") # print(answer) diff --git a/codes/eval.py b/codes/eval.py index ca695cb3..9f696255 100644 --- a/codes/eval.py +++ b/codes/eval.py @@ -1,22 +1,38 @@ -from openai import OpenAI import json import os import sys import argparse from utils import read_python_files, extract_planning, content_to_json, \ num_tokens_from_messages, read_all_files, extract_json_from_string, get_now_str, print_log_cost +from providers import build_client, chat_complete, add_provider_args -client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) +_client = None +_provider = "openai" def api_call(request_json): - completion = client.chat.completions.create(**request_json) - return completion + """Thin wrapper kept for backward compat; uses the providers module.""" + messages = request_json["messages"] + model = request_json["model"] + n = request_json.get("n", 1) + temperature = request_json.get("temperature", None) + reasoning_effort = request_json.get("reasoning_effort", None) + # chat_complete handles n>1 looping for providers that don't support it + return chat_complete( + _client, _provider, model, messages, + n=n, + temperature=temperature, + reasoning_effort=reasoning_effort, + ) def main(args): + global _client, _provider + _client = build_client(provider=args.provider, api_key=args.api_key) + _provider = args.provider + paper_name = args.paper_name pdf_json_path = args.pdf_json_path - output_dir = args.output_dir + output_dir = args.output_dir target_repo_dir = args.target_repo_dir eval_result_dir = args.eval_result_dir gpt_version = args.gpt_version @@ -28,7 +44,7 @@ def main(args): gold_repo_dir = args.gold_repo_dir # paper - with open(f'{pdf_json_path}') as f: + with open(f'{pdf_json_path}', encoding='utf-8') as f: paper_json = json.load(f) codes = "" @@ -37,13 +53,13 @@ def main(args): target_files_dict = read_python_files(target_repo_dir) # configuration - with open(f'{output_dir}/planning_config.yaml') as f: + with open(f'{output_dir}/planning_config.yaml', encoding='utf-8') as f: config_yaml = f.read() context_lst = extract_planning(f'{output_dir}/planning_trajectories.json') if os.path.exists(f'{output_dir}/task_list.json'): - with open(f'{output_dir}/task_list.json') as f: + with open(f'{output_dir}/task_list.json', encoding='utf-8') as f: task_list = json.load(f) else: task_list = content_to_json(context_lst[2]) @@ -61,7 +77,7 @@ def main(args): codes += f"```## File name: {file_name}\n{code}\n```\n\n" - prompt = open(f"{data_dir}/prompts/{eval_type}.txt").read() + prompt = open(f"{data_dir}/prompts/{eval_type}.txt", encoding='utf-8').read() cur_prompt = prompt.replace('{{Paper}}', f"{paper_json}").replace('{{Code}}', codes) @@ -73,7 +89,7 @@ def main(args): gold_cnt = 0 if len(args.selected_file_path) > 0: selected_file_lst = [] - with open(args.selected_file_path) as f: + with open(args.selected_file_path, encoding='utf-8') as f: selected_file_lst = f.readlines() for s_idx in range(len(selected_file_lst)): @@ -113,31 +129,24 @@ def main(args): sys.exit(0) - if "o3-mini" in gpt_version: - if generated_n > 8: - print(f"[WARNING] o3-mini does not support n > 8. 
Setting generated_n to 8.") - generated_n = 8 - + from providers import is_reasoning_model + if is_reasoning_model(gpt_version): request_json = { - "model": gpt_version, + "model": gpt_version, "messages": msg, "reasoning_effort": "high", "n": generated_n } else: request_json = { - "model": gpt_version, - "messages": msg, + "model": gpt_version, + "messages": msg, "temperature": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "stop": None, - "n": generated_n # 10 + "n": generated_n } - - completion = api_call(request_json) - completion_json = json.loads(completion.model_dump_json()) - + + completion_json = api_call(request_json) + score_key = "score" rationale_key = "critique_list" @@ -226,27 +235,27 @@ def main(args): if __name__ == '__main__': argparser = argparse.ArgumentParser() - + argparser.add_argument('--paper_name', type=str) argparser.add_argument('--pdf_json_path', type=str) argparser.add_argument('--data_dir',type=str, default="../data") argparser.add_argument('--output_dir',type=str) - + argparser.add_argument('--target_repo_dir', type=str) argparser.add_argument('--gold_repo_dir', type=str, default="") argparser.add_argument('--eval_result_dir',type=str) - + argparser.add_argument('--eval_type', type=str, default="ref_free", choices=["ref_free", "ref_based"]) argparser.add_argument('--generated_n', type=int, default=8) argparser.add_argument('--gpt_version', type=str, default="o3-mini") - argparser.add_argument('--selected_file_path', type=str, default="") + argparser.add_argument('--selected_file_path', type=str, default="") argparser.add_argument('--papercoder', action="store_true") - - - + + add_provider_args(argparser) + args = argparser.parse_args() main(args) diff --git a/codes/pdf_to_json.py b/codes/pdf_to_json.py new file mode 100644 index 00000000..626cfb00 --- /dev/null +++ b/codes/pdf_to_json.py @@ -0,0 +1,261 @@ +ο»Ώ""" +pdf_to_json.py - Convert a PDF file into the s2orc-compatible JSON format +expected by 0_pdf_process.py, without needing GROBID. + +Uses PyMuPDF (fitz) for text extraction. + +Install: pip install pymupdf + +Output schema (mirrors s2orc raw JSON): +{ + "paper_id": "", + "title": "", + "abstract": "", + "pdf_parse": { + "paper_id": "", + "abstract": [{"text": "", "cite_spans": [], "ref_spans": [], + "eq_spans": [], "section": "Abstract", "sec_num": null}], + "body_text": [{"text": "", "cite_spans": [], "ref_spans": [], + "eq_spans": [], "section": "", "sec_num": null}, ...], + "back_matter": [], + "bib_entries": {}, + "ref_entries": {} + } +} +""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys + +try: + import fitz # PyMuPDF +except ImportError: + print( + "[ERROR] PyMuPDF is not installed. 
Run: pip install pymupdf",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Heuristics for section heading detection
+# ---------------------------------------------------------------------------
+
+_HEADING_RE = re.compile(
+    r"""^
+    (?:
+        (?:\d+\.?)+\s+[A-Z]          # "1 Introduction" / "2.1 Background"
+      | (?-i:[A-Z][A-Z\s]{3,}$)      # ALL-CAPS heading (kept case-sensitive despite the global IGNORECASE)
+      | (?:Abstract|Introduction|Related\s+Work|Background|Method(?:ology)?|
+          Experiment(?:s|al\s+Setup)?|Results?|Discussion|Conclusion(?:s)?|
+          References?|Appendix|Acknowledgements?)\b
+    )
+    """,
+    re.VERBOSE | re.IGNORECASE,
+)
+
+# Sections that are typically back-matter
+_BACK_MATTER_RE = re.compile(
+    r"^(?:References?|Bibliography|Acknowledgements?|Appendix)\b",
+    re.IGNORECASE,
+)
+
+
+def _is_heading(text: str) -> bool:
+    text = text.strip()
+    if not text or len(text) > 120:
+        return False
+    return bool(_HEADING_RE.match(text))
+
+
+# ---------------------------------------------------------------------------
+# Core extraction
+# ---------------------------------------------------------------------------
+
+_JUNK_TITLE_RE = re.compile(
+    r"arXiv|doi\.org|preprint|Β©|copyright|\d{4}\s+IEEE"
+    r"|proceedings of|workshop on|journal of|vol\.\s*\d|pages?\s+\d+",
+    re.IGNORECASE,
+)
+
+
+def _extract_title(doc: fitz.Document) -> str:
+    """Best-effort title: PDF metadata -> largest non-junk font cluster on page 1."""
+    meta_title = (doc.metadata or {}).get("title", "").strip()
+    if meta_title and len(meta_title) > 8 and not _JUNK_TITLE_RE.search(meta_title):
+        return meta_title
+
+    # Collect all (font_size, y_position, text) spans from page 1
+    page = doc[0]
+    blocks = page.get_text("dict")["blocks"]
+    spans: list[tuple[float, float, str]] = []
+    for b in blocks:
+        if b.get("type") != 0:
+            continue
+        for line in b.get("lines", []):
+            for span in line.get("spans", []):
+                t = span["text"].strip()
+                if t:
+                    spans.append((span["size"], span["origin"][1], t))
+
+    if not spans:
+        return ""
+
+    # Sort by font size descending; drop junk patterns and spans shorter than 3 chars
+    spans.sort(key=lambda x: -x[0])
+    clean = [(sz, y, t) for sz, y, t in spans
+             if not _JUNK_TITLE_RE.search(t) and len(t) >= 3]
+    if not clean:
+        clean = spans  # fall back if everything looked like junk
+
+    # The title is the largest-font cluster (within 2pt of the top clean size)
+    top_size = clean[0][0]
+    title_spans = [(sz, y, t) for sz, y, t in clean if abs(sz - top_size) < 2.0]
+    # Sort by y-position (top to bottom) to preserve reading order
+    title_spans.sort(key=lambda x: x[1])
+    return " ".join(t for _, _, t in title_spans).strip()
+
+
+def _extract_abstract(pages_text: list[str]) -> str:
+    """Pull the abstract paragraph from the first three pages."""
+    combined = "\n".join(pages_text[:3])
+    # Try to find "Abstract" heading followed by text
+    m = re.search(
+        r"Abstract[.\s\n]*([A-Z].*?)(?=\n\s*(?:\d+\.?\s+[A-Z]|Introduction\b|Keywords?\b))",
+        combined,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if m:
+        return re.sub(r"\s+", " ", m.group(1)).strip()
+    return ""
+
+
+def pdf_to_json(pdf_path: str) -> dict:
+    stem = os.path.splitext(os.path.basename(pdf_path))[0]
+    doc = fitz.open(pdf_path)
+
+    # --- per-page plain text ---
+    pages_text = [page.get_text("text") for page in doc]
+
+    title = _extract_title(doc)
+    abstract_text = _extract_abstract(pages_text)
+
+    # --- build body paragraphs ---
+    full_text = "\n".join(pages_text)
+    # Split by blank lines to get paragraphs
+    raw_paragraphs = re.split(r"\n{2,}", full_text)
+
+    current_section = "Introduction"
+    abstract_blocks: list[dict] = []
+    body_blocks: list[dict] = []
+    back_matter_blocks: list[dict] = []
+
+    in_abstract = False
+    past_abstract = False
+    in_back_matter = False
+
+    for para in raw_paragraphs:
+        para = para.strip()
+        if not para or len(para) < 20:
+            continue
+
+        first_line = para.split("\n")[0].strip()
+
+        # Detect abstract section
+        if re.match(r"^Abstract\b", first_line, re.IGNORECASE) and not past_abstract:
+            in_abstract = True
+            body_of_para = "\n".join(para.split("\n")[1:]).strip()
+            if body_of_para:
+                abstract_blocks.append(_make_block(body_of_para, "Abstract", None))
+            continue
+
+        if in_abstract and _is_heading(first_line):
+            in_abstract = False
+            past_abstract = True
+
+        if in_abstract:
+            abstract_blocks.append(_make_block(para, "Abstract", None))
+            continue
+
+        # Detect back matter
+        if _BACK_MATTER_RE.match(first_line):
+            in_back_matter = True
+            current_section = first_line
+            continue
+
+        # Detect regular section headings
+        if _is_heading(first_line) and not in_back_matter:
+            current_section = re.sub(r"\s+", " ", first_line).strip()
+            body_of_para = "\n".join(para.split("\n")[1:]).strip()
+            if body_of_para:
+                body_blocks.append(_make_block(body_of_para, current_section, None))
+            continue
+
+        text_clean = re.sub(r"\s+", " ", para).strip()
+        block = _make_block(text_clean, current_section, None)
+
+        if in_back_matter:
+            back_matter_blocks.append(block)
+        else:
+            body_blocks.append(block)
+
+    # If we got no abstract blocks from section detection, use the extracted text
+    if not abstract_blocks and abstract_text:
+        abstract_blocks = [_make_block(abstract_text, "Abstract", None)]
+
+    return {
+        "paper_id": stem,
+        "title": title,
+        "abstract": abstract_text,
+        "pdf_parse": {
+            "paper_id": stem,
+            "_pdf_hash": "",
+            "abstract": abstract_blocks,
+            "body_text": body_blocks,
+            "back_matter": back_matter_blocks,
+            "bib_entries": {},
+            "ref_entries": {},
+        },
+    }
+
+
+def _make_block(text: str, section: str, sec_num) -> dict:
+    return {
+        "text": text,
+        "cite_spans": [],
+        "ref_spans": [],
+        "eq_spans": [],
+        "section": section,
+        "sec_num": sec_num,
+    }
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Convert a PDF to s2orc-compatible JSON (no GROBID required)."
+    )
+    parser.add_argument("--pdf_path", required=True, help="Path to input PDF file")
+    parser.add_argument("--output_json_path", required=True, help="Path for output JSON")
+    args = parser.parse_args()
+
+    print(f"[pdf_to_json] Reading {args.pdf_path} ...")
+    data = pdf_to_json(args.pdf_path)
+    with open(args.output_json_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+    print(f"[pdf_to_json] Saved -> {args.output_json_path}")
+    print(f"  title           : {data['title'][:80]}")
+    print(f"  abstract        : {data['abstract'][:120]}...")
+    print(f"  body paragraphs : {len(data['pdf_parse']['body_text'])}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/codes/providers.py b/codes/providers.py
new file mode 100644
index 00000000..bf0e3de0
--- /dev/null
+++ b/codes/providers.py
@@ -0,0 +1,534 @@
+"""
+providers.py β€” Multi-provider LLM client abstraction.
+ +Supported providers (all expose an OpenAI-compatible chat/completions endpoint, +except Google Gemini which is handled separately): + + openai https://api.openai.com/v1 + groq https://api.groq.com/openai/v1 + cerebras https://api.cerebras.ai/v1 + openrouter https://openrouter.ai/api/v1 + mistral https://api.mistral.ai/v1 + github https://models.inference.ai.azure.com + sambanova https://api.sambanova.ai/v1 + gemini https://generativelanguage.googleapis.com/v1beta (native REST) + cohere https://api.cohere.com/v2 (native REST) + cloudflare https://api.cloudflare.com/client/v4/accounts/{id}/ai (native REST) + +Usage +----- + from providers import build_client, chat_complete, is_reasoning_model + + client = build_client(provider="groq", api_key="gsk_...") + response = chat_complete(client, provider="groq", + model="llama-3.3-70b-versatile", + messages=[...]) + # response is always a normalised dict: + # {"choices": [{"message": {"role": "assistant", "content": "..."}}], + # "usage": {"prompt_tokens": int, "completion_tokens": int, + # "total_tokens": int, + # "prompt_tokens_details": {"cached_tokens": 0}}} + +Free model suggestions per provider +------------------------------------- + groq : llama-3.3-70b-versatile, meta-llama/llama-4-scout-17b-16e-instruct, + moonshotai/kimi-k2-instruct, qwen/qwen3-32b + cerebras : llama-3.3-70b, qwen3-32b, qwen3-235b, gpt-oss-120b + openrouter : deepseek/deepseek-r1:free, meta-llama/llama-4-scout:free, + qwen/qwen3-235b-a22b:free, microsoft/phi-4-reasoning:free + mistral : mistral-large-latest, mistral-small-latest, open-codestral-mamba, + ministral-8b-latest + github : gpt-4o, gpt-4.1, o3, deepseek-r1, grok-3-mini + sambanova : Meta-Llama-3.3-70B-Instruct, Meta-Llama-3.1-405B-Instruct, + Qwen2.5-72B-Instruct + gemini : gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite-preview-06-17 + cohere : command-r-plus, command-a-03-2025, aya-expanse-32b + cloudflare : @cf/meta/llama-3.3-70b-instruct-fp8-fast, + @cf/qwen/qwq-32b, @cf/mistralai/mistral-7b-instruct-v0.2 +""" + +from __future__ import annotations + +import json +import os +import re +import sys +from typing import Any, Dict, List, Optional + +import requests +from openai import OpenAI + +# --------------------------------------------------------------------------- +# Provider registry +# --------------------------------------------------------------------------- + +PROVIDER_BASE_URLS: Dict[str, str] = { + "openai": "https://api.openai.com/v1", + "groq": "https://api.groq.com/openai/v1", + "cerebras": "https://api.cerebras.ai/v1", + "openrouter": "https://openrouter.ai/api/v1", + "mistral": "https://api.mistral.ai/v1", + "github": "https://models.inference.ai.azure.com", + "sambanova": "https://api.sambanova.ai/v1", + # Native REST β€” base URLs used directly in the REST helpers + "gemini": "https://generativelanguage.googleapis.com/v1beta", + "cohere": "https://api.cohere.com/v2", + "cloudflare": "", # requires account_id; built dynamically +} + +# Providers that use the OpenAI Python SDK (openai-compatible base_url) +_OPENAI_SDK_PROVIDERS = { + "openai", "groq", "cerebras", "openrouter", "mistral", "github", "sambanova" +} + +# Reasoning / thinking models β€” these need special param handling +_REASONING_MODEL_PATTERNS = [ + r"o1", r"o3", r"o4", r"deepseek.*r1", r"qwq", r"qwen.*think", + r"r1", r"sonar.*reasoning", r"kimi.*think", +] + + +def is_reasoning_model(model: str) -> bool: + """Return True if the model name matches a known reasoning/thinking model.""" + m = model.lower() + return 
any(re.search(p, m) for p in _REASONING_MODEL_PATTERNS) + + +# --------------------------------------------------------------------------- +# Client factory +# --------------------------------------------------------------------------- + +def build_client( + provider: str, + api_key: Optional[str] = None, + cloudflare_account_id: Optional[str] = None, +) -> Any: + """ + Return a client object for the given provider. + + For OpenAI-compatible providers this is an ``openai.OpenAI`` instance. + For native-REST providers (gemini, cohere, cloudflare) this is a plain + dict carrying the credentials needed by ``chat_complete``. + """ + provider = provider.lower() + + # Resolve API key from env if not supplied + if not api_key: + env_map = { + "openai": "OPENAI_API_KEY", + "groq": "GROQ_API_KEY", + "cerebras": "CEREBRAS_API_KEY", + "openrouter": "OPENROUTER_API_KEY", + "mistral": "MISTRAL_API_KEY", + "github": "GITHUB_TOKEN", + "sambanova": "SAMBANOVA_API_KEY", + "gemini": "GEMINI_API_KEY", + "cohere": "COHERE_API_KEY", + "cloudflare": "CLOUDFLARE_API_KEY", + } + env_var = env_map.get(provider, "OPENAI_API_KEY") + api_key = os.environ.get(env_var, "") + if not api_key: + print( + f"[WARNING] No API key found for provider '{provider}'. " + f"Set the {env_var} environment variable.", + file=sys.stderr, + ) + + if provider in _OPENAI_SDK_PROVIDERS: + base_url = PROVIDER_BASE_URLS[provider] + extra = {} + if provider == "openrouter": + extra["default_headers"] = {"HTTP-Referer": "https://github.com/Paper2Code"} + return OpenAI(api_key=api_key, base_url=base_url, **extra) + + if provider == "gemini": + return {"_provider": "gemini", "_api_key": api_key} + + if provider == "cohere": + return {"_provider": "cohere", "_api_key": api_key} + + if provider == "cloudflare": + account_id = cloudflare_account_id or os.environ.get("CLOUDFLARE_ACCOUNT_ID", "") + if not account_id: + print( + "[WARNING] Cloudflare provider requires CLOUDFLARE_ACCOUNT_ID env var.", + file=sys.stderr, + ) + return { + "_provider": "cloudflare", + "_api_key": api_key, + "_account_id": account_id, + } + + raise ValueError(f"Unknown provider: '{provider}'. " + f"Valid options: {sorted(PROVIDER_BASE_URLS)}") + + +# --------------------------------------------------------------------------- +# Unified chat completion +# --------------------------------------------------------------------------- + +def chat_complete( + client: Any, + provider: str, + model: str, + messages: List[Dict[str, str]], + n: int = 1, + temperature: Optional[float] = None, + reasoning_effort: Optional[str] = None, +) -> Dict: + """ + Call the chat completion endpoint and return a **normalised response dict**: + + { + "choices": [ + {"message": {"role": "assistant", "content": ""}} + ... 
# n items + ], + "usage": { + "prompt_tokens": int, + "completion_tokens": int, + "total_tokens": int, + "prompt_tokens_details": {"cached_tokens": 0} + }, + "_provider": "", + "_model": "" + } + + Parameters + ---------- + client : object returned by ``build_client`` + provider : provider name (lowercase) + model : model identifier as accepted by the provider + messages : list of {"role": ..., "content": ...} dicts + n : number of completions to generate (not supported by all providers) + temperature : sampling temperature (ignored for reasoning models) + reasoning_effort: "low"/"medium"/"high" β€” only sent when supported + """ + provider = provider.lower() + + if provider in _OPENAI_SDK_PROVIDERS: + return _call_openai_sdk( + client, provider, model, messages, n, temperature, reasoning_effort + ) + if provider == "gemini": + return _call_gemini(client, model, messages, n, temperature) + if provider == "cohere": + return _call_cohere(client, model, messages, n, temperature) + if provider == "cloudflare": + return _call_cloudflare(client, model, messages) + + raise ValueError(f"Unknown provider: '{provider}'") + + +# --------------------------------------------------------------------------- +# OpenAI-SDK-compatible providers +# --------------------------------------------------------------------------- + +def _call_openai_sdk( + client: OpenAI, + provider: str, + model: str, + messages: List[Dict], + n: int, + temperature: Optional[float], + reasoning_effort: Optional[str], +) -> Dict: + kwargs: Dict[str, Any] = {"model": model, "messages": messages} + + reasoning = is_reasoning_model(model) + + if reasoning: + # reasoning_effort is only honoured by OpenAI o-series and a few others + if reasoning_effort and provider in ("openai",): + kwargs["reasoning_effort"] = reasoning_effort + # temperature must NOT be set for o1/o3/o4 on OpenAI + if provider != "openai": + if temperature is not None: + kwargs["temperature"] = temperature + else: + if temperature is not None: + kwargs["temperature"] = temperature + + # n > 1 β€” not supported by all providers; fall back to looping + if n > 1 and provider in ("groq", "cerebras", "sambanova", "mistral"): + return _loop_n(client, provider, model, messages, n, temperature, reasoning_effort) + + if n > 1: + kwargs["n"] = n + + completion = client.chat.completions.create(**kwargs) + return _normalise_openai(completion, provider, model) + + +def _loop_n( + client: OpenAI, + provider: str, + model: str, + messages: List[Dict], + n: int, + temperature: Optional[float], + reasoning_effort: Optional[str], +) -> Dict: + """Call the API n times and merge results into one normalised dict.""" + choices = [] + total_prompt = 0 + total_completion = 0 + total_cached = 0 + + for _ in range(n): + r = _call_openai_sdk( + client, provider, model, messages, 1, temperature, reasoning_effort + ) + choices.extend(r["choices"]) + total_prompt += r["usage"]["prompt_tokens"] + total_completion += r["usage"]["completion_tokens"] + total_cached += r["usage"]["prompt_tokens_details"].get("cached_tokens", 0) + + return { + "choices": choices, + "usage": { + "prompt_tokens": total_prompt, + "completion_tokens": total_completion, + "total_tokens": total_prompt + total_completion, + "prompt_tokens_details": {"cached_tokens": total_cached}, + }, + "_provider": provider, + "_model": model, + } + + +def _normalise_openai(completion, provider: str, model: str) -> Dict: + raw = json.loads(completion.model_dump_json()) + usage = raw.get("usage") or {} + prompt_tokens = 
usage.get("prompt_tokens", 0) or 0 + completion_tokens = usage.get("completion_tokens", 0) or 0 + prompt_details = usage.get("prompt_tokens_details") or {} + cached_tokens = (prompt_details.get("cached_tokens") or 0) if isinstance(prompt_details, dict) else 0 + + choices = [] + for c in raw.get("choices", []): + msg = c.get("message") or {} + choices.append({ + "message": { + "role": msg.get("role", "assistant"), + "content": msg.get("content") or "", + } + }) + + return { + "choices": choices, + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + "prompt_tokens_details": {"cached_tokens": cached_tokens}, + }, + "_provider": provider, + "_model": model, + "_raw": raw, # kept for cost logging that expects the original shape + } + + +# --------------------------------------------------------------------------- +# Google Gemini (native REST) +# --------------------------------------------------------------------------- + +def _call_gemini( + client: Dict, + model: str, + messages: List[Dict], + n: int, + temperature: Optional[float], +) -> Dict: + api_key = client["_api_key"] + base_url = PROVIDER_BASE_URLS["gemini"] + + # Convert messages to Gemini format + system_instruction = None + contents = [] + for m in messages: + role = m["role"] + content = m["content"] + if role == "system": + system_instruction = {"parts": [{"text": content}]} + else: + gemini_role = "user" if role == "user" else "model" + contents.append({"role": gemini_role, "parts": [{"text": content}]}) + + payload: Dict[str, Any] = { + "contents": contents, + "generationConfig": {"candidateCount": n}, + } + if system_instruction: + payload["systemInstruction"] = system_instruction + if temperature is not None: + payload["generationConfig"]["temperature"] = temperature + + url = f"{base_url}/models/{model}:generateContent?key={api_key}" + resp = requests.post(url, json=payload, timeout=300) + resp.raise_for_status() + data = resp.json() + + choices = [] + for candidate in data.get("candidates", []): + text = "" + for part in candidate.get("content", {}).get("parts", []): + text += part.get("text", "") + choices.append({"message": {"role": "assistant", "content": text}}) + + usage_meta = data.get("usageMetadata", {}) + prompt_tokens = usage_meta.get("promptTokenCount", 0) + completion_tokens = usage_meta.get("candidatesTokenCount", 0) + + return { + "choices": choices, + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + "prompt_tokens_details": {"cached_tokens": 0}, + }, + "_provider": "gemini", + "_model": model, + } + + +# --------------------------------------------------------------------------- +# Cohere (native REST v2) +# --------------------------------------------------------------------------- + +def _call_cohere( + client: Dict, + model: str, + messages: List[Dict], + n: int, + temperature: Optional[float], +) -> Dict: + api_key = client["_api_key"] + url = "https://api.cohere.com/v2/chat" + + # Cohere v2 uses a similar messages format but the system msg is separate + cohere_messages = [] + for m in messages: + cohere_messages.append({"role": m["role"], "content": m["content"]}) + + payload: Dict[str, Any] = {"model": model, "messages": cohere_messages} + if temperature is not None: + payload["temperature"] = temperature + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + choices = [] + 
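    # The v2 chat call below returns a single completion per request, so for
+    # n > 1 the request is simply issued n times and the billed_units token
+    # counts from each response are summed into the normalised usage dict.
+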
total_prompt = 0 + total_completion = 0 + + for _ in range(n): + resp = requests.post(url, json=payload, headers=headers, timeout=300) + resp.raise_for_status() + data = resp.json() + + text = "" + msg = data.get("message", {}) + for item in msg.get("content", []): + if item.get("type") == "text": + text += item.get("text", "") + choices.append({"message": {"role": "assistant", "content": text}}) + + usage = data.get("usage", {}) + billed = usage.get("billed_units", {}) + total_prompt += billed.get("input_tokens", 0) + total_completion += billed.get("output_tokens", 0) + + return { + "choices": choices, + "usage": { + "prompt_tokens": total_prompt, + "completion_tokens": total_completion, + "total_tokens": total_prompt + total_completion, + "prompt_tokens_details": {"cached_tokens": 0}, + }, + "_provider": "cohere", + "_model": model, + } + + +# --------------------------------------------------------------------------- +# Cloudflare Workers AI (native REST) +# --------------------------------------------------------------------------- + +def _call_cloudflare( + client: Dict, + model: str, + messages: List[Dict], +) -> Dict: + api_key = client["_api_key"] + account_id = client["_account_id"] + url = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model}" + ) + + payload = {"messages": messages} + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + + resp = requests.post(url, json=payload, headers=headers, timeout=300) + resp.raise_for_status() + data = resp.json() + + result = data.get("result", {}) + text = result.get("response", "") + + usage = result.get("usage", {}) + prompt_tokens = usage.get("prompt_tokens", 0) + completion_tokens = usage.get("completion_tokens", 0) + + return { + "choices": [{"message": {"role": "assistant", "content": text}}], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + "prompt_tokens_details": {"cached_tokens": 0}, + }, + "_provider": "cloudflare", + "_model": model, + } + + +# --------------------------------------------------------------------------- +# Convenience: add provider/api_key args to any argparse parser +# --------------------------------------------------------------------------- + +def add_provider_args(parser) -> None: + """Add --provider and --api_key arguments to an argparse.ArgumentParser.""" + parser.add_argument( + "--provider", + type=str, + default="openai", + choices=sorted(PROVIDER_BASE_URLS.keys()), + help=( + "LLM provider to use. Default: openai. " + "Free alternatives: groq, cerebras, openrouter, mistral, " + "github, sambanova, gemini, cohere, cloudflare" + ), + ) + parser.add_argument( + "--api_key", + type=str, + default="", + help=( + "API key for the selected provider. " + "If omitted, the corresponding environment variable is used " + "(OPENAI_API_KEY, GROQ_API_KEY, CEREBRAS_API_KEY, " + "OPENROUTER_API_KEY, MISTRAL_API_KEY, GITHUB_TOKEN, " + "SAMBANOVA_API_KEY, GEMINI_API_KEY, COHERE_API_KEY, " + "CLOUDFLARE_API_KEY + CLOUDFLARE_ACCOUNT_ID)." 
+ ), + ) diff --git a/codes/utils.py b/codes/utils.py index 09a95e68..4560c1b7 100644 --- a/codes/utils.py +++ b/codes/utils.py @@ -4,7 +4,7 @@ from datetime import datetime def extract_planning(trajectories_json_file_path): - with open(trajectories_json_file_path) as f: + with open(trajectories_json_file_path, encoding='utf-8') as f: traj = json.load(f) context_lst = [] @@ -239,17 +239,31 @@ def cal_cost(response_json, model_name): prompt_tokens = response_json["usage"]["prompt_tokens"] completion_tokens = response_json["usage"]["completion_tokens"] - cached_tokens = response_json["usage"]["prompt_tokens_details"].get("cached_tokens", 0) + prompt_tokens_details = response_json["usage"].get("prompt_tokens_details") or {} + cached_tokens = (prompt_tokens_details.get("cached_tokens") or 0) if isinstance(prompt_tokens_details, dict) else 0 # input token = (prompt_tokens - cached_tokens) actual_input_tokens = prompt_tokens - cached_tokens output_tokens = completion_tokens + if model_name not in model_cost: + # Unknown / non-OpenAI model β€” report tokens but not dollar cost + return { + 'model_name': model_name, + 'actual_input_tokens': actual_input_tokens, + 'input_cost': 0.0, + 'cached_tokens': cached_tokens, + 'cached_input_cost': 0.0, + 'output_tokens': output_tokens, + 'output_cost': 0.0, + 'total_cost': 0.0, + } + cost_info = model_cost[model_name] - input_cost = (actual_input_tokens / 1_000_000) * cost_info['input'] - cached_input_cost = (cached_tokens / 1_000_000) * cost_info['cached_input'] - output_cost = (output_tokens / 1_000_000) * cost_info['output'] + input_cost = (actual_input_tokens / 1_000_000) * (cost_info['input'] or 0) + cached_input_cost = (cached_tokens / 1_000_000) * (cost_info['cached_input'] or 0) + output_cost = (output_tokens / 1_000_000) * (cost_info['output'] or 0) total_cost = input_cost + cached_input_cost + output_cost @@ -369,6 +383,7 @@ def read_all_files(directory, allowed_ext, is_print=True): for root, _, files in os.walk(directory): # Recursively traverse directories for filename in files: relative_path = os.path.relpath(os.path.join(root, filename), directory) # Preserve directory structure + relative_path = relative_path.replace("\\", "/") # Normalize to forward slashes on Windows # print(f"fn: {filename}\tdirectory: {directory}") _file_name, ext = os.path.splitext(filename) @@ -376,7 +391,7 @@ def read_all_files(directory, allowed_ext, is_print=True): is_skip = False if len(directory) < len(root): root2 = root[len(directory)+1:] - for dirname in root2.split("/"): + for dirname in root2.replace("\\", "/").split("/"): if dirname.startswith("."): is_skip = True break @@ -399,7 +414,7 @@ def read_all_files(directory, allowed_ext, is_print=True): if file_size > 204800: # > 200KB print(f"[BIG] {filepath} {file_size}") - with open(filepath, "r") as file: # encoding="utf-8" + with open(filepath, "r", encoding="utf-8", errors="replace") as file: # encoding="utf-8" all_files_content[relative_path] = file.read() except Exception as e: print(e) @@ -416,6 +431,7 @@ def read_python_files(directory): for filename in files: if filename.endswith(".py"): # Check if file has .py extension relative_path = os.path.relpath(os.path.join(root, filename), directory) # Preserve directory structure + relative_path = relative_path.replace("\\", "/") # Normalize to forward slashes on Windows with open(os.path.join(root, filename), "r", encoding="utf-8") as file: python_files_content[relative_path] = file.read() diff --git a/paper2code.bat b/paper2code.bat new file mode 100644 index 
00000000..ff527760
--- /dev/null
+++ b/paper2code.bat
@@ -0,0 +1,16 @@
+@echo off
+:: paper2code.bat — one-liner launcher
+:: Usage:
+::   paper2code.bat https://arxiv.org/pdf/2510.01193
+::   paper2code.bat https://arxiv.org/pdf/2510.01193 --provider groq --model llama-3.3-70b-versatile
+::   paper2code.bat C:\papers\mypaper.pdf --provider openai --model o3-mini
+::   paper2code.bat https://arxiv.org/abs/2510.01193 --api_key sk-...
+::
+:: All options:
+::   --provider  openai|groq|cerebras|openrouter|mistral|github|sambanova|gemini|cohere|cloudflare
+::   --model     model name for that provider (default: o3-mini for openai)
+::   --api_key   explicit API key (otherwise uses env var)
+::   --output    output root directory (default: outputs)
+::   --rundebug  after the pipeline, run the debugging agent if reproduce.ps1 fails
+
+powershell.exe -NoProfile -ExecutionPolicy Bypass -File "%~dp0paper2code.ps1" %*
diff --git a/paper2code.ps1 b/paper2code.ps1
new file mode 100644
index 00000000..918d10c8
--- /dev/null
+++ b/paper2code.ps1
@@ -0,0 +1,420 @@
+# paper2code.ps1 — fully automatic end-to-end pipeline
+#
+# Usage (called by paper2code.bat):
+#   paper2code.bat https://arxiv.org/pdf/2510.01193
+#   paper2code.bat https://arxiv.org/abs/2510.01193
+#   paper2code.bat C:\papers\mypaper.pdf
+#   paper2code.bat https://arxiv.org/pdf/2510.01193 --provider groq --model llama-3.3-70b-versatile
+#   paper2code.bat https://arxiv.org/pdf/2510.01193 --api_key sk-... --output C:\myoutputs
+#
+# Options:
+#   --provider  openai|groq|cerebras|openrouter|mistral|github|sambanova|gemini|cohere|cloudflare
+#   --model     model name for the chosen provider
+#   --api_key   explicit API key (overrides env var)
+#   --output    root output directory (default: .\outputs)
+#   --latex     path to a .tex file to use instead of PDF (skips PDF download)
+#   --rundebug  run the debugging agent if reproduce.ps1 exits non-zero
+#   --eval      run reference-free evaluation after coding

+param(
+    [Parameter(Position=0)]
+    [string]$Source = "",
+
+    [string]$Provider = "openai",
+    [string]$Model = "",
+    [string]$ApiKey = "",
+    [string]$Output = "",
+    [string]$Latex = "",
+    [switch]$RunDebug,
+    [switch]$Eval,
+
+    # Catch-all for the "--key value" style arguments forwarded verbatim by
+    # paper2code.bat; they are mapped onto the named parameters further down.
+    [Parameter(ValueFromRemainingArguments=$true)]
+    [string[]]$RestArgs = @()
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+function Info ($msg) { Write-Host "  $msg" -ForegroundColor Cyan }
+function Good ($msg) { Write-Host "  [OK] $msg" -ForegroundColor Green }
+function Warn ($msg) { Write-Host "  [WARN] $msg" -ForegroundColor Yellow }
+function Fail ($msg) { Write-Host "  [ERROR] $msg" -ForegroundColor Red; exit 1 }
+function Banner($msg) { Write-Host "`n========================================" -ForegroundColor Magenta
+                        Write-Host "  $msg" -ForegroundColor Magenta
+                        Write-Host "========================================" -ForegroundColor Magenta }
+
+function Require-Python {
+    if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
+        Fail "Python not found. Activate your conda env first: conda activate papertocode"
+    }
+}
+
+function Check-Module ($module) {
+    $r = python -c "import $module; print('ok')" 2>&1
+    return ("$r" -match "ok")
+}
+
+# ---------------------------------------------------------------------------
+# Resolve root directory (where this script lives)
+# ---------------------------------------------------------------------------
+$Root = $PSScriptRoot
+if (-not $Root) { $Root = (Get-Location).Path }
+
+$CodesDir = Join-Path $Root "codes"
+
+# ---------------------------------------------------------------------------
+# Parse --key value pairs that bat passes as positional args
+# (PowerShell's param() does not recognise the double-dash style, so map the
+# leftover tokens collected in $RestArgs onto the named parameters here)
+# ---------------------------------------------------------------------------
+for ($i = 0; $i -lt $RestArgs.Count; $i++) {
+    switch ($RestArgs[$i].ToLower()) {
+        "--provider" { $Provider = $RestArgs[++$i] }
+        "--model"    { $Model    = $RestArgs[++$i] }
+        "--api_key"  { $ApiKey   = $RestArgs[++$i] }
+        "--output"   { $Output   = $RestArgs[++$i] }
+        "--latex"    { $Latex    = $RestArgs[++$i] }
+        "--rundebug" { $RunDebug = $true }
+        "--debug"    { $RunDebug = $true }
+        "--eval"     { $Eval     = $true }
+        default      { Warn "Ignoring unrecognised argument: $($RestArgs[$i])" }
+    }
+}
+
+# ---------------------------------------------------------------------------
+# Validate input
+# ---------------------------------------------------------------------------
+if (-not $Source) {
+    Write-Host @"
+
+Usage:
+  paper2code.bat <pdf-url-or-path> [options]
+
+Examples:
+  paper2code.bat https://arxiv.org/pdf/2510.01193
+  paper2code.bat https://arxiv.org/abs/2510.01193
+  paper2code.bat C:\papers\mypaper.pdf --provider groq --model llama-3.3-70b-versatile
+  paper2code.bat https://arxiv.org/pdf/2510.01193 --provider openai --model o3-mini --output C:\myoutputs
+
+Options:
+  --provider   LLM provider (default: openai)
+  --model      Model name (default: o3-mini for openai)
+  --api_key    Explicit API key
+  --output     Output root (default: .\outputs)
+  --latex      Path to .tex file to use instead of PDF
+  --rundebug   Run debugging agent if generated code fails
+  --eval       Run reference-free evaluation after coding
+
+"@
+    exit 0
+}
+
+Require-Python
+
+# Default model per provider
+if (-not $Model) {
+    $defaults = @{
+        openai     = "o3-mini"
+        groq       = "llama-3.3-70b-versatile"
+        cerebras   = "llama-3.3-70b"
+        openrouter = "deepseek/deepseek-r1:free"
+        mistral    = "mistral-large-latest"
+        github     = "gpt-4.1"
+        sambanova  = "Meta-Llama-3.3-70B-Instruct"
+        gemini     = "gemini-2.5-flash"
+        cohere     = "command-r-plus"
+        cloudflare = "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
+    }
+    $Model = $defaults[$Provider.ToLower()]
+    if (-not $Model) { $Model = "o3-mini" }
+}
+
+# Output root
+if (-not $Output) { $Output = Join-Path $Root "outputs" }
+
+# API key flag
+$ApiKeyFlag = @()
+if ($ApiKey) { $ApiKeyFlag = @("--api_key", $ApiKey) }
+
+# ---------------------------------------------------------------------------
+# Step 1 — Resolve PDF
+# ---------------------------------------------------------------------------
+Banner "Step 1 — Resolving input"
+
+$PdfPath = ""
+$PaperName = ""
+$UseLatex = $false
+
+if ($Latex) {
+    # User supplied a .tex file directly — skip PDF download
+    if (-not (Test-Path $Latex)) { Fail "LaTeX file not found: $Latex" }
+    $UseLatex = $true
+    $PaperName = [System.IO.Path]::GetFileNameWithoutExtension($Latex)
+    Info "Using LaTeX source: $Latex"
+    Info "Paper name: $PaperName"
+}
+elseif ($Source -match "^https?://") {
+    # --- URL input ---
+
+    # Normalise arxiv abstract URL -> direct PDF URL
+    # e.g.
https://arxiv.org/abs/2510.01193 -> https://arxiv.org/pdf/2510.01193 + $Url = $Source -replace "arxiv\.org/abs/", "arxiv.org/pdf/" + # Strip trailing version tags like v1, v2 for cleaner naming + $UrlStem = $Url -replace "v\d+$", "" + + # Derive a safe paper name from the URL + $LastSegment = ($Url.TrimEnd("/").Split("/"))[-1] -replace "\?.*$", "" -replace "v\d+$", "" + $PaperName = $LastSegment -replace "[^\w\-]", "_" + if (-not $PaperName) { $PaperName = "paper" } + + # Download PDF + $TmpDir = Join-Path $env:TEMP "paper2code_$PaperName" + New-Item -ItemType Directory -Force -Path $TmpDir | Out-Null + $PdfPath = Join-Path $TmpDir "$PaperName.pdf" + + if (Test-Path $PdfPath) { + Good "PDF already cached: $PdfPath" + } else { + Info "Downloading PDF from: $Url" + try { + Invoke-WebRequest -Uri $Url -OutFile $PdfPath -UserAgent "Mozilla/5.0" -TimeoutSec 60 + Good "Downloaded -> $PdfPath" + } catch { + Fail "Failed to download PDF: $_" + } + } + + # Verify it's actually a PDF + $Header = [System.IO.File]::ReadAllBytes($PdfPath) | Select-Object -First 4 + $Magic = [System.Text.Encoding]::ASCII.GetString($Header) + if ($Magic -ne "%PDF") { + # arxiv sometimes redirects to an HTML page β€” try appending .pdf + if ($Url -notmatch "\.pdf$") { + $Url2 = $Url + ".pdf" + Info "Retrying with URL: $Url2" + try { + Invoke-WebRequest -Uri $Url2 -OutFile $PdfPath -UserAgent "Mozilla/5.0" -TimeoutSec 60 + $Header = [System.IO.File]::ReadAllBytes($PdfPath) | Select-Object -First 4 + $Magic = [System.Text.Encoding]::ASCII.GetString($Header) + } catch {} + } + if ($Magic -ne "%PDF") { + Fail "Downloaded file is not a valid PDF (header: '$Magic'). Check the URL." + } + } + + Info "Paper name: $PaperName" +} +else { + # --- Local file input --- + if (-not (Test-Path $Source)) { Fail "File not found: $Source" } + $PdfPath = (Resolve-Path $Source).Path + $PaperName = [System.IO.Path]::GetFileNameWithoutExtension($PdfPath) -replace "[^\w\-]", "_" + Info "Using local PDF: $PdfPath" + Info "Paper name: $PaperName" +} + +# --------------------------------------------------------------------------- +# Step 2 β€” Convert PDF -> JSON (skip if using LaTeX) +# --------------------------------------------------------------------------- +$WorkDir = Join-Path $Output $PaperName +$RawJson = Join-Path $WorkDir "${PaperName}_raw.json" +$CleanJson = Join-Path $WorkDir "${PaperName}_cleaned.json" +$OutputRepo = Join-Path $Output "${PaperName}_repo" + +New-Item -ItemType Directory -Force -Path $WorkDir | Out-Null +New-Item -ItemType Directory -Force -Path $OutputRepo | Out-Null + +if (-not $UseLatex) { + Banner "Step 2 β€” PDF -> JSON" + + if (-not (Check-Module "fitz")) { + Warn "PyMuPDF not found. Installing..." + python -m pip install pymupdf --quiet + } + + Info "Converting PDF to JSON..." 
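+    # pdf_to_json.py writes s2orc-compatible JSON using PyMuPDF alone (no
+    # GROBID); 0_pdf_process.py then cleans it into the *_cleaned.json that
+    # the planning, analyzing, and coding stages consume.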
+ python (Join-Path $CodesDir "pdf_to_json.py") ` + --pdf_path $PdfPath ` + --output_json_path $RawJson + if ($LASTEXITCODE -ne 0) { Fail "pdf_to_json.py failed" } + + Banner "Step 3 β€” Cleaning JSON" + python (Join-Path $CodesDir "0_pdf_process.py") ` + --input_json_path $RawJson ` + --output_json_path $CleanJson + if ($LASTEXITCODE -ne 0) { Fail "0_pdf_process.py failed" } + + Good "Cleaned JSON -> $CleanJson" +} else { + Banner "Step 2+3 β€” LaTeX source (no PDF conversion needed)" + $CleanJson = "" # not used in latex mode + Good "Will use LaTeX file: $Latex" +} + +# --------------------------------------------------------------------------- +# Step 4 β€” Planning +# --------------------------------------------------------------------------- +Banner "Step 4 β€” Planning" + +if ($UseLatex) { + python (Join-Path $CodesDir "1_planning.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --paper_format LaTeX ` + --pdf_latex_path $Latex ` + --output_dir $WorkDir ` + --provider $Provider ` + @ApiKeyFlag +} else { + python (Join-Path $CodesDir "1_planning.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --pdf_json_path $CleanJson ` + --output_dir $WorkDir ` + --provider $Provider ` + @ApiKeyFlag +} +if ($LASTEXITCODE -ne 0) { Fail "1_planning.py failed" } +Good "Planning complete" + +# --------------------------------------------------------------------------- +# Step 5 β€” Extract config +# --------------------------------------------------------------------------- +Banner "Step 5 β€” Extracting config" + +python (Join-Path $CodesDir "1.1_extract_config.py") ` + --paper_name $PaperName ` + --output_dir $WorkDir +if ($LASTEXITCODE -ne 0) { Warn "1.1_extract_config.py failed (non-fatal)" } + +$ConfigSrc = Join-Path $WorkDir "planning_config.yaml" +$ConfigDst = Join-Path $OutputRepo "config.yaml" +if (Test-Path $ConfigSrc) { + Copy-Item -Force $ConfigSrc $ConfigDst + Good "config.yaml -> $ConfigDst" +} + +# --------------------------------------------------------------------------- +# Step 6 β€” Analyzing +# --------------------------------------------------------------------------- +Banner "Step 6 β€” Analyzing" + +if ($UseLatex) { + python (Join-Path $CodesDir "2_analyzing.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --paper_format LaTeX ` + --pdf_latex_path $Latex ` + --output_dir $WorkDir ` + --provider $Provider ` + @ApiKeyFlag +} else { + python (Join-Path $CodesDir "2_analyzing.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --pdf_json_path $CleanJson ` + --output_dir $WorkDir ` + --provider $Provider ` + @ApiKeyFlag +} +if ($LASTEXITCODE -ne 0) { Fail "2_analyzing.py failed" } +Good "Analysis complete" + +# --------------------------------------------------------------------------- +# Step 7 β€” Coding +# --------------------------------------------------------------------------- +Banner "Step 7 β€” Coding" + +if ($UseLatex) { + python (Join-Path $CodesDir "3_coding.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --paper_format LaTeX ` + --pdf_latex_path $Latex ` + --output_dir $WorkDir ` + --output_repo_dir $OutputRepo ` + --provider $Provider ` + @ApiKeyFlag +} else { + python (Join-Path $CodesDir "3_coding.py") ` + --paper_name $PaperName ` + --gpt_version $Model ` + --pdf_json_path $CleanJson ` + --output_dir $WorkDir ` + --output_repo_dir $OutputRepo ` + --provider $Provider ` + @ApiKeyFlag +} +if ($LASTEXITCODE -ne 0) { Fail "3_coding.py failed" } +Good "Coding complete" + +# 
--------------------------------------------------------------------------- +# Step 8 β€” (Optional) Run reproduce.ps1 +# --------------------------------------------------------------------------- +$ReproScript = Join-Path $OutputRepo "reproduce.ps1" +$ReproFailed = $false + +if (Test-Path $ReproScript) { + Banner "Step 8 β€” Running reproduce.ps1" + Push-Location $OutputRepo + try { + powershell.exe -NoProfile -ExecutionPolicy Bypass -File "reproduce.ps1" + if ($LASTEXITCODE -ne 0) { + Warn "reproduce.ps1 exited with code $LASTEXITCODE" + $ReproFailed = $true + } else { + Good "reproduce.ps1 succeeded" + } + } catch { + Warn "reproduce.ps1 threw an exception: $_" + $ReproFailed = $true + } finally { + Pop-Location + } +} else { + Info "No reproduce.ps1 found β€” skipping auto-run" +} + +# --------------------------------------------------------------------------- +# Step 9 β€” (Optional) Debugging agent +# --------------------------------------------------------------------------- +if ($RunDebug -and $ReproFailed) { + Banner "Step 9 β€” Debugging agent" + + $ErrorFile = Join-Path $OutputRepo "error.txt" + if (-not (Test-Path $ErrorFile)) { + # Capture stderr from a fresh reproduce attempt + $ErrorLog = powershell.exe -NoProfile -ExecutionPolicy Bypass ` + -File (Join-Path $OutputRepo "reproduce.ps1") 2>&1 + $ErrorLog | Out-File $ErrorFile -Encoding utf8 + } + + python (Join-Path $CodesDir "4_debugging.py") ` + --paper_name $PaperName ` + --model $Model ` + --provider $Provider ` + --error_file_name $ErrorFile ` + --output_dir $WorkDir ` + --output_repo_dir $OutputRepo ` + --save_num 1 ` + @ApiKeyFlag + + if ($LASTEXITCODE -eq 0) { Good "Debugging complete" } else { Warn "Debugging agent returned errors" } +} + +# --------------------------------------------------------------------------- +# Step 10 β€” (Optional) Evaluation +# --------------------------------------------------------------------------- +if ($Eval) { + Banner "Step 10 β€” Reference-free evaluation" + + $EvalArgs = @( + "--paper_name", $PaperName, + "--pdf_json_path", $CleanJson, + "--data_dir", (Join-Path $Root "data"), + "--output_dir", $WorkDir, + "--target_repo_dir", $OutputRepo, + "--eval_result_dir", (Join-Path $Root "results"), + "--eval_type", "ref_free", + "--generated_n", "8", + "--gpt_version", $Model, + "--provider", $Provider, + "--papercoder" + ) + python (Join-Path $CodesDir "eval.py") @EvalArgs @ApiKeyFlag +} + +# --------------------------------------------------------------------------- +# Done +# --------------------------------------------------------------------------- +Banner "Done" +Good "Paper name : $PaperName" +Good "Output dir : $WorkDir" +Good "Repo dir : $OutputRepo" +if (Test-Path $CleanJson) { Good "Cleaned JSON: $CleanJson" } +Write-Host "" diff --git a/requirements.txt b/requirements.txt index 9e374b21..c70a1020 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,6 @@ openai>=1.65.4 vllm>=0.6.4.post1 transformers>=4.46.3 tiktoken>=0.9.0 +requests>=2.31.0 +huggingface_hub>=0.23.0 +pymupdf>=1.24.0 diff --git a/scripts/run.ps1 b/scripts/run.ps1 new file mode 100644 index 00000000..52624ec7 --- /dev/null +++ b/scripts/run.ps1 @@ -0,0 +1,79 @@ +# --------------------------------------------------------------------------- +# Provider & API key configuration +# --------------------------------------------------------------------------- +# Set PROVIDER to one of: +# openai | groq | cerebras | openrouter | mistral | github | sambanova +# gemini | cohere | cloudflare +# +# Free model 
suggestions per provider:
+#   openai     : o3-mini, gpt-4.1, gpt-4o
+#   groq       : llama-3.3-70b-versatile, meta-llama/llama-4-scout-17b-16e-instruct,
+#                moonshotai/kimi-k2-instruct, qwen/qwen3-32b
+#   cerebras   : llama-3.3-70b, qwen3-32b, qwen3-235b, gpt-oss-120b
+#   openrouter : deepseek/deepseek-r1:free, meta-llama/llama-4-scout:free,
+#                qwen/qwen3-235b-a22b:free
+#   mistral    : mistral-large-latest, mistral-small-latest, ministral-8b-latest
+#   github     : gpt-4o, gpt-4.1, o3, deepseek-r1, grok-3-mini
+#   sambanova  : Meta-Llama-3.3-70B-Instruct, Qwen2.5-72B-Instruct
+#   gemini     : gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite-preview-06-17
+#   cohere     : command-r-plus, command-a-03-2025, aya-expanse-32b
+#   cloudflare : @cf/meta/llama-3.3-70b-instruct-fp8-fast, @cf/qwen/qwq-32b
+#
+# The API key is read from the environment variable matching your provider:
+#   OPENAI_API_KEY | GROQ_API_KEY | CEREBRAS_API_KEY | OPENROUTER_API_KEY
+#   MISTRAL_API_KEY | GITHUB_TOKEN | SAMBANOVA_API_KEY | GEMINI_API_KEY
+#   COHERE_API_KEY | CLOUDFLARE_API_KEY (+ CLOUDFLARE_ACCOUNT_ID)
+#
+# Or set $API_KEY below to pass it explicitly via --api_key.
+# ---------------------------------------------------------------------------
+
+$PROVIDER = "openai"
+$GPT_VERSION = "o3-mini"
+$API_KEY = ""        # set this to pass the key explicitly instead of via env var
+
+$ApiKeyFlag = @()
+if ($API_KEY) { $ApiKeyFlag = @("--api_key", $API_KEY) }
+
+$PAPER_NAME = "Transformer"
+$PDF_JSON_PATH = "..\examples\Transformer.json"                  # .json
+$PDF_JSON_CLEANED_PATH = "..\examples\Transformer_cleaned.json"  # _cleaned.json
+$OUTPUT_DIR = "..\outputs\Transformer"
+$OUTPUT_REPO_DIR = "..\outputs\Transformer_repo"
+
+New-Item -ItemType Directory -Force -Path $OUTPUT_DIR | Out-Null
+New-Item -ItemType Directory -Force -Path $OUTPUT_REPO_DIR | Out-Null
+
+Write-Host $PAPER_NAME
+
+Write-Host "------- Preprocess -------"
+
+python ..\codes\0_pdf_process.py `
+    --input_json_path $PDF_JSON_PATH `
+    --output_json_path $PDF_JSON_CLEANED_PATH
+
+Write-Host "------- PaperCoder -------"
+
+python ..\codes\1_planning.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
+
+python ..\codes\1.1_extract_config.py `
+    --paper_name $PAPER_NAME `
+    --output_dir $OUTPUT_DIR
+
+Copy-Item -Force "$OUTPUT_DIR\planning_config.yaml" "$OUTPUT_REPO_DIR\config.yaml"
+
+python ..\codes\2_analyzing.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
+
+python ..\codes\3_coding.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR `
+    --output_repo_dir $OUTPUT_REPO_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
diff --git a/scripts/run_debug.ps1 b/scripts/run_debug.ps1
new file mode 100644
index 00000000..329936d5
--- /dev/null
+++ b/scripts/run_debug.ps1
@@ -0,0 +1,36 @@
+# Usage:
+#   .\run_debug.ps1 -ErrorFile "..\outputs\Transformer_repo\error.txt" -SaveNum 1
+#
+# Optional overrides:
+#   -Provider "groq"
+#   -Model "llama-3.3-70b-versatile"
+#   -OutputDir "..\outputs\Transformer"
+#   -OutputRepoDir "..\outputs\Transformer_repo"
+#   -PaperName "Transformer"
+#
+# Provider choices: openai | groq | cerebras | openrouter | mistral |
+#                   github | sambanova | gemini | cohere | cloudflare
+# API key is read from the matching env var (e.g. GROQ_API_KEY for groq).
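+#
+# One way to produce error.txt, mirroring what paper2code.ps1 does in its
+# debugging step: from inside the generated repo, run
+#   powershell -NoProfile -File reproduce.ps1 2>&1 | Out-File error.txt -Encoding utf8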
+ +param( + [Parameter(Mandatory=$true)] + [string]$ErrorFile, + + [Parameter(Mandatory=$true)] + [int]$SaveNum, + + [string]$Provider = "openai", + [string]$Model = "o4-mini", + [string]$OutputDir = "..\outputs\Transformer", + [string]$OutputRepoDir = "..\outputs\Transformer_repo", + [string]$PaperName = "Transformer" +) + +python ..\codes\4_debugging.py ` + --error_file_name $ErrorFile ` + --output_dir $OutputDir ` + --output_repo_dir $OutputRepoDir ` + --paper_name $PaperName ` + --model $Model ` + --save_num $SaveNum ` + --provider $Provider diff --git a/scripts/run_latex.ps1 b/scripts/run_latex.ps1 new file mode 100644 index 00000000..79ce6a50 --- /dev/null +++ b/scripts/run_latex.ps1 @@ -0,0 +1,70 @@ +# --------------------------------------------------------------------------- +# Provider & API key configuration +# --------------------------------------------------------------------------- +# Set PROVIDER to one of: +# openai | groq | cerebras | openrouter | mistral | github | sambanova +# gemini | cohere | cloudflare +# +# Free model suggestions per provider: +# openai : o3-mini, gpt-4.1, gpt-4o +# groq : llama-3.3-70b-versatile, meta-llama/llama-4-scout-17b-16e-instruct, +# moonshotai/kimi-k2-instruct, qwen/qwen3-32b +# cerebras : llama-3.3-70b, qwen3-32b, qwen3-235b, gpt-oss-120b +# openrouter : deepseek/deepseek-r1:free, meta-llama/llama-4-scout:free, +# qwen/qwen3-235b-a22b:free +# mistral : mistral-large-latest, mistral-small-latest, ministral-8b-latest +# github : gpt-4o, gpt-4.1, o3, deepseek-r1, grok-3-mini +# sambanova : Meta-Llama-3.3-70B-Instruct, Qwen2.5-72B-Instruct +# gemini : gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite-preview-06-17 +# cohere : command-r-plus, command-a-03-2025, aya-expanse-32b +# cloudflare : @cf/meta/llama-3.3-70b-instruct-fp8-fast, @cf/qwen/qwq-32b +# +# The API key is read from the matching environment variable (see run.ps1 for list). 
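+#
+# Note that there is no 0_pdf_process.py preprocess step in this script: the
+# cleaned LaTeX source is fed directly to the planning/analyzing/coding agents.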
+# ---------------------------------------------------------------------------
+
+$PROVIDER = "openai"
+$GPT_VERSION = "o3-mini"
+$API_KEY = ""        # set this to pass the key explicitly instead of via env var
+
+$ApiKeyFlag = @()
+if ($API_KEY) { $ApiKeyFlag = @("--api_key", $API_KEY) }
+
+$PAPER_NAME = "Transformer"
+$PDF_LATEX_CLEANED_PATH = "..\examples\Transformer_cleaned.tex"  # _cleaned.tex
+$OUTPUT_DIR = "..\outputs\Transformer"
+$OUTPUT_REPO_DIR = "..\outputs\Transformer_repo"
+
+New-Item -ItemType Directory -Force -Path $OUTPUT_DIR | Out-Null
+New-Item -ItemType Directory -Force -Path $OUTPUT_REPO_DIR | Out-Null
+
+Write-Host $PAPER_NAME
+
+Write-Host "------- PaperCoder -------"
+
+python ..\codes\1_planning.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_latex_path $PDF_LATEX_CLEANED_PATH `
+    --paper_format LaTeX `
+    --output_dir $OUTPUT_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
+
+python ..\codes\1.1_extract_config.py `
+    --paper_name $PAPER_NAME `
+    --output_dir $OUTPUT_DIR
+
+Copy-Item -Force "$OUTPUT_DIR\planning_config.yaml" "$OUTPUT_REPO_DIR\config.yaml"
+
+python ..\codes\2_analyzing.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_latex_path $PDF_LATEX_CLEANED_PATH `
+    --paper_format LaTeX `
+    --output_dir $OUTPUT_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
+
+python ..\codes\3_coding.py `
+    --paper_name $PAPER_NAME `
+    --gpt_version $GPT_VERSION `
+    --pdf_latex_path $PDF_LATEX_CLEANED_PATH `
+    --paper_format LaTeX `
+    --output_dir $OUTPUT_DIR `
+    --output_repo_dir $OUTPUT_REPO_DIR `
+    --provider $PROVIDER `
+    @ApiKeyFlag
diff --git a/scripts/run_latex_llm.ps1 b/scripts/run_latex_llm.ps1
new file mode 100644
index 00000000..c01211ac
--- /dev/null
+++ b/scripts/run_latex_llm.ps1
@@ -0,0 +1,49 @@
+# NOTE: vllm (used by the _llm scripts) requires Linux + CUDA and does NOT support Windows natively.
+# To use these scripts on Windows, run them inside WSL2 (Windows Subsystem for Linux) with a CUDA-capable GPU.
+# The OpenAI-based scripts (run.ps1 / run_latex.ps1) work natively on Windows without this limitation.
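+#
+# $TP_SIZE below is forwarded to the _llm scripts as the vLLM tensor-parallel
+# degree, i.e. the number of GPUs the model weights are sharded across.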
+ +$MODEL_NAME = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" +$TP_SIZE = 2 + +$PAPER_NAME = "Transformer" +$PDF_LATEX_CLEANED_PATH = "..\examples\Transformer_cleaned.tex" # _cleaned.tex +$OUTPUT_DIR = "..\outputs\Transformer_dscoder" +$OUTPUT_REPO_DIR = "..\outputs\Transformer_dscoder_repo" + +New-Item -ItemType Directory -Force -Path $OUTPUT_DIR | Out-Null +New-Item -ItemType Directory -Force -Path $OUTPUT_REPO_DIR | Out-Null + +Write-Host $PAPER_NAME + +Write-Host "------- PaperCoder -------" + +python ..\codes\1_planning_llm.py ` + --paper_name $PAPER_NAME ` + --model_name $MODEL_NAME ` + --tp_size $TP_SIZE ` + --pdf_latex_path $PDF_LATEX_CLEANED_PATH ` + --paper_format LaTeX ` + --output_dir $OUTPUT_DIR + +python ..\codes\1.1_extract_config.py ` + --paper_name $PAPER_NAME ` + --output_dir $OUTPUT_DIR + +Copy-Item -Force "$OUTPUT_DIR\planning_config.yaml" "$OUTPUT_REPO_DIR\config.yaml" + +python ..\codes\2_analyzing_llm.py ` + --paper_name $PAPER_NAME ` + --model_name $MODEL_NAME ` + --tp_size $TP_SIZE ` + --pdf_latex_path $PDF_LATEX_CLEANED_PATH ` + --paper_format LaTeX ` + --output_dir $OUTPUT_DIR + +python ..\codes\3_coding_llm.py ` + --paper_name $PAPER_NAME ` + --model_name $MODEL_NAME ` + --tp_size $TP_SIZE ` + --pdf_latex_path $PDF_LATEX_CLEANED_PATH ` + --paper_format LaTeX ` + --output_dir $OUTPUT_DIR ` + --output_repo_dir $OUTPUT_REPO_DIR diff --git a/scripts/run_llm.ps1 b/scripts/run_llm.ps1 new file mode 100644 index 00000000..d797af1b --- /dev/null +++ b/scripts/run_llm.ps1 @@ -0,0 +1,53 @@ +# NOTE: vllm (used by the _llm scripts) requires Linux + CUDA and does NOT support Windows natively. +# To use these scripts on Windows, run them inside WSL2 (Windows Subsystem for Linux) with a CUDA-capable GPU. +# The OpenAI-based scripts (run.ps1 / run_latex.ps1) work natively on Windows without this limitation. 
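+#
+# Output paths use the Transformer_dscoder / Transformer_dscoder_repo suffixes
+# so open-source-model runs stay separate from the API-based outputs of run.ps1.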
+
+$MODEL_NAME = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
+$TP_SIZE = 2
+
+$PAPER_NAME = "Transformer"
+$PDF_JSON_PATH = "..\examples\Transformer.json"                  # .json
+$PDF_JSON_CLEANED_PATH = "..\examples\Transformer_cleaned.json"  # _cleaned.json
+$OUTPUT_DIR = "..\outputs\Transformer_dscoder"
+$OUTPUT_REPO_DIR = "..\outputs\Transformer_dscoder_repo"
+
+New-Item -ItemType Directory -Force -Path $OUTPUT_DIR | Out-Null
+New-Item -ItemType Directory -Force -Path $OUTPUT_REPO_DIR | Out-Null
+
+Write-Host $PAPER_NAME
+
+Write-Host "------- Preprocess -------"
+
+python ..\codes\0_pdf_process.py `
+    --input_json_path $PDF_JSON_PATH `
+    --output_json_path $PDF_JSON_CLEANED_PATH
+
+Write-Host "------- PaperCoder -------"
+
+python ..\codes\1_planning_llm.py `
+    --paper_name $PAPER_NAME `
+    --model_name $MODEL_NAME `
+    --tp_size $TP_SIZE `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR
+
+python ..\codes\1.1_extract_config.py `
+    --paper_name $PAPER_NAME `
+    --output_dir $OUTPUT_DIR
+
+Copy-Item -Force "$OUTPUT_DIR\planning_config.yaml" "$OUTPUT_REPO_DIR\config.yaml"
+
+python ..\codes\2_analyzing_llm.py `
+    --paper_name $PAPER_NAME `
+    --model_name $MODEL_NAME `
+    --tp_size $TP_SIZE `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR
+
+python ..\codes\3_coding_llm.py `
+    --paper_name $PAPER_NAME `
+    --model_name $MODEL_NAME `
+    --tp_size $TP_SIZE `
+    --pdf_json_path $PDF_JSON_CLEANED_PATH `
+    --output_dir $OUTPUT_DIR `
+    --output_repo_dir $OUTPUT_REPO_DIR
diff --git a/test_suite.py b/test_suite.py
new file mode 100644
index 00000000..b28e5562
--- /dev/null
+++ b/test_suite.py
@@ -0,0 +1,202 @@
+"""
+Paper2Code Windows — Test Suite
+Runs without any API key (structural/import/logic tests only).
+"""
+import sys
+import os
+import json
+import argparse
+import traceback
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "codes"))
+
+PASS = 0
+FAIL = 0
+
+def ok(name):
+    global PASS
+    PASS += 1
+    print(f"  PASS {name}")
+
+def fail(name, err):
+    global FAIL
+    FAIL += 1
+    print(f"  FAIL {name}: {err}")
+
+# ── 1. Import all modules ──────────────────────────────────────────────────
+print("\n[1] Import checks")
+modules = {
+    "utils": "codes/utils.py",
+    "providers": "codes/providers.py",
+    "0_pdf_process": "codes/0_pdf_process.py",
+    "1_planning": "codes/1_planning.py",
+    "2_analyzing": "codes/2_analyzing.py",
+    "3_coding": "codes/3_coding.py",
+    "3.1_coding_sh": "codes/3.1_coding_sh.py",
+    "4_debugging": "codes/4_debugging.py",
+    "eval": "codes/eval.py",
+    "1.2_rag_config": "codes/1.2_rag_config.py",
+}
+import importlib.util
+for name, path in modules.items():
+    try:
+        # spec_from_file_location does not stat the file, so check existence
+        # explicitly; otherwise this test passes even for a missing file.
+        assert os.path.exists(path), "file not found"
+        spec = importlib.util.spec_from_file_location(name, path)
+        assert spec is not None, "spec is None"
+        ok(f"file exists: {path}")
+    except Exception as e:
+        fail(f"file exists: {path}", e)
+
+# ── 2.
providers module ──────────────────────────────────────────────────── +print("\n[2] providers module") +try: + from providers import build_client, chat_complete, is_reasoning_model, add_provider_args + ok("import providers") +except Exception as e: + fail("import providers", e) + sys.exit(1) + +# is_reasoning_model +tests = [ + ("o3-mini", True), ("o1", True), ("deepseek-r1", True), + ("qwq-32b", True), ("gpt-4o", False), ("llama-3.3-70b", False), + ("mistral-large", False), ("gemini-2.5-pro", False), +] +for model, expected in tests: + try: + result = is_reasoning_model(model) + assert result == expected, f"got {result}, want {expected}" + ok(f"is_reasoning_model('{model}') == {expected}") + except Exception as e: + fail(f"is_reasoning_model('{model}')", e) + +# add_provider_args +try: + p = argparse.ArgumentParser() + add_provider_args(p) + args = p.parse_args(["--provider", "groq", "--api_key", "test123"]) + assert args.provider == "groq" + assert args.api_key == "test123" + ok("add_provider_args parses --provider groq --api_key test123") +except Exception as e: + fail("add_provider_args", e) + +# build_client β€” OpenAI-SDK providers +for prov in ["openai", "groq", "cerebras", "openrouter", "mistral", "github", "sambanova"]: + try: + c = build_client(prov, api_key="dummy-key") + assert hasattr(c, "chat"), "missing .chat" + ok(f"build_client('{prov}') -> OpenAI client") + except Exception as e: + fail(f"build_client('{prov}')", e) + +# build_client β€” native REST providers +for prov in ["gemini", "cohere"]: + try: + c = build_client(prov, api_key="dummy-key") + assert isinstance(c, dict) and c.get("_provider") == prov + ok(f"build_client('{prov}') -> REST dict") + except Exception as e: + fail(f"build_client('{prov}')", e) + +try: + c = build_client("cloudflare", api_key="dummy-key", cloudflare_account_id="acc123") + assert c["_account_id"] == "acc123" + ok("build_client('cloudflare') -> REST dict with account_id") +except Exception as e: + fail("build_client('cloudflare')", e) + +# ── 3. utils module ──────────────────────────────────────────────────────── +print("\n[3] utils module") +try: + import utils + ok("import utils") +except Exception as e: + fail("import utils", e) + +fake_resp = { + "usage": { + "prompt_tokens": 1000, + "completion_tokens": 500, + "total_tokens": 1500, + "prompt_tokens_details": {"cached_tokens": 0}, + } +} +try: + cost = utils.cal_cost(fake_resp, "gpt-4o") + assert isinstance(cost, dict) and "total_cost" in cost + ok(f"cal_cost(response, 'gpt-4o') total_cost={cost['total_cost']}") +except Exception as e: + fail("cal_cost gpt-4o", e) + +try: + cost = utils.cal_cost(fake_resp, "unknown-model-xyz") + assert isinstance(cost, dict) and cost.get("total_cost") == 0.0 + ok("cal_cost(response, 'unknown-model-xyz') total_cost=0 (graceful fallback)") +except Exception as e: + fail("cal_cost unknown model", e) + +# ── 4. 
0_pdf_process.py functional test ─────────────────────────────────── +print("\n[4] 0_pdf_process.py functional test") +try: + out_path = "test_output_cleaned.json" + # Call via subprocess to test the script end-to-end + import subprocess + result = subprocess.run( + [sys.executable, "codes/0_pdf_process.py", + "--input_json_path", "examples/Transformer.json", + "--output_json_path", out_path], + capture_output=True, text=True + ) + assert result.returncode == 0, f"returncode={result.returncode}\n{result.stderr}" + assert os.path.exists(out_path), "output file not created" + with open(out_path, encoding="utf-8") as f: + data = json.load(f) + assert "title" in data, "missing 'title' key" + assert "abstract" in data, "missing 'abstract' key" + ok(f"0_pdf_process.py -> {out_path} (title: {data.get('title','?')[:40]}...)") + os.remove(out_path) +except Exception as e: + fail("0_pdf_process.py end-to-end", e) + +# ── 5. PowerShell scripts exist ──────────────────────────────────────────── +print("\n[5] PowerShell script presence") +ps_scripts = [ + "scripts/run.ps1", + "scripts/run_llm.ps1", + "scripts/run_latex.ps1", + "scripts/run_latex_llm.ps1", + "scripts/run_debug.ps1", +] +for s in ps_scripts: + if os.path.exists(s): + ok(f"exists: {s}") + else: + fail(f"exists: {s}", "file not found") + +# ── 6. PowerShell script content checks ─────────────────────────────────── +print("\n[6] PowerShell script content") +try: + with open("scripts/run.ps1", encoding="utf-8") as f: + content = f.read() + assert "--provider" in content, "--provider not passed in run.ps1" + assert "$PROVIDER" in content, "$PROVIDER variable not in run.ps1" + assert "$GPT_VERSION" in content, "$GPT_VERSION not in run.ps1" + ok("run.ps1 has $PROVIDER, $GPT_VERSION, --provider") +except Exception as e: + fail("run.ps1 content", e) + +try: + with open("scripts/run_debug.ps1", encoding="utf-8") as f: + content = f.read() + assert "--provider" in content, "--provider not passed in run_debug.ps1" + ok("run_debug.ps1 has --provider") +except Exception as e: + fail("run_debug.ps1 content", e) + +# ── Summary ──────────────────────────────────────────────────────────────── +print() +print("=" * 55) +print(f" Results: {PASS} passed, {FAIL} failed") +print("=" * 55) +if FAIL: + sys.exit(1)
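
A quick way to sanity-check the provider layer before spending tokens on a full pipeline run is to exercise the usage pattern documented in `codes/providers.py` directly. A minimal sketch, assuming `GROQ_API_KEY` is set and the snippet is run from the `codes/` directory (the model name is just one of the free-tier suggestions listed in the docstring):

```python
from providers import build_client, chat_complete

# build_client falls back to the GROQ_API_KEY environment variable
# when no api_key argument is passed.
client = build_client(provider="groq")

# chat_complete always returns the normalised response dict described
# in providers.py, regardless of the underlying provider.
resp = chat_complete(
    client,
    provider="groq",
    model="llama-3.3-70b-versatile",  # example model from the free-tier list
    messages=[{"role": "user", "content": "Reply with one word: pong"}],
)
print(resp["choices"][0]["message"]["content"])
print(resp["usage"]["total_tokens"])
```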